diff --git a/.gitignore b/.gitignore index 18f75a7..8be3fbd 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,4 @@ dist web-dist .env web/*.tsbuildinfo -data/accounts.json -data/oauth-state.json -data/requests-trace.jsonl +data diff --git a/README.md b/README.md index 889332b..6abdba6 100644 --- a/README.md +++ b/README.md @@ -112,8 +112,9 @@ Because this is often deployed remotely (Unraid/VPS), onboarding uses a manual r 2. For OpenAI accounts, enter the account email 3. Click **Start OAuth** 4. Complete login in browser -5. Copy the full redirect URL shown after the callback completes -6. Paste that URL in the dashboard and click **Complete OAuth** +5. Wait for the local callback page to open on `localhost:1455` +6. The dashboard should autofill the callback URL, or you can copy it from that page +7. Click **Complete OAuth** Mistral accounts still use manual token entry in the dashboard. @@ -281,6 +282,7 @@ Model alias admin endpoints: | `OAUTH_TOKEN_URL` | `https://auth.openai.com/oauth/token` | OAuth token endpoint | | `OAUTH_SCOPE` | `openid profile email offline_access` | OAuth scope | | `OAUTH_REDIRECT_URI` | `http://localhost:1455/auth/callback` | Redirect URI | +| `OAUTH_CALLBACK_BIND_HOST` | `` | Override bind host for the local OAuth callback helper server (for example `0.0.0.0` in Docker) | | `MISTRAL_COMPACT_UPSTREAM_PATH` | `/v1/responses/compact` | Mistral upstream path for compact responses | --- diff --git a/docker-compose.yml b/docker-compose.yml index 07b9dd9..9c68467 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,6 +3,7 @@ services: build: . container_name: multivibe ports: + - "4010:4010" - "1455:1455" environment: - PORT=1455 @@ -22,6 +23,7 @@ services: - OAUTH_TOKEN_URL=https://auth.openai.com/oauth/token - OAUTH_SCOPE=openid profile email offline_access - OAUTH_REDIRECT_URI=http://localhost:1455/auth/callback + - OAUTH_CALLBACK_BIND_HOST=0.0.0.0 volumes: - ./data:/data restart: unless-stopped diff --git a/package.json b/package.json index d2d56c0..835d2e5 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,8 @@ "build:api": "tsc -p tsconfig.json", "build:web": "npm --prefix web run build", "build": "npm run build:web && npm run build:api", - "start": "node dist/server.js" + "start": "node dist/server.js", + "test": "node --test --test-force-exit test/*.test.js" }, "dependencies": { "@foxglove/wasm-zstd": "^1.0.1", diff --git a/src/account-utils.ts b/src/account-utils.ts index ed513da..d35db51 100644 --- a/src/account-utils.ts +++ b/src/account-utils.ts @@ -1,37 +1,63 @@ import { OAuthConfig } from "./oauth.js"; import { mergeTokenIntoAccount, refreshAccessToken } from "./oauth.js"; -import { normalizeProvider, rememberError } from "./quota.js"; +import { + clearAuthFailureState, + normalizeProvider, + rememberError, +} from "./quota.js"; import type { Account } from "./types.js"; +import { + TOKEN_REFRESH_COOLDOWN_MS, + TOKEN_REFRESH_MARGIN_MS, +} from "./config.js"; + +const refreshInFlight = new Map>(); export async function ensureValidToken( account: Account, oauthConfig: OAuthConfig, ): Promise { if (normalizeProvider(account) !== "openai") return account; - if (!account.expiresAt || Date.now() < account.expiresAt - 5 * 60_000) + if (!account.expiresAt || Date.now() < account.expiresAt - TOKEN_REFRESH_MARGIN_MS) return account; if (!account.refreshToken) return account; - - try { - const refreshed = await refreshAccessToken( - oauthConfig, - account.refreshToken, - ); - const merged = mergeTokenIntoAccount(account, refreshed); - merged.state = { - ...merged.state, - needsTokenRefresh: false, - }; - return merged; - } catch (err: any) { - rememberError( - account, - `refresh token failed: ${err?.message ?? String(err)}`, - ); - account.state = { - ...account.state, - needsTokenRefresh: true, - }; + const refreshToken = account.refreshToken; + if ( + typeof account.state?.refreshBlockedUntil === "number" && + Date.now() < account.state.refreshBlockedUntil + ) { return account; } + + const existing = refreshInFlight.get(account.id); + if (existing) return existing; + + const run = (async () => { + try { + const refreshed = await refreshAccessToken( + oauthConfig, + refreshToken, + ); + const merged = mergeTokenIntoAccount(account, refreshed); + clearAuthFailureState(merged); + return merged; + } catch (err: any) { + const message = err?.message ?? String(err); + rememberError(account, `refresh token failed: ${message}`); + const failureCount = (account.state?.refreshFailureCount ?? 0) + 1; + account.state = { + ...account.state, + needsTokenRefresh: true, + refreshFailureCount: failureCount, + refreshBlockedUntil: + Date.now() + TOKEN_REFRESH_COOLDOWN_MS * Math.min(failureCount, 6), + }; + return account; + } finally { + refreshInFlight.delete(account.id); + } + })(); + + refreshInFlight.set(account.id, run); + return run; } diff --git a/src/config.ts b/src/config.ts index d24e2f5..988eeca 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,5 +1,6 @@ import os from "node:os"; +export const HOST = process.env.HOST ?? "127.0.0.1"; export const PORT = Number(process.env.PORT ?? 1455); export const STORE_PATH = process.env.STORE_PATH ?? "/data/accounts.json"; export const OAUTH_STATE_PATH = @@ -28,17 +29,19 @@ export const ZAI_UPSTREAM_PATH = export const ZAI_COMPACT_UPSTREAM_PATH = process.env.ZAI_COMPACT_UPSTREAM_PATH ?? "/v1/chat/completions"; export const ADMIN_TOKEN = process.env.ADMIN_TOKEN ?? ""; +export const STORE_ENCRYPTION_KEY = + process.env.STORE_ENCRYPTION_KEY ?? ""; export const MAX_ACCOUNT_RETRY_ATTEMPTS = Math.max( 1, Number(process.env.MAX_ACCOUNT_RETRY_ATTEMPTS ?? 5), ); -export const MAX_UPSTREAM_RETRIES = Math.max( +export const MAX_GET_RETRIES = Math.max( 0, - Number(process.env.MAX_UPSTREAM_RETRIES ?? 3), + Number(process.env.MAX_GET_RETRIES ?? 2), ); -export const UPSTREAM_BASE_DELAY_MS = Math.max( +export const RETRY_BASE_DELAY_MS = Math.max( 100, - Number(process.env.UPSTREAM_BASE_DELAY_MS ?? 1000), + Number(process.env.RETRY_BASE_DELAY_MS ?? 250), ); export const PI_USER_AGENT = `pi (${os.platform()} ${os.release()}; ${os.arch()})`; @@ -57,6 +60,39 @@ export const MODELS_CACHE_MS = Number( export const TOKEN_REFRESH_MARGIN_MS = Number( process.env.TOKEN_REFRESH_MARGIN_MS ?? 60_000, ); +export const TOKEN_REFRESH_COOLDOWN_MS = Number( + process.env.TOKEN_REFRESH_COOLDOWN_MS ?? 5 * 60_000, +); +export const UPSTREAM_REQUEST_TIMEOUT_MS = Number( + process.env.UPSTREAM_REQUEST_TIMEOUT_MS ?? 60_000, +); +export const MODEL_DISCOVERY_TIMEOUT_MS = Number( + process.env.MODEL_DISCOVERY_TIMEOUT_MS ?? 8_000, +); +export const OAUTH_REQUEST_TIMEOUT_MS = Number( + process.env.OAUTH_REQUEST_TIMEOUT_MS ?? 15_000, +); +export const OAUTH_CALLBACK_BIND_HOST = + process.env.OAUTH_CALLBACK_BIND_HOST ?? ""; +export const MODEL_COMPATIBILITY_TTL_MS = Number( + process.env.MODEL_COMPATIBILITY_TTL_MS ?? 6 * 60 * 60_000, +); +export const SERVER_HEADERS_TIMEOUT_MS = Number( + process.env.SERVER_HEADERS_TIMEOUT_MS ?? 30_000, +); +export const SERVER_KEEP_ALIVE_TIMEOUT_MS = Number( + process.env.SERVER_KEEP_ALIVE_TIMEOUT_MS ?? 5_000, +); +export const SERVER_REQUEST_TIMEOUT_MS = Number( + process.env.SERVER_REQUEST_TIMEOUT_MS ?? 90_000, +); +export const SHUTDOWN_GRACE_MS = Number( + process.env.SHUTDOWN_GRACE_MS ?? 10_000, +); +export const TRACE_COMPACTION_INTERVAL = Math.max( + 1, + Number(process.env.TRACE_COMPACTION_INTERVAL ?? 100), +); export const ACCOUNT_FLUSH_INTERVAL_MS = Number( process.env.ACCOUNT_FLUSH_INTERVAL_MS ?? 5_000, diff --git a/src/crypto.ts b/src/crypto.ts new file mode 100644 index 0000000..39ab686 --- /dev/null +++ b/src/crypto.ts @@ -0,0 +1,50 @@ +import { createCipheriv, createDecipheriv, createHash, randomBytes } from "node:crypto"; + +type Envelope = { + v: 1; + alg: "aes-256-gcm"; + iv: string; + tag: string; + data: string; +}; + +function deriveKey(secret: string): Buffer { + return createHash("sha256").update(secret, "utf8").digest(); +} + +export function encryptJson(value: T, secret: string): string { + const iv = randomBytes(12); + const cipher = createCipheriv("aes-256-gcm", deriveKey(secret), iv); + const plaintext = Buffer.from(JSON.stringify(value), "utf8"); + const ciphertext = Buffer.concat([cipher.update(plaintext), cipher.final()]); + const envelope: Envelope = { + v: 1, + alg: "aes-256-gcm", + iv: iv.toString("base64"), + tag: cipher.getAuthTag().toString("base64"), + data: ciphertext.toString("base64"), + }; + return JSON.stringify(envelope, null, 2); +} + +export function decryptJson(raw: string, secret: string): T { + const parsed = JSON.parse(raw) as Envelope; + if (!parsed || parsed.v !== 1 || parsed.alg !== "aes-256-gcm") { + throw new Error("unsupported encrypted payload"); + } + const decipher = createDecipheriv( + "aes-256-gcm", + deriveKey(secret), + Buffer.from(parsed.iv, "base64"), + ); + decipher.setAuthTag(Buffer.from(parsed.tag, "base64")); + const decrypted = Buffer.concat([ + decipher.update(Buffer.from(parsed.data, "base64")), + decipher.final(), + ]); + return JSON.parse(decrypted.toString("utf8")) as T; +} + +export function looksEncryptedJson(raw: string): boolean { + return /^\s*\{\s*"v"\s*:\s*1\s*,\s*"alg"\s*:\s*"aes-256-gcm"/.test(raw); +} diff --git a/src/oauth-callback-server.ts b/src/oauth-callback-server.ts new file mode 100644 index 0000000..a095a38 --- /dev/null +++ b/src/oauth-callback-server.ts @@ -0,0 +1,152 @@ +import http from "node:http"; + +function isLoopbackHostname(hostname: string): boolean { + return hostname === "127.0.0.1" || hostname === "::1" || hostname === "localhost"; +} + +function callbackPageHtml() { + return ` + + + + + MultiVibe OAuth Callback + + + +
+

OAuth callback received

+

The full callback URL is below. It has also been sent back to the dashboard window when possible.

+ +
+ + You can paste this into the dashboard if it does not autofill. +
+

Expected path: /auth/callback

+
+ + +`; +} + +export function createOAuthCallbackServer(redirectUri: string): http.Server | null { + let url: URL; + try { + url = new URL(redirectUri); + } catch { + return null; + } + + if (url.protocol !== "http:" || !isLoopbackHostname(url.hostname) || !url.port) { + return null; + } + + const expectedPath = url.pathname || "/"; + + return http.createServer((req, res) => { + const requestUrl = new URL(req.url ?? "/", `http://${req.headers.host ?? "localhost"}`); + + if (req.method !== "GET" || requestUrl.pathname !== expectedPath) { + res.statusCode = 404; + res.setHeader("content-type", "text/plain; charset=utf-8"); + res.end("not found"); + return; + } + + res.statusCode = 200; + res.setHeader("content-type", "text/html; charset=utf-8"); + res.end(callbackPageHtml()); + }); +} diff --git a/src/oauth.ts b/src/oauth.ts index b19f1fd..e52b50b 100644 --- a/src/oauth.ts +++ b/src/oauth.ts @@ -1,5 +1,6 @@ import { createHash, randomBytes, randomUUID } from "node:crypto"; import type { Account, OAuthFlowState } from "./types.js"; +import { OAUTH_REQUEST_TIMEOUT_MS } from "./config.js"; export type OAuthConfig = { authorizationUrl: string; @@ -89,10 +90,12 @@ export function parseAuthorizationInput(input: string): { code?: string; state?: } async function postForm(url: string, body: URLSearchParams): Promise { + const signal = AbortSignal.timeout(OAUTH_REQUEST_TIMEOUT_MS); const res = await fetch(url, { method: "POST", headers: { "content-type": "application/x-www-form-urlencoded" }, body, + signal, }); const text = await res.text(); diff --git a/src/quota.ts b/src/quota.ts index 9a8e2a2..26d769c 100644 --- a/src/quota.ts +++ b/src/quota.ts @@ -1,16 +1,21 @@ import type { Account, ProviderId, UsageSnapshot } from "./types.js"; +import { MODEL_COMPATIBILITY_TTL_MS } from "./config.js"; export const USAGE_CACHE_TTL_MS = Number(process.env.USAGE_CACHE_TTL_MS ?? 300_000); const USAGE_TIMEOUT_MS = Number(process.env.USAGE_TIMEOUT_MS ?? 10_000); const BLOCK_FALLBACK_MS = Number(process.env.BLOCK_FALLBACK_MS ?? 30 * 60_000); -const DEFAULT_ROUTING_WINDOW_MS = Number(process.env.ROUTING_WINDOW_MS ?? 5 * 60 * 1000); +const DEFAULT_ROUTING_WINDOW_MS = Number(process.env.ROUTING_WINDOW_MS ?? 0); +const AUTH_FALLBACK_MS = Number(process.env.AUTH_FALLBACK_MS ?? 60 * 60_000); type RouteCache = { - bucket: number; accountId?: string; + bucketByWindowMs: Map; }; -const routeCache: RouteCache = { bucket: -1, accountId: undefined }; +const routeCache: RouteCache = { + accountId: undefined, + bucketByWindowMs: new Map(), +}; export function normalizeProvider(account?: Account): ProviderId { if (account?.provider === "mistral") return "mistral"; @@ -58,6 +63,43 @@ export function rememberError(account: Account, message: string) { account.state = { ...account.state, lastError: message, recentErrors: next }; } +function isAuthFailureReason(reason: unknown): reason is string { + return typeof reason === "string" && /^auth failure:/i.test(reason); +} + +function isAuthRelatedErrorMessage(message: unknown): message is string { + return ( + typeof message === "string" && + /^(auth failure:|refresh token failed:|usage probe failed 401\b)/i.test( + message, + ) + ); +} + +export function clearAuthFailureState(account: Account) { + const current = account.state; + if (!current) return; + + const blockedByAuth = isAuthFailureReason(current.blockedReason); + const recentErrors = (current.recentErrors ?? []).filter( + (entry) => !isAuthRelatedErrorMessage(entry?.message), + ); + const lastError = isAuthRelatedErrorMessage(current.lastError) + ? undefined + : current.lastError; + + account.state = { + ...current, + blockedUntil: blockedByAuth ? undefined : current.blockedUntil, + blockedReason: blockedByAuth ? undefined : current.blockedReason, + needsTokenRefresh: false, + refreshFailureCount: 0, + refreshBlockedUntil: undefined, + lastError, + recentErrors: recentErrors.length ? recentErrors : undefined, + }; +} + export function usageUntouched(usage?: UsageSnapshot): boolean { return usage?.primary?.usedPercent === 0 && usage?.secondary?.usedPercent === 0; } @@ -95,9 +137,49 @@ export function accountUsable(a: Account): boolean { return !(typeof until === "number" && Date.now() < until); } +function normalizeModelKey(model?: string): string { + const raw = (model ?? "").trim().toLowerCase(); + if (!raw) return ""; + if (!raw.includes("/")) return raw; + return raw.split("/").pop() ?? raw; +} + +export function accountSupportsModel(account: Account, model?: string): boolean { + const key = normalizeModelKey(model); + if (!key) return true; + const record = account.state?.modelAvailability?.[key]; + if (!record) return true; + if (Date.now() - record.checkedAt > MODEL_COMPATIBILITY_TTL_MS) return true; + return record.supported; +} + +export function markModelCompatibility( + account: Account, + model: string | undefined, + supported: boolean, + reason?: string, +) { + const key = normalizeModelKey(model); + if (!key) return; + account.state = { + ...account.state, + modelAvailability: { + ...(account.state?.modelAvailability ?? {}), + [key]: { + supported, + checkedAt: Date.now(), + reason, + }, + }, + }; +} + export function chooseAccount(accounts: Account[]): Account | null { const now = Date.now(); - const windowMs = Number.isFinite(DEFAULT_ROUTING_WINDOW_MS) && DEFAULT_ROUTING_WINDOW_MS > 0 ? DEFAULT_ROUTING_WINDOW_MS : 5 * 60 * 1000; + const windowMs = + Number.isFinite(DEFAULT_ROUTING_WINDOW_MS) && DEFAULT_ROUTING_WINDOW_MS > 0 + ? DEFAULT_ROUTING_WINDOW_MS + : 0; const available = accounts.filter((a) => { if (!a.enabled) return false; @@ -106,11 +188,13 @@ export function chooseAccount(accounts: Account[]): Account | null { }); if (!available.length) return null; - const bucket = nowBucket(now, windowMs); - - if (routeCache.bucket === bucket && routeCache.accountId) { - const sticky = available.find((a) => a.id === routeCache.accountId); - if (sticky) return sticky; + if (windowMs > 0) { + const bucket = nowBucket(now, windowMs); + const stickyBucket = routeCache.bucketByWindowMs.get(windowMs); + if (stickyBucket === bucket && routeCache.accountId) { + const sticky = available.find((a) => a.id === routeCache.accountId); + if (sticky) return sticky; + } } const untouched = available.filter((a) => { @@ -122,6 +206,14 @@ export function chooseAccount(accounts: Account[]): Account | null { const pool = untouched.length ? untouched : available; const sorted = [...pool].sort((a, b) => { + const ap = a.priority ?? Number.MAX_SAFE_INTEGER; + const bp = b.priority ?? Number.MAX_SAFE_INTEGER; + if (ap !== bp) return ap - bp; + + const al = a.state?.lastSelectedAt ?? 0; + const bl = b.state?.lastSelectedAt ?? 0; + if (al !== bl) return al - bl; + const sa = scoreAccount(a); const sb = scoreAccount(b); if (sa !== sb) return sa - sb; @@ -130,16 +222,14 @@ export function chooseAccount(accounts: Account[]): Account | null { const br = b.usage?.secondary?.resetAt ?? Number.MAX_SAFE_INTEGER; if (ar !== br) return ar - br; - const ap = a.priority ?? Number.MAX_SAFE_INTEGER; - const bp = b.priority ?? Number.MAX_SAFE_INTEGER; - if (ap !== bp) return ap - bp; - return a.id.localeCompare(b.id); }); const winner = sorted[0] ?? null; - routeCache.bucket = bucket; routeCache.accountId = winner?.id; + if (windowMs > 0 && winner) { + routeCache.bucketByWindowMs.set(windowMs, nowBucket(now, windowMs)); + } return winner; } @@ -178,6 +268,7 @@ export async function refreshUsageIfNeeded(account: Account, chatgptBaseUrl: str if (!res.ok) throw new Error(`usage probe failed ${res.status}`); const json = await res.json(); account.usage = parseOpenAIUsage(json); + clearAuthFailureState(account); account.state = { ...account.state, lastError: undefined }; return account; } catch (err: any) { @@ -198,6 +289,22 @@ export function markQuotaHit(account: Account, message: string) { rememberError(account, message); } +export function markAuthFailure(account: Account, message: string) { + account.state = { + ...account.state, + blockedUntil: Date.now() + AUTH_FALLBACK_MS, + blockedReason: message, + needsTokenRefresh: true, + }; + rememberError(account, message); +} + +export function markModelUnsupported(account: Account, message: string) { + const modelMatch = message.match(/for ([^:]+):/); + markModelCompatibility(account, modelMatch?.[1], false, message); + rememberError(account, message); +} + // z.ai business error code categories for smarter handling const ZAI_AUTH_ERRORS = new Set([1000, 1001, 1002, 1003, 1004]); const ZAI_ACCOUNT_ERRORS = new Set([1110, 1111, 1112, 1113, 1120, 1121]); diff --git a/src/routes/admin/index.ts b/src/routes/admin/index.ts index 2c39cbf..9fd1ff8 100644 --- a/src/routes/admin/index.ts +++ b/src/routes/admin/index.ts @@ -2,7 +2,11 @@ import express from "express"; import { randomUUID } from "node:crypto"; import { AccountStore, OAuthStateStore } from "../../store.js"; import type { Account, ModelAlias } from "../../types.js"; -import { normalizeProvider, refreshUsageIfNeeded } from "../../quota.js"; +import { + clearAuthFailureState, + normalizeProvider, + refreshUsageIfNeeded, +} from "../../quota.js"; import { accountFromOAuth, buildAuthorizationUrl, @@ -60,6 +64,102 @@ function sanitizeAliasId(value: unknown): string { .replace(/^-+|-+$/g, ""); } +const ACCOUNT_MUTABLE_KEYS = new Set([ + "id", + "provider", + "email", + "accessToken", + "refreshToken", + "expiresAt", + "chatgptAccountId", + "enabled", + "priority", +]); + +function rejectUnknownKeys( + body: Record, + allowed: Set, +): string | undefined { + const unknown = Object.keys(body).filter((key) => !allowed.has(key)); + if (!unknown.length) return undefined; + return `unknown fields: ${unknown.join(", ")}`; +} + +function parseAccountPatch( + body: Record, + allowId: boolean, +): { patch?: Partial; error?: string } { + const error = rejectUnknownKeys(body, ACCOUNT_MUTABLE_KEYS); + if (error) return { error }; + + const patch: Partial = {}; + if (allowId && typeof body.id !== "undefined") { + if (typeof body.id !== "string" || !body.id.trim()) { + return { error: "id must be a non-empty string" }; + } + patch.id = body.id.trim(); + } + if (typeof body.provider !== "undefined") { + if (body.provider !== "openai" && body.provider !== "mistral") { + return { error: "provider must be openai or mistral" }; + } + patch.provider = body.provider; + } + if (typeof body.email !== "undefined") { + if (typeof body.email !== "string") return { error: "email must be a string" }; + patch.email = body.email.trim() || undefined; + } + if (typeof body.accessToken !== "undefined") { + if (typeof body.accessToken !== "string" || !body.accessToken.trim()) { + return { error: "accessToken must be a non-empty string" }; + } + patch.accessToken = body.accessToken.trim(); + } + if (typeof body.refreshToken !== "undefined") { + if (body.refreshToken !== null && typeof body.refreshToken !== "string") { + return { error: "refreshToken must be a string" }; + } + patch.refreshToken = + typeof body.refreshToken === "string" && body.refreshToken.trim() + ? body.refreshToken.trim() + : undefined; + } + if (typeof body.expiresAt !== "undefined") { + if ( + body.expiresAt !== null && + (!Number.isFinite(Number(body.expiresAt)) || Number(body.expiresAt) < 0) + ) { + return { error: "expiresAt must be a positive number" }; + } + patch.expiresAt = + body.expiresAt === null ? undefined : Number(body.expiresAt); + } + if (typeof body.chatgptAccountId !== "undefined") { + if ( + body.chatgptAccountId !== null && + typeof body.chatgptAccountId !== "string" + ) { + return { error: "chatgptAccountId must be a string" }; + } + patch.chatgptAccountId = + typeof body.chatgptAccountId === "string" && + body.chatgptAccountId.trim() + ? body.chatgptAccountId.trim() + : undefined; + } + if (typeof body.enabled !== "undefined") { + if (typeof body.enabled !== "boolean") return { error: "enabled must be a boolean" }; + patch.enabled = body.enabled; + } + if (typeof body.priority !== "undefined") { + if (!Number.isFinite(Number(body.priority))) { + return { error: "priority must be a finite number" }; + } + patch.priority = Number(body.priority); + } + return { patch }; +} + function normalizeAliasTargets(value: unknown): string[] { if (!Array.isArray(value)) return []; return Array.from( @@ -409,6 +509,7 @@ export function createAdminRouter(options: AdminRoutesOptions) { const globalAgg = createUsageAggregate(); const byAccount = new Map>(); const byRoute = new Map>(); + const bySession = new Map>(); for (const trace of filtered) { addTraceToAggregate(globalAgg, trace); @@ -421,6 +522,16 @@ export function createAdminRouter(options: AdminRoutesOptions) { const routeKey = trace.route ?? "unknown"; if (!byRoute.has(routeKey)) byRoute.set(routeKey, createUsageAggregate()); addTraceToAggregate(byRoute.get(routeKey)!, trace); + + const sessionKey = + typeof trace.sessionId === "string" && trace.sessionId.trim() + ? trace.sessionId.trim() + : ""; + if (sessionKey) { + if (!bySession.has(sessionKey)) + bySession.set(sessionKey, createUsageAggregate()); + addTraceToAggregate(bySession.get(sessionKey)!, trace); + } } const accounts = await store.listAccounts(); @@ -453,6 +564,10 @@ export function createAdminRouter(options: AdminRoutesOptions) { .map(([route, agg]) => ({ route, ...finalizeAggregate(agg) })) .sort((a, b) => b.requests - a.requests); + const bySessionOut = Array.from(bySession.entries()) + .map(([sessionId, agg]) => ({ sessionId, ...finalizeAggregate(agg) })) + .sort((a, b) => b.requests - a.requests); + res.json({ ok: true, filters: { @@ -464,6 +579,7 @@ export function createAdminRouter(options: AdminRoutesOptions) { totals: finalizeAggregate(globalAgg), byAccount: byAccountOut, byRoute: byRouteOut, + bySession: bySessionOut, tracesEvaluated: traces.length, tracesMatched: filtered.length, }); @@ -487,28 +603,34 @@ export function createAdminRouter(options: AdminRoutesOptions) { }); router.post("/accounts", async (req, res) => { - const body = req.body ?? {}; - if (!body.accessToken) + const body = (req.body ?? {}) as Record; + const parsed = parseAccountPatch(body, true); + if (parsed.error) return res.status(400).json({ error: parsed.error }); + if (!parsed.patch?.accessToken) { return res.status(400).json({ error: "accessToken required" }); + } const account: Account = { - id: body.id ?? randomUUID(), - provider: body.provider === "mistral" ? "mistral" : "openai", - email: body.email, - accessToken: body.accessToken, - refreshToken: body.refreshToken, - expiresAt: body.expiresAt, - chatgptAccountId: body.chatgptAccountId, - enabled: body.enabled ?? true, - priority: body.priority ?? 0, - usage: body.usage, - state: body.state, + id: parsed.patch.id ?? randomUUID(), + provider: parsed.patch.provider ?? "openai", + email: parsed.patch.email, + accessToken: parsed.patch.accessToken, + refreshToken: parsed.patch.refreshToken, + expiresAt: parsed.patch.expiresAt, + chatgptAccountId: parsed.patch.chatgptAccountId, + enabled: parsed.patch.enabled ?? true, + priority: parsed.patch.priority ?? 0, + usage: undefined, + state: {}, }; await store.upsertAccount(account); res.json({ ok: true, account: redact(account) }); }); router.patch("/accounts/:id", async (req, res) => { - const updated = await store.patchAccount(req.params.id, req.body ?? {}); + const body = (req.body ?? {}) as Record; + const parsed = parseAccountPatch(body, false); + if (parsed.error) return res.status(400).json({ error: parsed.error }); + const updated = await store.patchAccount(req.params.id, parsed.patch ?? {}); if (!updated) return res.status(404).json({ error: "not found" }); res.json({ ok: true, account: redact(updated) }); }); @@ -625,6 +747,7 @@ export function createAdminRouter(options: AdminRoutesOptions) { } else { account = accountFromOAuth(flow, tokenData); } + clearAuthFailureState(account); account = await refreshUsageIfNeeded(account, openaiBaseUrl, true); await store.upsertAccount(account); await oauthStore.update(flow.id, { diff --git a/src/routes/proxy/index.ts b/src/routes/proxy/index.ts index 02cd16d..1e655c4 100644 --- a/src/routes/proxy/index.ts +++ b/src/routes/proxy/index.ts @@ -1,15 +1,16 @@ import { MAX_ACCOUNT_RETRY_ATTEMPTS, - MAX_UPSTREAM_RETRIES, + MAX_GET_RETRIES, MODELS_CACHE_MS, MODELS_CLIENT_VERSION, + MODEL_DISCOVERY_TIMEOUT_MS, PI_USER_AGENT, PROXY_MODELS, + RETRY_BASE_DELAY_MS, TRACE_INCLUDE_BODY, - TOKEN_REFRESH_MARGIN_MS, - UPSTREAM_BASE_DELAY_MS, UPSTREAM_PATH, UPSTREAM_COMPACT_PATH, + UPSTREAM_REQUEST_TIMEOUT_MS, ZAI_BASE_URL, ZAI_UPSTREAM_PATH, ZAI_COMPACT_UPSTREAM_PATH, @@ -31,7 +32,12 @@ import { } from "../../responses/payloads.js"; import { chooseAccountForProvider, + accountSupportsModel, + clearAuthFailureState, isQuotaErrorText, + markModelCompatibility, + markAuthFailure, + markModelUnsupported, markQuotaHit, normalizeProvider, refreshUsageIfNeeded, @@ -39,6 +45,7 @@ import { parseZaiErrorCode, shouldBlockAccountForZaiError, getZaiBlockDuration, + USAGE_CACHE_TTL_MS, } from "../../quota.js"; import { ensureNonEmptyChatCompletion, @@ -66,6 +73,7 @@ type ProxyRoutesOptions = { zaiUpstreamPath: string; zaiCompactUpstreamPath: string; oauthConfig: OAuthConfig; + upstreamRequestTimeoutMs?: number; }; const modelsCache: { at: number; models: ExposedModel[] } = { @@ -86,6 +94,11 @@ const modelsValidationCache: { const MODELS_VALIDATION_CACHE_MS = 60_000; // Refresh every 60 seconds +export function resetDiscoveredModelsCacheForTest() { + modelsCache.at = 0; + modelsCache.models = []; +} + type ExposedModel = { id: string; object: "model"; @@ -253,7 +266,7 @@ async function discoverModels( const url = `${openaiBaseUrl}/backend-api/codex/models?client_version=${encodeURIComponent( MODELS_CLIENT_VERSION, )}`; - const r = await fetch(url, { headers }); + const r = await fetchCodexWithRetry(url, { headers }); if (r.ok) { const json: any = await r.json(); const upstream = Array.isArray(json?.models) ? json.models : []; @@ -278,7 +291,9 @@ async function discoverModels( authorization: `Bearer ${mistralAccount.accessToken}`, accept: "application/json", }; - const r = await fetch(`${mistralBaseUrl}/v1/models`, { headers }); + const r = await fetchCodexWithRetry(`${mistralBaseUrl}/v1/models`, { + headers, + }); if (r.ok) { const json: any = await r.json(); const upstream = Array.isArray(json?.data) ? json.data : []; @@ -468,10 +483,295 @@ function takeNextSSEFrame(buffer: string): SSEFrame { }; } +function frameSignalsResponseCompleted(frame: string): boolean { + return ( + /(?:^|\r?\n)event:\s*response\.completed\b/.test(frame) || + frame.includes('"response.completed"') + ); +} + +function frameSignalsOutputTextDone(frame: string): boolean { + return ( + /(?:^|\r?\n)event:\s*response\.output_text\.done\b/.test(frame) || + frame.includes('"response.output_text.done"') + ); +} + +function frameSignalsResponseTerminal(frame: string): boolean { + return ( + frameSignalsResponseCompleted(frame) || frameSignalsOutputTextDone(frame) + ); +} + +function extractSSEDataPayload(frame: string): any | undefined { + try { + const dataLine = frame + .split(/\r?\n/) + .find((line) => line.trim().startsWith("data:")); + if (!dataLine) return undefined; + return JSON.parse(dataLine.slice(5).trim()); + } catch { + return undefined; + } +} + function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } +function createRequestSignal( + timeoutMs: number, + upstreamAbort?: AbortSignal, +): { signal: AbortSignal; clearTimeout: () => void } { + const controller = new AbortController(); + let timer: NodeJS.Timeout | undefined = setTimeout(() => { + controller.abort(new Error(`request timed out after ${timeoutMs}ms`)); + }, timeoutMs); + const clearTimeoutOnly = () => { + if (!timer) return; + clearTimeout(timer); + timer = undefined; + }; + const onAbort = () => controller.abort(upstreamAbort?.reason); + if (upstreamAbort) { + if (upstreamAbort.aborted) { + controller.abort(upstreamAbort.reason); + } else { + upstreamAbort.addEventListener("abort", onAbort, { once: true }); + } + } + controller.signal.addEventListener( + "abort", + () => { + clearTimeoutOnly(); + if (upstreamAbort) upstreamAbort.removeEventListener("abort", onAbort); + }, + { once: true }, + ); + return { + signal: controller.signal, + clearTimeout: clearTimeoutOnly, + }; +} + +async function readChunkWithInactivityTimeout( + reader: ReadableStreamDefaultReader, + timeoutMs: number, + abortSignal?: AbortSignal, +): Promise> { + return new Promise((resolve, reject) => { + let settled = false; + let timer: NodeJS.Timeout | undefined = setTimeout(() => { + if (settled) return; + settled = true; + cleanup(); + void reader.cancel().catch(() => {}); + reject(new Error(`response stream timed out after ${timeoutMs}ms`)); + }, timeoutMs); + + const cleanup = () => { + if (timer) { + clearTimeout(timer); + timer = undefined; + } + if (abortSignal) abortSignal.removeEventListener("abort", onAbort); + }; + + const onAbort = () => { + if (settled) return; + settled = true; + cleanup(); + void reader.cancel().catch(() => {}); + const reason = abortSignal?.reason; + reject(reason instanceof Error ? reason : new Error(String(reason ?? "aborted"))); + }; + + if (abortSignal) { + if (abortSignal.aborted) { + onAbort(); + return; + } + abortSignal.addEventListener("abort", onAbort, { once: true }); + } + + reader.read().then( + (result) => { + if (settled) return; + settled = true; + cleanup(); + resolve(result); + }, + (error) => { + if (settled) return; + settled = true; + cleanup(); + reject(error); + }, + ); + }); +} + +async function readResponseTextWithInactivityTimeout( + response: Response, + timeoutMs: number, + abortSignal?: AbortSignal, +): Promise { + if (!response.body) return ""; + const reader = response.body.getReader(); + return readReaderTextWithInactivityTimeout( + reader, + new TextDecoder(), + timeoutMs, + abortSignal, + ); +} + +async function readReaderTextWithInactivityTimeout( + reader: ReadableStreamDefaultReader, + decoder: TextDecoder, + timeoutMs: number, + abortSignal?: AbortSignal, + initialText = "", +): Promise { + let text = initialText; + + while (true) { + const { value, done } = await readChunkWithInactivityTimeout( + reader, + timeoutMs, + abortSignal, + ); + if (done) break; + text += decoder.decode(value, { stream: true }); + } + + text += decoder.decode(); + return text; +} + +async function peekResponseTextStart( + response: Response, + timeoutMs: number, + abortSignal?: AbortSignal, +): Promise<{ + reader: ReadableStreamDefaultReader | null; + decoder: TextDecoder; + initialText: string; +}> { + const decoder = new TextDecoder(); + if (!response.body) { + return { reader: null, decoder, initialText: "" }; + } + const reader = response.body.getReader(); + const { value, done } = await readChunkWithInactivityTimeout( + reader, + timeoutMs, + abortSignal, + ); + if (done) { + return { + reader, + decoder, + initialText: decoder.decode(), + }; + } + + return { + reader, + decoder, + initialText: decoder.decode(value, { stream: true }), + }; +} + +function looksLikeSSEPayload(text: string): boolean { + return /(?:^|\r?\n)(event:|data:)\s*/.test(text); +} + +async function readResponsesSSETextUntilTerminalFromReader( + reader: ReadableStreamDefaultReader, + decoder: TextDecoder, + timeoutMs: number, + abortSignal?: AbortSignal, + initialText = "", +): Promise { + let text = initialText; + let sseBuffer = initialText; + let completed = false; + + while (true) { + const { value, done } = await readChunkWithInactivityTimeout( + reader, + timeoutMs, + abortSignal, + ); + if (done) break; + sseBuffer += decoder.decode(value, { stream: true }); + + while (true) { + const next = takeNextSSEFrame(sseBuffer); + if (!next) break; + sseBuffer = next.rest; + text += `${next.frame}\n\n`; + if (frameSignalsResponseTerminal(next.frame)) { + completed = true; + break; + } + } + + if (completed) break; + } + + if (!completed) { + sseBuffer += decoder.decode(); + while (true) { + const next = takeNextSSEFrame(sseBuffer); + if (!next) break; + sseBuffer = next.rest; + text += `${next.frame}\n\n`; + if (frameSignalsResponseTerminal(next.frame)) { + completed = true; + break; + } + } + if (!completed && sseBuffer.trim()) text += sseBuffer; + } + + if (completed) void reader.cancel().catch(() => {}); + return text; +} + +async function readResponsesSSETextUntilTerminal( + response: Response, + timeoutMs: number, + abortSignal?: AbortSignal, +): Promise { + if (!response.body) return ""; + return readResponsesSSETextUntilTerminalFromReader( + response.body.getReader(), + new TextDecoder(), + timeoutMs, + abortSignal, + ); +} + +function isAbortError(error: unknown): boolean { + return ( + error instanceof Error && + (error.name === "AbortError" || /timed out|aborted/i.test(error.message)) + ); +} + +function isDownstreamClientDisconnect( + error: unknown, + abortSignal?: AbortSignal, +): boolean { + return ( + Boolean(abortSignal?.aborted) || + (error instanceof Error && + /downstream client disconnected/i.test(error.message)) + ); +} + function isRetryableUpstreamError(status: number, errorText: string): boolean { if ( status === 429 || @@ -486,34 +786,67 @@ function isRetryableUpstreamError(status: number, errorText: string): boolean { ); } +function isAuthFailure(status: number, errorText: string): boolean { + if (status === 401) return true; + return /token_expired|invalid[_ -]?token|refresh[_ -]?token|unauthorized|auth/i.test( + errorText, + ); +} + +function isModelUnsupported(status: number, errorText: string): boolean { + if (status !== 400 && status !== 404) return false; + return /model.+not supported|unsupported model|does not exist|not available|unknown model/i.test( + errorText, + ); +} + async function fetchCodexWithRetry( url: string, init: RequestInit, + signal?: AbortSignal, ): Promise { let lastError: Error | undefined; - for (let attempt = 0; attempt <= MAX_UPSTREAM_RETRIES; attempt++) { + const maxAttempts = Math.max(0, MAX_GET_RETRIES); + for (let attempt = 0; attempt <= maxAttempts; attempt++) { try { - const response = await fetch(url, init); + const requestSignal = createRequestSignal( + MODEL_DISCOVERY_TIMEOUT_MS, + signal, + ); + const response = await fetch(url, { + ...init, + signal: requestSignal.signal, + }); + requestSignal.clearTimeout(); if (response.ok) return response; const errorText = await response .clone() .text() .catch(() => ""); if ( - attempt < MAX_UPSTREAM_RETRIES && + attempt < maxAttempts && isRetryableUpstreamError(response.status, errorText) ) { - await sleep(UPSTREAM_BASE_DELAY_MS * 2 ** attempt); + await sleep( + Math.floor( + RETRY_BASE_DELAY_MS * 2 ** attempt * (0.5 + Math.random()), + ), + ); continue; } return response; } catch (error: any) { lastError = error instanceof Error ? error : new Error(String(error)); if ( - attempt < MAX_UPSTREAM_RETRIES && - !lastError.message.includes("usage limit") + attempt < maxAttempts && + !lastError.message.includes("usage limit") && + !isAbortError(lastError) ) { - await sleep(UPSTREAM_BASE_DELAY_MS * 2 ** attempt); + await sleep( + Math.floor( + RETRY_BASE_DELAY_MS * 2 ** attempt * (0.5 + Math.random()), + ), + ); continue; } throw lastError; @@ -534,6 +867,7 @@ export function createProxyRouter(options: ProxyRoutesOptions) { zaiUpstreamPath, zaiCompactUpstreamPath, oauthConfig, + upstreamRequestTimeoutMs = UPSTREAM_REQUEST_TIMEOUT_MS, } = options; const { recordTrace } = traceManager; const router = express.Router(); @@ -568,6 +902,12 @@ export function createProxyRouter(options: ProxyRoutesOptions) { // Start background model cache refresh startBackgroundModelRefresh(store, openaiBaseUrl, mistralBaseUrl, zaiBaseUrl); + function refreshUsageInBackground(account: any, usageBaseUrl: string) { + void refreshUsageIfNeeded(account, usageBaseUrl) + .then((refreshed) => store.upsertAccount(refreshed)) + .catch(() => undefined); + } + async function proxyWithRotation( req: express.Request, res: express.Response, @@ -583,6 +923,16 @@ export function createProxyRouter(options: ProxyRoutesOptions) { (req.originalUrl || "").includes("responses/compact"); const clientRequestedStream = Boolean(req.body?.stream); const sessionId = getSessionId(req); + const clientAbort = new AbortController(); + const abortFromClient = () => { + if (!clientAbort.signal.aborted) { + clientAbort.abort(new Error("downstream client disconnected")); + } + }; + req.on("aborted", abortFromClient); + res.on("close", () => { + if (!res.writableEnded) abortFromClient(); + }); let accounts = store.getCachedAccounts(); if (!accounts.length) @@ -595,7 +945,10 @@ let accounts = store.getCachedAccounts(); let usageBaseUrl = openaiBaseUrl; if (provider === "mistral") usageBaseUrl = mistralBaseUrl; else if (provider === "zai") usageBaseUrl = zaiBaseUrl; - await refreshUsageIfNeeded(valid, usageBaseUrl); + const usageFetchedAt = valid.usage?.fetchedAt ?? 0; + if (Date.now() - usageFetchedAt >= USAGE_CACHE_TTL_MS) { + refreshUsageInBackground(valid, usageBaseUrl); + } return valid; }), ); @@ -617,20 +970,24 @@ let accounts = store.getCachedAccounts(); }); } - const discoveredModels = await discoverModels(store, openaiBaseUrl, mistralBaseUrl, zaiBaseUrl); const modelAliases = store.getCachedModelAliases(); const routingCandidates = buildRoutingCandidates( requestModel, - discoveredModels, + modelsCache.models, modelAliases, ); const tried = new Set(); const maxAttempts = Math.min(accounts.length, MAX_ACCOUNT_RETRY_ATTEMPTS); let providerTried = false; + let lastModelUnsupported: + | { status: number; text: string; contentType: string } + | undefined; for (const candidate of routingCandidates) { const providerAccounts = accounts.filter( - (a) => normalizeProvider(a) === candidate.provider, + (a) => + normalizeProvider(a) === candidate.provider && + accountSupportsModel(a, candidate.resolvedModel ?? requestModel), ); if (!providerAccounts.length) continue; providerTried = true; @@ -661,9 +1018,6 @@ let accounts = store.getCachedAccounts(); delete payloadToUpstream.tool_choice; delete payloadToUpstream.parallel_tool_calls; } - if (isResponsesCompactPath && payloadToUpstream && typeof payloadToUpstream === "object") { - delete payloadToUpstream.store; - } if (candidate.resolvedModel) payloadToUpstream.model = candidate.resolvedModel; const requestBody = TRACE_INCLUDE_BODY ? req.body : undefined; const tracedModel = @@ -699,17 +1053,50 @@ let accounts = store.getCachedAccounts(); upstreamBaseUrl = zaiBaseUrl; upstreamPath = isResponsesCompactPath ? zaiCompactUpstreamPath : zaiUpstreamPath; } - const upstream = await fetchCodexWithRetry( - `${upstreamBaseUrl}${upstreamPath}`, - { - method: "POST", - headers, - body: JSON.stringify(payloadToUpstream), - }, + const requestSignal = createRequestSignal( + upstreamRequestTimeoutMs, + clientAbort.signal, ); + const upstream = await fetch(`${upstreamBaseUrl}${upstreamPath}`, { + method: "POST", + headers, + body: JSON.stringify(payloadToUpstream), + signal: requestSignal.signal, + }); + requestSignal.clearTimeout(); const contentType = upstream.headers.get("content-type") ?? ""; - const isStream = contentType.includes("text/event-stream"); + let isStream = contentType.includes("text/event-stream"); + let prefetchedText = ""; + let prefetchedReader: ReadableStreamDefaultReader | null = null; + let prefetchedDecoder: TextDecoder | null = null; + + if ( + upstream.ok && + clientRequestedStream && + !shouldReturnChatCompletions && + !isStream && + upstream.body + ) { + const peeked = await peekResponseTextStart( + upstream, + upstreamRequestTimeoutMs, + clientAbort.signal, + ); + prefetchedText = peeked.initialText; + prefetchedReader = peeked.reader; + prefetchedDecoder = peeked.decoder; + if (looksLikeSSEPayload(prefetchedText)) isStream = true; + } + if (upstream.ok) { + clearAuthFailureState(selected); + markModelCompatibility( + selected, + candidate.resolvedModel ?? requestModel, + true, + ); + await store.upsertAccount(selected); + } if (isStream) { if (shouldReturnChatCompletions && clientRequestedStream) { @@ -728,7 +1115,11 @@ let accounts = store.getCachedAccounts(); let doneSent = false; while (true) { - const { value, done } = await reader.read(); + const { value, done } = await readChunkWithInactivityTimeout( + reader, + upstreamRequestTimeoutMs, + clientAbort.signal, + ); if (done) break; const chunk = decoder.decode(value, { stream: true }); @@ -786,6 +1177,7 @@ let accounts = store.getCachedAccounts(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -799,7 +1191,11 @@ let accounts = store.getCachedAccounts(); } if (shouldReturnChatCompletions) { - const txt = await upstream.text(); + const txt = await readResponsesSSETextUntilTerminal( + upstream, + upstreamRequestTimeoutMs, + clientAbort.signal, + ); const parsedChat = parseResponsesSSEToChatCompletion( txt, req.body?.model ?? payloadToUpstream?.model ?? "unknown", @@ -813,6 +1209,7 @@ let accounts = store.getCachedAccounts(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -829,13 +1226,18 @@ let accounts = store.getCachedAccounts(); } if (!clientRequestedStream) { - const txt = await upstream.text(); + const txt = await readResponsesSSETextUntilTerminal( + upstream, + upstreamRequestTimeoutMs, + clientAbort.signal, + ); const respObj = parseResponsesSSEToResponseObject(txt); res.status(upstream.ok ? 200 : upstream.status).json(respObj); const upstreamError = !upstream.ok ? txt.slice(0, 500) : undefined; recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -852,73 +1254,58 @@ let accounts = store.getCachedAccounts(); res.status(upstream.status); setForwardHeaders(upstream, res); - if (!upstream.body) return res.end(); - const reader = upstream.body.getReader(); - const decoder = new TextDecoder(); + res.flushHeaders(); + const reader = prefetchedReader ?? upstream.body?.getReader() ?? null; + const decoder = prefetchedDecoder ?? new TextDecoder(); + if (!reader) return res.end(); let sseBuffer = ""; let accumulatedUsage: any = null; - while (true) { - const { value, done } = await reader.read(); - if (done) break; - sseBuffer += decoder.decode(value, { stream: true }); + const consumeChunkText = (chunkText: string) => { + if (!chunkText) return; + res.write(chunkText); + sseBuffer += chunkText; while (true) { const next = takeNextSSEFrame(sseBuffer); if (!next) break; sseBuffer = next.rest; - if (next.frame.includes("response.completed")) { - try { - const dataLine = next.frame - .split(/\r?\n/) - .find((line) => line.trim().startsWith("data:")); - if (dataLine) { - const payload = JSON.parse(dataLine.slice(5).trim()); - if (payload?.response?.usage) { - accumulatedUsage = payload.response.usage; - } - } - } catch {} + const payload = extractSSEDataPayload(next.frame); + if (payload?.type === "response.completed") { + if (payload?.response?.usage) { + accumulatedUsage = payload.response.usage; + } + continue; + } + if ( + payload?.type === "response.output_text.done" && + typeof payload?.text === "string" + ) { + continue; } - - const filtered = sanitizeResponsesSSEFrame(next.frame); - if (filtered !== null) res.write(`${filtered}\n\n`); } - } + }; - sseBuffer += decoder.decode(); - while (true) { - const next = takeNextSSEFrame(sseBuffer); - if (!next) break; - sseBuffer = next.rest; - - if (next.frame.includes("response.completed")) { - try { - const dataLine = next.frame - .split(/\r?\n/) - .find((line) => line.trim().startsWith("data:")); - if (dataLine) { - const payload = JSON.parse(dataLine.slice(5).trim()); - if (payload?.response?.usage) { - accumulatedUsage = payload.response.usage; - } - } - } catch {} - } + consumeChunkText(prefetchedText); - const filtered = sanitizeResponsesSSEFrame(next.frame); - if (filtered !== null) res.write(`${filtered}\n\n`); - } - if (sseBuffer.trim()) { - const filtered = sanitizeResponsesSSEFrame(sseBuffer); - if (filtered !== null) res.write(`${filtered}\n\n`); + while (true) { + const { value, done } = await readChunkWithInactivityTimeout( + reader, + upstreamRequestTimeoutMs, + clientAbort.signal, + ); + if (done) break; + consumeChunkText(decoder.decode(value, { stream: true })); } + + consumeChunkText(decoder.decode()); res.end(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -933,7 +1320,11 @@ let accounts = store.getCachedAccounts(); let bufferedText: string | undefined = undefined; if (shouldReturnChatCompletions && clientRequestedStream) { - let raw = await upstream.text(); + let raw = await readResponseTextWithInactivityTimeout( + upstream, + upstreamRequestTimeoutMs, + clientAbort.signal, + ); const upstreamEmptyBody = !raw; if (!raw) raw = JSON.stringify({ @@ -960,6 +1351,7 @@ let accounts = store.getCachedAccounts(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -981,7 +1373,7 @@ let accounts = store.getCachedAccounts(); req.body?.model ?? payloadToUpstream?.model ?? "unknown", ); res.status(200); - res.set("Content-Type", "text.event-stream"); + res.set("Content-Type", "text/event-stream"); res.set("Cache-Control", "no-cache"); res.set("Connection", "keep-alive"); res.write(chatCompletionObjectToSSE(converted)); @@ -990,6 +1382,7 @@ let accounts = store.getCachedAccounts(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -1006,7 +1399,21 @@ let accounts = store.getCachedAccounts(); } } - let text = bufferedText ?? (await upstream.text()); + let text = + bufferedText ?? + (prefetchedReader && prefetchedDecoder + ? await readReaderTextWithInactivityTimeout( + prefetchedReader, + prefetchedDecoder, + upstreamRequestTimeoutMs, + clientAbort.signal, + prefetchedText, + ) + : await readResponseTextWithInactivityTimeout( + upstream, + upstreamRequestTimeoutMs, + clientAbort.signal, + )); const upstreamEmptyBody = !text; if (!text) text = JSON.stringify({ @@ -1061,6 +1468,7 @@ let accounts = store.getCachedAccounts(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -1095,6 +1503,7 @@ let accounts = store.getCachedAccounts(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -1145,6 +1554,7 @@ let accounts = store.getCachedAccounts(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -1175,6 +1585,7 @@ let accounts = store.getCachedAccounts(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -1196,6 +1607,7 @@ let accounts = store.getCachedAccounts(); recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -1212,15 +1624,24 @@ let accounts = store.getCachedAccounts(); return; } - res.status(upstream.status); - setForwardHeaders(upstream, res); - res.type(contentType || "application/json").send(text); - const usage = extractUsageFromPayload(parsed); + const quotaFailure = + upstream.status === 429 || isQuotaErrorText(text); + const authFailure = isAuthFailure(upstream.status, text); + const modelUnsupported = isModelUnsupported(upstream.status, text); + const shouldRotateAccount = + !upstream.ok && + (quotaFailure || authFailure || modelUnsupported); + + if (!shouldRotateAccount) { + res.status(upstream.status); + res.type(contentType || "application/json").send(text); + } recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, @@ -1252,11 +1673,31 @@ let accounts = store.getCachedAccounts(); continue; } - if (upstream.status === 429 || isQuotaErrorText(text)) { + if (quotaFailure) { markQuotaHit(selected, `quota/rate-limit: ${upstream.status}`); await store.upsertAccount(selected); continue; } + if (authFailure) { + markAuthFailure(selected, `auth failure: ${upstream.status}`); + await store.upsertAccount(selected); + continue; + } + if (modelUnsupported) { + const failedModel = + candidate.resolvedModel ?? requestModel ?? "unknown-model"; + lastModelUnsupported = { + status: upstream.status, + text, + contentType, + }; + markModelUnsupported( + selected, + `model unsupported for ${failedModel}: ${upstream.status}`, + ); + await store.upsertAccount(selected); + continue; + } rememberError( selected, @@ -1266,26 +1707,67 @@ let accounts = store.getCachedAccounts(); return; } catch (err: any) { const msg = err?.message ?? String(err); - rememberError(selected, msg); - await store.upsertAccount(selected); + const downstreamClientDisconnected = isDownstreamClientDisconnect( + err, + clientAbort.signal, + ); + const status = downstreamClientDisconnected ? 499 : 599; + if (!downstreamClientDisconnected) { + rememberError(selected, msg); + await store.upsertAccount(selected); + } recordTrace({ at: Date.now(), route: req.path, + sessionId, accountId: selected.id, accountEmail: selected.email, model: tracedModel, - status: 599, + status, stream: false, latencyMs: Date.now() - startedAt, error: msg, requestBody, + isError: downstreamClientDisconnected ? false : undefined, }); + if (downstreamClientDisconnected) return; + if (isAbortError(err)) { + if (clientRequestedStream) { + if (!res.writableEnded) { + if (shouldReturnChatCompletions) { + res.write("data: [DONE]\n\n"); + } + res.end(); + } + return; + } + if (res.headersSent) { + if (!res.writableEnded) { + if (shouldReturnChatCompletions && clientRequestedStream) { + res.write("data: [DONE]\n\n"); + } + res.end(); + } + return; + } + return res.status(504).json({ error: "upstream request timed out" }); + } + if (res.headersSent && !res.writableEnded) { + res.end(); + return; + } } } } if (!providerTried) { return res.status(503).json({ error: "no provider accounts configured for requested model" }); } + if (lastModelUnsupported) { + return res + .status(lastModelUnsupported.status) + .type(lastModelUnsupported.contentType || "application/json") + .send(lastModelUnsupported.text); + } res.status(429).json({ error: "all accounts exhausted or unavailable" }); } diff --git a/src/runtime.ts b/src/runtime.ts new file mode 100644 index 0000000..57abc59 --- /dev/null +++ b/src/runtime.ts @@ -0,0 +1,312 @@ +import express from "express"; +import http from "node:http"; +import path from "node:path"; +import fs from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import { AccountStore, OAuthStateStore } from "./store.js"; +import { createTraceManager } from "./traces.js"; +import { createAdminRouter } from "./routes/admin/index.js"; +import { createProxyRouter } from "./routes/proxy/index.js"; +import { createOAuthCallbackServer } from "./oauth-callback-server.js"; +import { oauthConfig as defaultOAuthConfig } from "./oauth-config.js"; +import type { OAuthConfig } from "./oauth.js"; +import { + ADMIN_TOKEN, + CHATGPT_BASE_URL, + HOST, + MISTRAL_BASE_URL, + MISTRAL_COMPACT_UPSTREAM_PATH, + MISTRAL_UPSTREAM_PATH, + OAUTH_CALLBACK_BIND_HOST, + OAUTH_STATE_PATH, + PORT, + SERVER_HEADERS_TIMEOUT_MS, + SERVER_KEEP_ALIVE_TIMEOUT_MS, + SERVER_REQUEST_TIMEOUT_MS, + SHUTDOWN_GRACE_MS, + STORE_ENCRYPTION_KEY, + STORE_PATH, + TRACE_FILE_PATH, + TRACE_STATS_HISTORY_PATH, + UPSTREAM_PATH, +} from "./config.js"; + +type RuntimeOptions = { + host?: string; + port?: number; + adminToken?: string; + storePath?: string; + oauthStatePath?: string; + traceFilePath?: string; + traceStatsHistoryPath?: string; + openaiBaseUrl?: string; + mistralBaseUrl?: string; + mistralUpstreamPath?: string; + mistralCompactUpstreamPath?: string; + oauthConfig?: OAuthConfig; + oauthCallbackBindHost?: string; + installSignalHandlers?: boolean; + encryptionKey?: string; + upstreamRequestTimeoutMs?: number; +}; + +function isLoopbackHost(host: string): boolean { + return ( + host === "127.0.0.1" || + host === "::1" || + host === "localhost" + ); +} + +export async function createRuntime(options: RuntimeOptions = {}) { + const host = options.host ?? HOST; + const port = options.port ?? PORT; + const adminToken = options.adminToken ?? ADMIN_TOKEN; + const storePath = options.storePath ?? STORE_PATH; + const oauthStatePath = options.oauthStatePath ?? OAUTH_STATE_PATH; + const traceFilePath = options.traceFilePath ?? TRACE_FILE_PATH; + const traceStatsHistoryPath = + options.traceStatsHistoryPath ?? TRACE_STATS_HISTORY_PATH; + const openaiBaseUrl = options.openaiBaseUrl ?? CHATGPT_BASE_URL; + const mistralBaseUrl = options.mistralBaseUrl ?? MISTRAL_BASE_URL; + const mistralUpstreamPath = + options.mistralUpstreamPath ?? MISTRAL_UPSTREAM_PATH; + const mistralCompactUpstreamPath = + options.mistralCompactUpstreamPath ?? MISTRAL_COMPACT_UPSTREAM_PATH; + const oauthConfig = options.oauthConfig ?? defaultOAuthConfig; + const oauthCallbackBindHost = + options.oauthCallbackBindHost ?? OAUTH_CALLBACK_BIND_HOST; + const encryptionKey = options.encryptionKey ?? STORE_ENCRYPTION_KEY; + const upstreamRequestTimeoutMs = options.upstreamRequestTimeoutMs; + + if (!isLoopbackHost(host) && !adminToken) { + throw new Error("ADMIN_TOKEN is required when binding off loopback"); + } + + const app = express(); + app.disable("x-powered-by"); + app.use(express.json({ limit: "20mb" })); + const oauthCallbackServer = createOAuthCallbackServer(oauthConfig.redirectUri); + + const store = new AccountStore(storePath, encryptionKey || undefined); + const oauthStore = new OAuthStateStore( + oauthStatePath, + encryptionKey || undefined, + ); + await store.init(); + await oauthStore.init(); + await fs.mkdir(path.dirname(traceFilePath), { recursive: true }); + + const traceManager = createTraceManager({ + filePath: traceFilePath, + historyFilePath: traceStatsHistoryPath, + }); + + let ready = false; + let shuttingDown = false; + + function adminGuard( + req: express.Request, + res: express.Response, + next: express.NextFunction, + ) { + if (!adminToken) return next(); + const token = + req.header("x-admin-token") || + req.header("authorization")?.replace(/^Bearer\s+/i, ""); + if (token !== adminToken) + return res.status(401).json({ error: "unauthorized" }); + next(); + } + + app.get("/health", (_req, res) => + res.json({ + ok: true, + ready, + shuttingDown, + version: process.env.APP_VERSION ?? "unknown", + gitSha: process.env.APP_GIT_SHA ?? "unknown", + buildId: process.env.APP_BUILD_ID ?? "unknown", + }), + ); + + app.get("/ready", (_req, res) => { + if (!ready || shuttingDown) { + return res.status(503).json({ ok: false, ready, shuttingDown }); + } + return res.json({ ok: true, ready: true }); + }); + + const adminRouter = createAdminRouter({ + store, + oauthStore, + traceManager, + oauthConfig, + openaiBaseUrl, + mistralBaseUrl, + storagePaths: { + accountsPath: storePath, + oauthStatePath, + tracePath: traceFilePath, + traceStatsHistoryPath, + }, + }); + + const proxyRouter = createProxyRouter({ + store, + traceManager, + openaiBaseUrl, + mistralBaseUrl, + mistralUpstreamPath, + mistralCompactUpstreamPath, + oauthConfig, + upstreamRequestTimeoutMs, + }); + + app.use("/admin", adminGuard, adminRouter); + app.use("/v1", proxyRouter); + + const __dirname = path.dirname(fileURLToPath(import.meta.url)); + const webDist = path.resolve(__dirname, "../web-dist"); + app.use(express.static(webDist)); + app.get("*", (req, res, next) => { + if ( + req.path.startsWith("/admin/") || + req.path.startsWith("/v1/") || + req.path === "/health" || + req.path === "/ready" + ) { + return next(); + } + res.sendFile(path.join(webDist, "index.html"), (err) => { + if (err) next(err); + }); + }); + + app.use( + ( + err: unknown, + _req: express.Request, + res: express.Response, + _next: express.NextFunction, + ) => { + console.error(err); + if (res.headersSent) return; + res.status(500).json({ error: "internal server error" }); + }, + ); + + const server = http.createServer(app); + server.headersTimeout = SERVER_HEADERS_TIMEOUT_MS; + server.keepAliveTimeout = SERVER_KEEP_ALIVE_TIMEOUT_MS; + server.requestTimeout = SERVER_REQUEST_TIMEOUT_MS; + + async function start() { + try { + await new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(port, host, () => { + server.off("error", reject); + resolve(); + }); + }); + + if (oauthCallbackServer) { + const callbackUrl = new URL(oauthConfig.redirectUri); + await new Promise((resolve, reject) => { + oauthCallbackServer.once("error", reject); + oauthCallbackServer.listen( + Number(callbackUrl.port), + oauthCallbackBindHost || callbackUrl.hostname, + () => { + oauthCallbackServer.off("error", reject); + resolve(); + }, + ); + }); + } + + ready = true; + } catch (err) { + server.closeIdleConnections(); + server.closeAllConnections(); + await new Promise((resolve) => server.close(() => resolve())); + if (oauthCallbackServer) { + oauthCallbackServer.closeAllConnections?.(); + await new Promise((resolve) => oauthCallbackServer.close(() => resolve())); + } + throw err; + } + } + + async function shutdown() { + if (shuttingDown) return; + shuttingDown = true; + ready = false; + await new Promise((resolve) => { + const force = setTimeout(() => { + server.closeAllConnections(); + resolve(); + }, SHUTDOWN_GRACE_MS); + server.close(() => { + clearTimeout(force); + resolve(); + }); + server.closeIdleConnections(); + }); + if (oauthCallbackServer?.listening) { + await new Promise((resolve) => { + const force = setTimeout(() => { + oauthCallbackServer.closeAllConnections?.(); + resolve(); + }, SHUTDOWN_GRACE_MS); + oauthCallbackServer.close(() => { + clearTimeout(force); + resolve(); + }); + }); + } + await store.flushIfDirty(); + await traceManager.compactTraceStorageIfNeeded(); + } + + if (options.installSignalHandlers ?? true) { + const handleSignal = () => { + shutdown() + .catch((err) => { + console.error(err); + }) + .finally(() => { + process.exit(0); + }); + }; + process.once("SIGTERM", handleSignal); + process.once("SIGINT", handleSignal); + } + + return { + app, + server, + store, + oauthStore, + traceManager, + oauthCallbackServer, + start, + shutdown, + state: () => ({ ready, shuttingDown }), + config: { + host, + port, + storePath, + oauthStatePath, + traceFilePath, + traceStatsHistoryPath, + openaiBaseUrl, + mistralBaseUrl, + mistralUpstreamPath, + mistralCompactUpstreamPath, + oauthConfig, + oauthCallbackBindHost, + }, + }; +} diff --git a/src/server.ts b/src/server.ts index f5b504d..1c23497 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,130 +1,17 @@ -import express from "express"; -import path from "node:path"; -import fs from "node:fs/promises"; -import { fileURLToPath } from "node:url"; -import { AccountStore, OAuthStateStore, cleanupOrphanedTmpFiles } from "./store.js"; -import { createTraceManager } from "./traces.js"; -import { createAdminRouter } from "./routes/admin/index.js"; -import { createProxyRouter } from "./routes/proxy/index.js"; -import { installResponsesWebsocketProxy } from "./websocket-responses.js"; -import { oauthConfig } from "./oauth-config.js"; -import { - ADMIN_TOKEN, - CHATGPT_BASE_URL, - MISTRAL_BASE_URL, - MISTRAL_UPSTREAM_PATH, - MISTRAL_COMPACT_UPSTREAM_PATH, - ZAI_BASE_URL, - ZAI_UPSTREAM_PATH, - ZAI_COMPACT_UPSTREAM_PATH, - STORE_PATH, - TRACE_FILE_PATH, - TRACE_STATS_HISTORY_PATH, - UPSTREAM_PATH, - OAUTH_STATE_PATH, - PORT, -} from "./config.js"; -import { createBodyParserMiddleware } from "./middleware/decompression.js"; -import http from "node:http"; +import { createRuntime } from "./runtime.js"; -const app = express(); -app.use(createBodyParserMiddleware()); - -const dataDir = path.dirname(STORE_PATH); -await cleanupOrphanedTmpFiles(dataDir); - -const store = new AccountStore(STORE_PATH); -const oauthStore = new OAuthStateStore(OAUTH_STATE_PATH); -await store.init(); -await oauthStore.init(); -await fs.mkdir(path.dirname(TRACE_FILE_PATH), { recursive: true }); - -const traceManager = createTraceManager({ - filePath: TRACE_FILE_PATH, - historyFilePath: TRACE_STATS_HISTORY_PATH, -}); - -const adminRouter = createAdminRouter({ - store, - oauthStore, - traceManager, - oauthConfig, - openaiBaseUrl: CHATGPT_BASE_URL, - mistralBaseUrl: MISTRAL_BASE_URL, - zaiBaseUrl: ZAI_BASE_URL, - storagePaths: { - accountsPath: STORE_PATH, - oauthStatePath: OAUTH_STATE_PATH, - tracePath: TRACE_FILE_PATH, - traceStatsHistoryPath: TRACE_STATS_HISTORY_PATH, - }, -}); - -const proxyRouter = createProxyRouter({ - store, - traceManager, - openaiBaseUrl: CHATGPT_BASE_URL, - mistralBaseUrl: MISTRAL_BASE_URL, - mistralUpstreamPath: MISTRAL_UPSTREAM_PATH, - mistralCompactUpstreamPath: MISTRAL_COMPACT_UPSTREAM_PATH, - zaiBaseUrl: ZAI_BASE_URL, - zaiUpstreamPath: ZAI_UPSTREAM_PATH, - zaiCompactUpstreamPath: ZAI_COMPACT_UPSTREAM_PATH, - oauthConfig, -}); - -function adminGuard( - req: express.Request, - res: express.Response, - next: express.NextFunction, -) { - if (!ADMIN_TOKEN) return next(); - const token = - req.header("x-admin-token") || - req.header("authorization")?.replace(/^Bearer\s+/i, ""); - if (token !== ADMIN_TOKEN) - return res.status(401).json({ error: "unauthorized" }); - next(); -} - -const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const webDist = path.resolve(__dirname, "../web-dist"); - -app.get("/health", (_req, res) => - res.json({ - ok: true, - version: process.env.APP_VERSION ?? "unknown", - gitSha: process.env.APP_GIT_SHA ?? "unknown", - buildId: process.env.APP_BUILD_ID ?? "unknown", - }), -); - -app.use("/admin", adminGuard, adminRouter); -app.use("/v1", proxyRouter); - -app.use(express.static(webDist)); -app.get("*", (req, res, next) => { - if ( - req.path.startsWith("/admin/") || - req.path.startsWith("/v1/") || - req.path === "/health" - ) - return next(); - res.sendFile(path.join(webDist, "index.html"), (err) => { - if (err) next(); - }); -}); - -const server = http.createServer(app); - -installResponsesWebsocketProxy({ - server, - port: PORT, -}); - -server.listen(PORT, () => { - console.log(`multivibe listening on :${PORT}`); +async function main() { + const runtime = await createRuntime({ installSignalHandlers: true }); + await runtime.start(); + console.log( + `multivibe listening on ${runtime.config.host}:${runtime.config.port}`, + ); console.log( - `store=${STORE_PATH} oauth=${OAUTH_STATE_PATH} trace=${TRACE_FILE_PATH} traceStats=${TRACE_STATS_HISTORY_PATH} redirect=${oauthConfig.redirectUri} openaiUpstream=${CHATGPT_BASE_URL}${UPSTREAM_PATH} mistralUpstream=${MISTRAL_BASE_URL}${MISTRAL_UPSTREAM_PATH} zaiUpstream=${ZAI_BASE_URL}${ZAI_UPSTREAM_PATH}`, + `store=${runtime.config.storePath} oauth=${runtime.config.oauthStatePath} trace=${runtime.config.traceFilePath} traceStats=${runtime.config.traceStatsHistoryPath} redirect=${runtime.config.oauthConfig.redirectUri} openaiUpstream=${runtime.config.openaiBaseUrl} mistralUpstream=${runtime.config.mistralBaseUrl}${runtime.config.mistralUpstreamPath}`, ); +} + +main().catch((err) => { + console.error(err); + process.exit(1); }); diff --git a/src/store.ts b/src/store.ts index 5febc24..1c84225 100644 --- a/src/store.ts +++ b/src/store.ts @@ -9,22 +9,34 @@ import type { StoreFile, } from "./types.js"; import { ACCOUNT_FLUSH_INTERVAL_MS } from "./config.js"; +import { decryptJson, encryptJson, looksEncryptedJson } from "./crypto.js"; const DEFAULT_FILE: StoreFile = { accounts: [], modelAliases: [] }; const DEFAULT_OAUTH_FILE: OAuthStateFile = { states: [] }; -async function ensureFile(filePath: string, seed: object) { +async function ensureFile( + filePath: string, + seed: object, + encryptionKey?: string, +) { await fs.mkdir(path.dirname(filePath), { recursive: true }); try { await fs.access(filePath); } catch { - await writeJsonAtomic(filePath, seed); + await writeJsonAtomic(filePath, seed, encryptionKey); } } -async function writeJsonAtomic(filePath: string, data: unknown): Promise { +async function writeJsonAtomic( + filePath: string, + data: unknown, + encryptionKey?: string, +): Promise { const tmp = `${filePath}.tmp-${randomUUID()}`; - await fs.writeFile(tmp, JSON.stringify(data, null, 2)); + const payload = encryptionKey + ? encryptJson(data, encryptionKey) + : JSON.stringify(data, null, 2); + await fs.writeFile(tmp, payload, { mode: 0o600 }); await fs.rename(tmp, filePath); } @@ -38,27 +50,58 @@ export async function cleanupOrphanedTmpFiles(dataDir: string): Promise { ); } +async function readJsonFile( + filePath: string, + encryptionKey?: string, +): Promise { + const raw = await fs.readFile(filePath, "utf8"); + if (looksEncryptedJson(raw)) { + if (!encryptionKey) { + throw new Error(`encrypted file requires STORE_ENCRYPTION_KEY: ${filePath}`); + } + return decryptJson(raw, encryptionKey); + } + return JSON.parse(raw) as T; +} + export class AccountStore { private inMemoryAccounts: Account[] = []; private inMemoryModelAliases: ModelAlias[] = []; private dirty = false; private flushTimer: NodeJS.Timeout | null = null; + private lastLoadedMtimeMs = 0; - constructor(private filePath: string) {} + constructor( + private filePath: string, + private encryptionKey?: string, + ) {} async init() { - await ensureFile(this.filePath, DEFAULT_FILE); + await ensureFile(this.filePath, DEFAULT_FILE, this.encryptionKey); await this.reloadFromDisk(); } private async reloadFromDisk() { - const raw = await fs.readFile(this.filePath, "utf8"); - const data = JSON.parse(raw) as StoreFile; + const data = await readJsonFile(this.filePath, this.encryptionKey); this.inMemoryAccounts = Array.isArray(data.accounts) ? data.accounts : []; this.inMemoryModelAliases = Array.isArray(data.modelAliases) ? data.modelAliases : []; this.dirty = false; + const stat = await fs.stat(this.filePath); + this.lastLoadedMtimeMs = stat.mtimeMs; + } + + private async reloadFromDiskIfChanged() { + if (this.dirty) return; + try { + const stat = await fs.stat(this.filePath); + if (stat.mtimeMs > this.lastLoadedMtimeMs) { + await this.reloadFromDisk(); + } + } catch { + // best-effort external reload + } } private scheduleFlush() { @@ -74,8 +117,12 @@ export class AccountStore { await writeJsonAtomic(this.filePath, { accounts: this.inMemoryAccounts, modelAliases: this.inMemoryModelAliases, - }); + }, this.encryptionKey); this.dirty = false; + try { + const stat = await fs.stat(this.filePath); + this.lastLoadedMtimeMs = stat.mtimeMs; + } catch {} if (this.flushTimer) { clearTimeout(this.flushTimer); this.flushTimer = null; @@ -136,6 +183,7 @@ export class AccountStore { } async listAccounts(): Promise { + await this.reloadFromDiskIfChanged(); return this.getCachedAccounts(); } @@ -151,6 +199,7 @@ export class AccountStore { } async listModelAliases(): Promise { + await this.reloadFromDiskIfChanged(); return this.getCachedModelAliases(); } @@ -191,19 +240,21 @@ export class AccountStore { } export class OAuthStateStore { - constructor(private filePath: string) {} + constructor( + private filePath: string, + private encryptionKey?: string, + ) {} async init() { - await ensureFile(this.filePath, DEFAULT_OAUTH_FILE); + await ensureFile(this.filePath, DEFAULT_OAUTH_FILE, this.encryptionKey); } private async read(): Promise { - const raw = await fs.readFile(this.filePath, "utf8"); - return JSON.parse(raw) as OAuthStateFile; + return readJsonFile(this.filePath, this.encryptionKey); } private async write(data: OAuthStateFile): Promise { - await writeJsonAtomic(this.filePath, data); + await writeJsonAtomic(this.filePath, data, this.encryptionKey); } async create(state: OAuthFlowState) { diff --git a/src/traces.ts b/src/traces.ts index e45cba3..051bac7 100644 --- a/src/traces.ts +++ b/src/traces.ts @@ -2,11 +2,13 @@ import { estimateCostUsd } from "./model-pricing.js"; import fs from "node:fs/promises"; import { randomUUID } from "node:crypto"; import path from "node:path"; +import { TRACE_COMPACTION_INTERVAL } from "./config.js"; export type TraceEntry = { id: string; at: number; route: string; + sessionId?: string; accountId?: string; accountEmail?: string; model?: string; @@ -87,6 +89,7 @@ export type UsageAggregate = { promptTokens: number; completionTokens: number; totalTokens: number; + costUsd: number; statusCounts: Record; firstAt?: number; lastAt?: number; @@ -189,6 +192,10 @@ function normalizeTrace(raw: any): TraceEntry | null { : `${at}-${route}-${status}`, at, route, + sessionId: + typeof raw.sessionId === "string" && raw.sessionId.trim() + ? raw.sessionId.trim() + : undefined, accountId: typeof raw.accountId === "string" ? raw.accountId : undefined, accountEmail: typeof raw.accountEmail === "string" ? raw.accountEmail : undefined, @@ -258,6 +265,7 @@ function createUsageAggregate(): UsageAggregate { promptTokens: 0, completionTokens: 0, totalTokens: 0, + costUsd: 0, statusCounts: {}, }; } @@ -266,6 +274,14 @@ function addTraceToAggregate(agg: UsageAggregate, trace: TraceEntry) { const status = Number(trace.status); const statusKey = Number.isFinite(status) ? String(status) : "unknown"; const tokens = usageToTokens(trace.usage); + const costUsd = + typeof trace.costUsd === "number" + ? trace.costUsd + : estimateCostUsd( + trace.model, + trace.tokensInput ?? 0, + trace.tokensOutput ?? 0, + ) ?? 0; agg.requests += 1; if (status >= 200 && status < 400) agg.ok += 1; @@ -281,6 +297,7 @@ function addTraceToAggregate(agg: UsageAggregate, trace: TraceEntry) { agg.completionTokens += tokens.completionTokens; agg.totalTokens += tokens.totalTokens; } + agg.costUsd += costUsd; if (typeof trace.at === "number") { agg.firstAt = @@ -320,6 +337,7 @@ function finalizeAggregate(agg: UsageAggregate) { completion: agg.completionTokens, total: agg.totalTokens, }, + costUsd: Math.round(agg.costUsd * 1_000_000) / 1_000_000, statusCounts: agg.statusCounts, firstAt: agg.firstAt, lastAt: agg.lastAt, @@ -529,6 +547,8 @@ export function createTraceManager(config: TraceManagerConfig) { const statsBuckets = new Map(); let totalStored = 0; let cacheInit: Promise | null = null; + let appendSinceCompaction = 0; + let compactionQueued = false; async function ensureParentDir(file: string) { await fs.mkdir(path.dirname(file), { recursive: true }); @@ -538,12 +558,40 @@ export function createTraceManager(config: TraceManagerConfig) { try { const raw = await fs.readFile(filePath, "utf8"); const parsed: TraceEntry[] = []; - for (const line of raw.split("\n")) { - if (!line.trim()) continue; - try { - const normalized = normalizeTrace(JSON.parse(line)); - if (normalized) parsed.push(normalized); - } catch {} + const fileHandle = await fs.open(filePath, 'r'); + let position = 0; + let buffer = Buffer.alloc(65536); // 64KB buffer + let remaining = ''; + + try { + while (true) { + const { bytesRead } = await fileHandle.read(buffer, 0, buffer.length, position); + if (bytesRead === 0) break; + + position += bytesRead; + const chunk = remaining + buffer.toString('utf8', 0, bytesRead); + const lines = chunk.split('\n'); + remaining = lines.pop() || ''; + + for (const line of lines) { + if (!line.trim()) continue; + try { + const normalized = normalizeTrace(JSON.parse(line)); + if (normalized) parsed.push(normalized); + } catch {} + } + } + + // Process any remaining data + if (remaining.trim()) { + try { + const normalized = normalizeTrace(JSON.parse(remaining)); + if (normalized) parsed.push(normalized); + } catch {} + } + + } finally { + await fileHandle.close(); } return parsed.slice(-retentionMax); } catch { @@ -627,6 +675,12 @@ export function createTraceManager(config: TraceManagerConfig) { await fs.rename(tmp, filePath); } + async function appendTraceLine(entry: TraceEntry): Promise { + const json = JSON.stringify(entry); + if (json.length > 1024 * 1024) return; + await fs.appendFile(filePath, `${json}\n`, "utf8"); + } + function toStatsHistoryEntry(entry: TraceEntry): TraceEntry { const { requestBody: _requestBody, @@ -853,17 +907,33 @@ export function createTraceManager(config: TraceManagerConfig) { }; } + function queueCompactionIfNeeded() { + if (compactionQueued) return; + if (traceCache.length <= retentionMax && appendSinceCompaction < TRACE_COMPACTION_INTERVAL) { + return; + } + compactionQueued = true; + traceWriteQueue = traceWriteQueue.then(async () => { + try { + await writeTraceWindow(traceCache.slice(-retentionMax)); + appendSinceCompaction = 0; + } finally { + compactionQueued = false; + } + }); + } + async function appendTrace( entry: Omit< TraceEntry, - "id" | "isError" | "tokensInput" | "tokensOutput" | "tokensTotal" - >, + "id" | "tokensInput" | "tokensOutput" | "tokensTotal" | "isError" + > & { isError?: boolean }, ) { const normalizedTokens = normalizeTokenFields(entry.usage); const finalEntry: TraceEntry = { ...entry, id: randomUUID(), - isError: entry.status >= 400, + isError: entry.isError ?? entry.status >= 400, tokensInput: normalizedTokens.tokensInput, tokensOutput: normalizedTokens.tokensOutput, tokensTotal: normalizedTokens.tokensTotal, @@ -881,8 +951,9 @@ export function createTraceManager(config: TraceManagerConfig) { if (traceCache.length > retentionMax) { traceCache.splice(0, traceCache.length - retentionMax); } - await ensureParentDir(filePath); - await fs.appendFile(filePath, line, "utf8"); + appendSinceCompaction += 1; + await appendTraceLine(finalEntry); + queueCompactionIfNeeded(); }); traceWriteQueue = run.catch(() => undefined); await Promise.all([run, appendStatsHistory(finalEntry)]); diff --git a/src/types.ts b/src/types.ts index ccd1096..1d3e3a6 100644 --- a/src/types.ts +++ b/src/types.ts @@ -24,6 +24,16 @@ export type AccountState = { recentErrors?: AccountError[]; needsTokenRefresh?: boolean; lastUsageRefreshAt?: number; + refreshBlockedUntil?: number; + refreshFailureCount?: number; + modelAvailability?: Record< + string, + { + supported: boolean; + checkedAt: number; + reason?: string; + } + >; }; export type Account = { diff --git a/test/admin-validation.test.js b/test/admin-validation.test.js new file mode 100644 index 0000000..f89f95f --- /dev/null +++ b/test/admin-validation.test.js @@ -0,0 +1,38 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import path from "node:path"; +import { createTempDir, startRuntime, writeJson } from "./helpers.js"; + +test("admin account endpoints reject unknown fields", async () => { + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { accounts: [], modelAliases: [] }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + }); + + try { + const res = await fetch(`${runtime.baseUrl}/admin/accounts`, { + method: "POST", + headers: { + "content-type": "application/json", + "x-admin-token": "test-admin", + }, + body: JSON.stringify({ + id: "x", + accessToken: "token", + enabled: true, + hackedField: true, + }), + }); + + assert.equal(res.status, 400); + const body = await res.json(); + assert.match(body.error, /unknown fields/i); + } finally { + await runtime.close(); + } +}); diff --git a/test/helpers.js b/test/helpers.js new file mode 100644 index 0000000..b331fcb --- /dev/null +++ b/test/helpers.js @@ -0,0 +1,79 @@ +import { mkdtemp, mkdir, writeFile } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import http from "node:http"; + +export async function createTempDir(prefix = "multivibe-test-") { + return mkdtemp(path.join(os.tmpdir(), prefix)); +} + +export async function writeJson(filePath, value) { + await mkdir(path.dirname(filePath), { recursive: true }); + await writeFile(filePath, JSON.stringify(value, null, 2)); +} + +export async function startHttpServer(handler) { + const server = http.createServer(handler); + await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve)); + const address = server.address(); + const port = typeof address === "object" && address ? address.port : 0; + return { + server, + url: `http://127.0.0.1:${port}`, + close: () => + new Promise((resolve, reject) => { + const timer = setTimeout(() => { + server.closeAllConnections(); + resolve(); + }, 250); + server.close((err) => { + clearTimeout(timer); + if (err) reject(err); + else resolve(); + }); + }), + }; +} + +export async function getAvailablePort() { + const lease = await startHttpServer((_req, res) => { + res.statusCode = 204; + res.end(); + }); + const { port } = new URL(lease.url); + await lease.close(); + return Number(port); +} + +export async function startRuntime(options = {}) { + const { createRuntime } = await import("../dist/runtime.js"); + const callbackPort = await getAvailablePort(); + const runtime = await createRuntime({ + host: "127.0.0.1", + port: 0, + adminToken: "test-admin", + installSignalHandlers: false, + oauthConfig: + options.oauthConfig ?? + { + authorizationUrl: "https://auth.openai.com/oauth/authorize", + tokenUrl: "https://auth.openai.com/oauth/token", + clientId: "test-client", + scope: "openid profile email offline_access", + redirectUri: `http://127.0.0.1:${callbackPort}/auth/callback`, + }, + ...options, + }); + await runtime.start(); + const address = runtime.server.address(); + const port = typeof address === "object" && address ? address.port : 0; + return { + runtime, + baseUrl: `http://127.0.0.1:${port}`, + close: async () => { + runtime.server.closeIdleConnections(); + runtime.server.closeAllConnections(); + await runtime.shutdown(); + }, + }; +} diff --git a/test/manual-stress.mjs b/test/manual-stress.mjs new file mode 100644 index 0000000..be663bf --- /dev/null +++ b/test/manual-stress.mjs @@ -0,0 +1,339 @@ +import assert from "node:assert/strict"; +import http from "node:http"; +import os from "node:os"; +import path from "node:path"; +import { mkdtemp, writeFile } from "node:fs/promises"; +import { createRuntime } from "../dist/runtime.js"; + +async function startHttpServer(handler) { + const server = http.createServer(handler); + await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve)); + const address = server.address(); + const port = typeof address === "object" && address ? address.port : 0; + return { + server, + url: `http://127.0.0.1:${port}`, + close: () => new Promise((resolve) => server.close(() => resolve())), + }; +} + +async function writeJson(filePath, value) { + await writeFile(filePath, JSON.stringify(value, null, 2)); +} + +async function createBaseFiles() { + const tmp = await mkdtemp(path.join(os.tmpdir(), "multivibe-stress-")); + const storePath = path.join(tmp, "accounts.json"); + const oauthStatePath = path.join(tmp, "oauth-state.json"); + const traceFilePath = path.join(tmp, "traces.jsonl"); + const traceStatsHistoryPath = path.join(tmp, "traces-history.jsonl"); + await writeJson(storePath, { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(oauthStatePath, { states: [] }); + return { storePath, oauthStatePath, traceFilePath, traceStatsHistoryPath }; +} + +function oauthConfig(port) { + return { + authorizationUrl: "https://auth.openai.com/oauth/authorize", + tokenUrl: "https://auth.openai.com/oauth/token", + clientId: "test-client", + scope: "openid profile email offline_access", + redirectUri: `http://127.0.0.1:${port}/auth/callback`, + }; +} + +async function startRuntimeFor(upstreamUrl, files, upstreamRequestTimeoutMs, redirectPort) { + const runtime = await createRuntime({ + host: "127.0.0.1", + port: 0, + adminToken: "test-admin", + installSignalHandlers: false, + storePath: files.storePath, + oauthStatePath: files.oauthStatePath, + traceFilePath: files.traceFilePath, + traceStatsHistoryPath: files.traceStatsHistoryPath, + openaiBaseUrl: upstreamUrl, + upstreamRequestTimeoutMs, + oauthConfig: oauthConfig(redirectPort), + }); + await runtime.start(); + const address = runtime.server.address(); + return { runtime, baseUrl: `http://127.0.0.1:${address.port}` }; +} + +async function runPool(items, limit, worker) { + let index = 0; + const runners = Array.from({ length: limit }, async () => { + while (true) { + const current = index < items.length ? items[index++] : undefined; + if (typeof current === "undefined") return; + await worker(current); + } + }); + await Promise.all(runners); +} + +function responseCompletedFrame(text) { + return ( + "event: response.completed\n" + + "data: " + + JSON.stringify({ + type: "response.completed", + response: { + object: "response", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + content: [{ type: "output_text", text }], + }, + ], + usage: { + input_tokens: 10, + output_tokens: text.length, + total_tokens: 10 + text.length, + }, + }, + }) + + "\n\n" + ); +} + +const files = await createBaseFiles(); + +let requestCounter = 0; +const successUpstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url && + req.url.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + requestCounter += 1; + const mode = requestCounter % 4; + + if (mode === 0) { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n', + ); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":" world"}\n\n', + ); + res.write(responseCompletedFrame("hello world")); + setTimeout(() => { + if (!res.writableEnded) res.end(": linger\n\n"); + }, 120); + return; + } + + if (mode === 1) { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n', + ); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":" world"}\n\n', + ); + res.write( + 'event: response.output_text.done\ndata: {"type":"response.output_text.done","text":"hello world"}\n\n', + ); + setTimeout(() => { + if (!res.writableEnded) res.end(": linger\n\n"); + }, 120); + return; + } + + if (mode === 2) { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + object: "response", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + content: [{ type: "output_text", text: "json path" }], + }, + ], + usage: { input_tokens: 10, output_tokens: 8, total_tokens: 18 }, + }), + ); + return; + } + + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + let i = 0; + const timer = setInterval(() => { + i += 1; + if (i <= 4) { + res.write( + "event: response.output_text.delta\ndata: " + + JSON.stringify({ + type: "response.output_text.delta", + delta: String(i), + }) + + "\n\n", + ); + return; + } + clearInterval(timer); + res.write(responseCompletedFrame("1234")); + setTimeout(() => { + if (!res.writableEnded) res.end(": linger\n\n"); + }, 120); + }, 4); + return; + } + res.writeHead(404).end(); +}); + +const successRuntime = await startRuntimeFor(successUpstream.url, files, 70, 20001); +const successStats = { total: 0, stream: 0, buffered: 0 }; + +await runPool(Array.from({ length: 120 }, (_, i) => i), 12, async (i) => { + const wantStream = i % 2 === 0; + const startedAt = Date.now(); + const res = await fetch(`${successRuntime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: wantStream, + input: `hello-${i}`, + }), + }); + assert.equal(res.status, 200); + const elapsedMs = Date.now() - startedAt; + assert.ok(elapsedMs < 220, `request ${i} took too long: ${elapsedMs}ms`); + if (wantStream) { + const body = await res.text(); + assert.ok( + body.includes("response.completed") || + body.includes("response.output_text.done"), + ); + successStats.stream += 1; + } else { + const body = await res.json(); + const text = body?.output?.[0]?.content?.[0]?.text; + assert.ok( + text === "hello world" || text === "1234" || text === "json path", + `unexpected buffered text: ${text}`, + ); + successStats.buffered += 1; + } + successStats.total += 1; +}); + +await successRuntime.runtime.shutdown(); +await successUpstream.close(); + +const stallUpstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url && + req.url.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n', + ); + return; + } + res.writeHead(404).end(); +}); + +const stallRuntime = await startRuntimeFor(stallUpstream.url, files, 60, 20002); +const timeoutStats = { buffered504: 0, streamingClosed: 0 }; + +for (let i = 0; i < 10; i++) { + const res = await fetch(`${stallRuntime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: `stall-buffered-${i}`, + }), + }); + assert.equal(res.status, 504); + timeoutStats.buffered504 += 1; +} + +for (let i = 0; i < 10; i++) { + const startedAt = Date.now(); + const res = await fetch(`${stallRuntime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: true, + input: `stall-stream-${i}`, + }), + }); + assert.equal(res.status, 200); + const body = await res.text(); + const elapsedMs = Date.now() - startedAt; + assert.ok( + elapsedMs < 180, + `streaming stall ${i} took too long: ${elapsedMs}ms`, + ); + assert.ok(body.includes("response.output_text.delta")); + timeoutStats.streamingClosed += 1; +} + +await stallRuntime.runtime.shutdown(); +await stallUpstream.close(); + +console.log(JSON.stringify({ successStats, timeoutStats }, null, 2)); diff --git a/test/proxy-behavior.test.js b/test/proxy-behavior.test.js new file mode 100644 index 0000000..4f2dad0 --- /dev/null +++ b/test/proxy-behavior.test.js @@ -0,0 +1,1633 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import path from "node:path"; +import http from "node:http"; +import { readFile } from "node:fs/promises"; +import { createTempDir, startHttpServer, startRuntime, writeJson } from "./helpers.js"; +import { resetDiscoveredModelsCacheForTest } from "../dist/routes/proxy/index.js"; + +function responseObject(text = "OK") { + return { + object: "response", + status: "completed", + output: [ + { + type: "message", + role: "assistant", + content: [{ type: "output_text", text }], + }, + ], + usage: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + }, + }; +} + +test("proxy fails over on model incompatibility and records capability state", async () => { + const seenAccounts = []; + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + const auth = req.headers.authorization ?? ""; + seenAccounts.push(auth); + if (auth === "Bearer acct-1-token") { + res.writeHead(400, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + detail: + "The 'gpt-5.4' model is not supported when using Codex with a ChatGPT account.", + }), + ); + return; + } + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify(responseObject("OK"))); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + const storePath = path.join(tmp, "accounts.json"); + const oauthStatePath = path.join(tmp, "oauth-state.json"); + const traceFilePath = path.join(tmp, "traces.jsonl"); + const traceStatsHistoryPath = path.join(tmp, "traces-history.jsonl"); + await writeJson(storePath, { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + priority: 0, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + { + id: "acct-2", + provider: "openai", + accessToken: "acct-2-token", + enabled: true, + priority: 0, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(oauthStatePath, { states: [] }); + + const runtime = await startRuntime({ + storePath, + oauthStatePath, + traceFilePath, + traceStatsHistoryPath, + openaiBaseUrl: upstream.url, + }); + + try { + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + assert.equal(res.status, 200); + const body = await res.json(); + assert.equal(body.object, "response"); + assert.equal(seenAccounts.length, 2); + assert.deepEqual(seenAccounts, [ + "Bearer acct-1-token", + "Bearer acct-2-token", + ]); + + await runtime.runtime.store.flushIfDirty(); + const store = JSON.parse(await readFile(storePath, "utf8")); + const account1 = store.accounts.find((account) => account.id === "acct-1"); + assert.equal(account1.state.blockedUntil, undefined); + assert.equal(account1.state.blockedReason, undefined); + assert.match(account1.state.lastError, /model unsupported/i); + assert.equal( + account1.state.modelAvailability["gpt-5.4"].supported, + false, + ); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("unsupported model responses do not globally block accounts and return upstream 400", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(400, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + detail: + "The 'None' model is not supported when using Codex with a ChatGPT account.", + }), + ); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + const storePath = path.join(tmp, "accounts.json"); + const oauthStatePath = path.join(tmp, "oauth-state.json"); + await writeJson(storePath, { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + { + id: "acct-2", + provider: "openai", + accessToken: "acct-2-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(oauthStatePath, { states: [] }); + + const runtime = await startRuntime({ + storePath, + oauthStatePath, + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + }); + + try { + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({}), + }); + assert.equal(res.status, 400); + const body = await res.json(); + assert.match(body.detail, /None/); + + await runtime.runtime.store.flushIfDirty(); + const store = JSON.parse(await readFile(storePath, "utf8")); + for (const account of store.accounts) { + assert.equal(account.state.blockedUntil, undefined); + assert.equal(account.state.blockedReason, undefined); + assert.match(account.state.lastError, /model unsupported/i); + } + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy request routing does not block on cold model discovery", async () => { + resetDiscoveredModelsCacheForTest(); + let modelCalls = 0; + let responseCalls = 0; + + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + modelCalls += 1; + setTimeout(() => { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + }, 150); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + responseCalls += 1; + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify(responseObject("OK"))); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + }); + + try { + const startedAt = Date.now(); + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + assert.equal(res.status, 200); + assert.equal(responseCalls, 1); + assert.equal(modelCalls, 0); + assert.ok(Date.now() - startedAt < 150); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("stale usage refresh does not block proxy responses", async () => { + let usageCalls = 0; + let usageCompleted = false; + let responseCalls = 0; + + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + usageCalls += 1; + setTimeout(() => { + usageCompleted = true; + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + }, 150); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + responseCalls += 1; + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify(responseObject("OK"))); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: 0, primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + }); + + try { + const startedAt = Date.now(); + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + assert.equal(res.status, 200); + assert.equal(responseCalls, 1); + assert.equal(usageCalls, 1); + assert.equal(usageCompleted, false); + assert.ok(Date.now() - startedAt < 150); + await new Promise((resolve) => setTimeout(resolve, 200)); + assert.equal(usageCompleted, true); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy does not blindly retry generic upstream 500s for POST responses", async () => { + let responseCalls = 0; + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + responseCalls += 1; + res.writeHead(500, { "content-type": "application/json" }); + res.end(JSON.stringify({ error: "boom" })); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + }); + + try { + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + assert.equal(res.status, 500); + assert.equal(responseCalls, 1); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("successful proxy responses clear stale auth failure state", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify(responseObject("OK"))); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + const storePath = path.join(tmp, "accounts.json"); + await writeJson(storePath, { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: { + blockedUntil: Date.now() + 60_000, + blockedReason: "auth failure: 401", + needsTokenRefresh: true, + refreshFailureCount: 3, + refreshBlockedUntil: Date.now() + 60_000, + lastError: "refresh token failed: token endpoint failed 401", + recentErrors: [ + { at: Date.now(), message: "usage probe failed 401" }, + { at: Date.now() - 1_000, message: "auth failure: 401" }, + { at: Date.now() - 2_000, message: "quota/rate-limit: 429" }, + ], + }, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath, + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + }); + + try { + await runtime.runtime.store.upsertAccount({ + ...(await runtime.runtime.store.listAccounts())[0], + state: { + blockedUntil: undefined, + blockedReason: undefined, + needsTokenRefresh: true, + refreshFailureCount: 3, + refreshBlockedUntil: Date.now() + 60_000, + lastError: "refresh token failed: token endpoint failed 401", + recentErrors: [ + { at: Date.now(), message: "usage probe failed 401" }, + { at: Date.now() - 1_000, message: "auth failure: 401" }, + { at: Date.now() - 2_000, message: "quota/rate-limit: 429" }, + ], + }, + }); + + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + + assert.equal(res.status, 200); + + await runtime.runtime.store.flushIfDirty(); + const store = JSON.parse(await readFile(storePath, "utf8")); + const account = store.accounts.find((entry) => entry.id === "acct-1"); + assert.equal(account.state.needsTokenRefresh, false); + assert.equal(account.state.refreshFailureCount, 0); + assert.equal(account.state.refreshBlockedUntil, undefined); + assert.equal(account.state.lastError, undefined); + assert.equal(account.state.blockedUntil, undefined); + assert.equal(account.state.blockedReason, undefined); + assert.deepEqual(account.state.recentErrors, [ + { + at: account.state.recentErrors[0].at, + message: "quota/rate-limit: 429", + }, + ]); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy keeps a response alive while upstream chunks continue arriving", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + let sent = 0; + const timer = setInterval(() => { + sent += 1; + if (sent <= 3) { + res.write( + `event: response.output_text.delta\ndata: ${JSON.stringify({ + type: "response.output_text.delta", + delta: `part-${sent}`, + })}\n\n`, + ); + return; + } + clearInterval(timer); + res.end( + `event: response.completed\ndata: ${JSON.stringify({ + type: "response.completed", + response: responseObject("slow but valid"), + })}\n\n`, + ); + }, 10); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 80, + }); + + try { + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + assert.equal(res.status, 200); + const body = await res.json(); + assert.equal(body.output[0].content[0].text, "slow but valid"); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy returns 504 immediately on upstream timeout instead of retrying another account", async () => { + const seenAccounts = []; + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + seenAccounts.push(req.headers.authorization ?? ""); + setTimeout(() => { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify(responseObject("too late"))); + }, 80); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + priority: 0, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + { + id: "acct-2", + provider: "openai", + accessToken: "acct-2-token", + enabled: true, + priority: 1, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 25, + }); + + try { + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + assert.equal(res.status, 504); + assert.equal(seenAccounts.length, 1); + assert.equal(seenAccounts[0], "Bearer acct-1-token"); + assert.deepEqual(await res.json(), { error: "upstream request timed out" }); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy returns 504 when an upstream response stalls after headers", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n', + ); + setTimeout(() => { + if (!res.writableEnded) res.end(); + }, 200); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 25, + }); + + try { + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + + assert.equal(res.status, 504); + assert.deepEqual(await res.json(), { error: "upstream request timed out" }); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("downstream client disconnects stay in traces without poisoning account errors", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + setTimeout(() => { + if (res.writableEnded) return; + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify(responseObject("too late"))); + }, 80); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + const storePath = path.join(tmp, "accounts.json"); + const oauthStatePath = path.join(tmp, "oauth-state.json"); + const traceFilePath = path.join(tmp, "traces.jsonl"); + await writeJson(storePath, { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(oauthStatePath, { states: [] }); + + const runtime = await startRuntime({ + storePath, + oauthStatePath, + traceFilePath, + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + }); + + try { + await new Promise((resolve, reject) => { + const req = http.request( + `${runtime.baseUrl}/v1/responses`, + { + method: "POST", + headers: { "content-type": "application/json" }, + }, + (res) => { + res.resume(); + }, + ); + req.on("error", (err) => { + if (err.code === "ECONNRESET" || err.message === "socket hang up") { + resolve(); + return; + } + reject(err); + }); + req.write( + JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + ); + req.end(); + setTimeout(() => req.destroy(), 10); + }); + + await new Promise((resolve) => setTimeout(resolve, 120)); + await runtime.runtime.store.flushIfDirty(); + + const store = JSON.parse(await readFile(storePath, "utf8")); + const account = store.accounts.find((entry) => entry.id === "acct-1"); + assert.equal(account.state?.lastError, undefined); + assert.equal(account.state?.recentErrors, undefined); + + const traces = (await readFile(traceFilePath, "utf8")) + .trim() + .split("\n") + .filter(Boolean) + .map((line) => JSON.parse(line)); + const trace = traces.at(-1); + assert.equal(trace.status, 499); + assert.equal(trace.isError, false); + assert.equal(trace.error, "downstream client disconnected"); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy closes a stalled streamed response without crashing after headers are sent", async () => { + let calls = 0; + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + calls += 1; + if (calls === 1) { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n', + ); + return; + } + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify(responseObject("recovered"))); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 25, + }); + + try { + const firstStartedAt = Date.now(); + const first = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: true, + input: "reply with ok", + }), + }); + assert.equal(first.status, 200); + const firstBody = await first.text(); + const firstElapsed = Date.now() - firstStartedAt; + assert.ok(firstElapsed < 180, `expected stall close promptly, got ${firstElapsed}ms`); + assert.match(firstBody, /response\.output_text\.delta/); + + const second = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + assert.equal(second.status, 200); + const secondBody = await second.json(); + assert.equal(secondBody.output[0].content[0].text, "recovered"); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy closes streamed responses once response.completed arrives", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n', + ); + res.write( + `event: response.completed\ndata: ${JSON.stringify({ + type: "response.completed", + response: responseObject("done"), + })}\n\n`, + ); + setTimeout(() => { + if (!res.writableEnded) res.end(": upstream lingered\n\n"); + }, 200); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 25, + }); + + try { + const startedAt = Date.now(); + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: true, + input: "reply with ok", + }), + }); + + assert.equal(res.status, 200); + const body = await res.text(); + const elapsedMs = Date.now() - startedAt; + assert.match(body, /response\.completed/); + assert.ok(elapsedMs < 180, `expected proxy to close promptly, got ${elapsedMs}ms`); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy preserves control frames for native streamed responses", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + res.write( + `event: response.created\ndata: ${JSON.stringify({ + type: "response.created", + response: { id: "resp_123", object: "response", status: "in_progress" }, + })}\n\n`, + ); + res.write( + `event: response.in_progress\ndata: ${JSON.stringify({ + type: "response.in_progress", + response: { id: "resp_123", object: "response", status: "in_progress" }, + })}\n\n`, + ); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n', + ); + res.end( + `event: response.completed\ndata: ${JSON.stringify({ + type: "response.completed", + response: responseObject("done"), + })}\n\n`, + ); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 25, + }); + + try { + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: true, + input: "reply with ok", + }), + }); + + assert.equal(res.status, 200); + const body = await res.text(); + assert.match(body, /response\.created/); + assert.match(body, /response\.in_progress/); + assert.match(body, /response\.output_text\.delta/); + assert.match(body, /response\.completed/); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy detects native streamed responses even when upstream omits content-type", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200); + res.flushHeaders(); + res.write( + 'event: response.created\ndata: {"type":"response.created","response":{"id":"resp_123","object":"response","status":"in_progress"}}\n\n', + ); + setTimeout(() => { + res.end( + `event: response.completed\ndata: ${JSON.stringify({ + type: "response.completed", + response: responseObject("done"), + })}\n\n`, + ); + }, 400); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 500, + }); + + try { + const startedAt = Date.now(); + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: true, + input: "reply with ok", + }), + }); + assert.equal(res.status, 200); + const reader = res.body.getReader(); + const first = await reader.read(); + const firstChunk = new TextDecoder().decode(first.value, { stream: true }); + assert.match(firstChunk, /response\.created/); + assert.ok(Date.now() - startedAt < 250); + await reader.cancel(); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy forwards partial native response chunks before a full SSE frame is complete", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + const createdFrame = + `event: response.created\ndata: ${JSON.stringify({ + type: "response.created", + response: { + id: "resp_123", + object: "response", + status: "in_progress", + metadata: { pad: "x".repeat(4096) }, + }, + })}\n\n`; + const splitAt = Math.floor(createdFrame.length / 2); + res.write(createdFrame.slice(0, splitAt)); + setTimeout(() => { + res.write(createdFrame.slice(splitAt)); + res.end( + 'event: response.output_text.done\ndata: {"type":"response.output_text.done","text":"ok"}\n\n', + ); + }, 150); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 500, + }); + + try { + const startedAt = Date.now(); + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: true, + input: "reply with ok", + }), + }); + + assert.equal(res.status, 200); + const reader = res.body.getReader(); + const first = await reader.read(); + const firstChunkMs = Date.now() - startedAt; + const decoder = new TextDecoder(); + let body = first.done ? "" : decoder.decode(first.value, { stream: true }); + assert.ok(firstChunkMs < 120, `expected first chunk promptly, got ${firstChunkMs}ms`); + assert.match(body, /response\.created/); + + while (true) { + const next = await reader.read(); + if (next.done) break; + body += decoder.decode(next.value, { stream: true }); + } + body += decoder.decode(); + assert.match(body, /response\.output_text\.done/); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy preserves native streamed responses that end after response.output_text.done", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n', + ); + res.write( + 'event: response.output_text.done\ndata: {"type":"response.output_text.done","text":"hello"}\n\n', + ); + setTimeout(() => { + if (!res.writableEnded) res.end(": upstream lingered\n\n"); + }, 200); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 25, + }); + + try { + const startedAt = Date.now(); + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: true, + input: "reply with ok", + }), + }); + + assert.equal(res.status, 200); + const body = await res.text(); + const elapsedMs = Date.now() - startedAt; + assert.match(body, /response\.output_text\.done/); + assert.doesNotMatch(body, /response\.completed/); + assert.ok(elapsedMs < 180, `expected proxy to close promptly, got ${elapsedMs}ms`); + } finally { + await runtime.close(); + await upstream.close(); + } +}); + +test("proxy returns a buffered response once response.output_text.done arrives", async () => { + const upstream = await startHttpServer(async (req, res) => { + if (req.method === "GET" && req.url === "/backend-api/wham/usage") { + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + rate_limit: { + primary_window: { used_percent: 0 }, + secondary_window: { used_percent: 0 }, + }, + }), + ); + return; + } + if ( + req.method === "GET" && + req.url?.startsWith("/backend-api/codex/models") + ) { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] })); + return; + } + if (req.method === "POST" && req.url === "/backend-api/codex/responses") { + res.writeHead(200, { "content-type": "text/event-stream" }); + res.flushHeaders(); + res.write( + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n', + ); + res.write( + 'event: response.output_text.done\ndata: {"type":"response.output_text.done","text":"hello"}\n\n', + ); + setTimeout(() => { + if (!res.writableEnded) res.end(": upstream lingered\n\n"); + }, 200); + return; + } + res.writeHead(404).end(); + }); + + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { + accounts: [ + { + id: "acct-1", + provider: "openai", + accessToken: "acct-1-token", + enabled: true, + usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } }, + state: {}, + }, + ], + modelAliases: [], + }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + + const runtime = await startRuntime({ + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + openaiBaseUrl: upstream.url, + upstreamRequestTimeoutMs: 25, + }); + + try { + const startedAt = Date.now(); + const res = await fetch(`${runtime.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.4", + stream: false, + input: "reply with ok", + }), + }); + + const elapsedMs = Date.now() - startedAt; + assert.equal(res.status, 200); + assert.ok(elapsedMs < 180, `expected proxy to return promptly, got ${elapsedMs}ms`); + const body = await res.json(); + assert.equal(body.output[0].content[0].text, "hello"); + } finally { + await runtime.close(); + await upstream.close(); + } +}); diff --git a/test/refresh-singleflight.test.js b/test/refresh-singleflight.test.js new file mode 100644 index 0000000..2560887 --- /dev/null +++ b/test/refresh-singleflight.test.js @@ -0,0 +1,55 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { startHttpServer } from "./helpers.js"; + +test("token refresh is single-flight per account", async () => { + let refreshCalls = 0; + const tokenServer = await startHttpServer(async (req, res) => { + if (req.method === "POST" && req.url === "/oauth/token") { + refreshCalls += 1; + await new Promise((resolve) => setTimeout(resolve, 50)); + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + access_token: "fresh-token", + refresh_token: "fresh-refresh", + expires_in: 3600, + }), + ); + return; + } + res.writeHead(404).end(); + }); + + try { + const { ensureValidToken } = await import("../dist/account-utils.js"); + const account = { + id: "acct-1", + provider: "openai", + accessToken: "expired-token", + refreshToken: "refresh-1", + expiresAt: Date.now() - 1_000, + enabled: true, + state: {}, + }; + const oauthConfig = { + authorizationUrl: `${tokenServer.url}/oauth/authorize`, + tokenUrl: `${tokenServer.url}/oauth/token`, + clientId: "client", + scope: "openid", + redirectUri: "http://localhost/callback", + }; + + const results = await Promise.all( + Array.from({ length: 5 }, () => ensureValidToken(account, oauthConfig)), + ); + + assert.equal(refreshCalls, 1); + for (const result of results) { + assert.equal(result.accessToken, "fresh-token"); + assert.equal(result.refreshToken, "fresh-refresh"); + } + } finally { + await tokenServer.close(); + } +}); diff --git a/test/runtime.test.js b/test/runtime.test.js new file mode 100644 index 0000000..f0a3771 --- /dev/null +++ b/test/runtime.test.js @@ -0,0 +1,90 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import path from "node:path"; +import { createTempDir, getAvailablePort, startRuntime, writeJson } from "./helpers.js"; + +test("runtime refuses non-loopback binding without admin auth", async () => { + const { createRuntime } = await import("../dist/runtime.js"); + const tmp = await createTempDir(); + const storePath = path.join(tmp, "accounts.json"); + const oauthStatePath = path.join(tmp, "oauth-state.json"); + await writeJson(storePath, { accounts: [], modelAliases: [] }); + await writeJson(oauthStatePath, { states: [] }); + + await assert.rejects( + () => + createRuntime({ + host: "0.0.0.0", + port: 0, + adminToken: "", + installSignalHandlers: false, + storePath, + oauthStatePath, + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + }), + /ADMIN_TOKEN is required/, + ); +}); + +test("runtime exposes readiness separately from health", async () => { + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { accounts: [], modelAliases: [] }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + const runtime = await startRuntime({ + adminToken: "test-admin", + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + }); + + try { + const health = await fetch(`${runtime.baseUrl}/health`).then((r) => r.json()); + const ready = await fetch(`${runtime.baseUrl}/ready`).then((r) => ({ + status: r.status, + body: r.status === 200 ? r.json() : r.text(), + })); + + assert.equal(health.ok, true); + assert.equal(health.ready, true); + assert.equal(ready.status, 200); + } finally { + await runtime.close(); + } +}); + +test("runtime serves the loopback OAuth callback helper page", async () => { + const tmp = await createTempDir(); + await writeJson(path.join(tmp, "accounts.json"), { accounts: [], modelAliases: [] }); + await writeJson(path.join(tmp, "oauth-state.json"), { states: [] }); + const callbackPort = await getAvailablePort(); + const runtime = await startRuntime({ + adminToken: "test-admin", + storePath: path.join(tmp, "accounts.json"), + oauthStatePath: path.join(tmp, "oauth-state.json"), + traceFilePath: path.join(tmp, "traces.jsonl"), + traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"), + oauthConfig: { + authorizationUrl: "https://auth.openai.com/oauth/authorize", + tokenUrl: "https://auth.openai.com/oauth/token", + clientId: "test-client", + scope: "openid profile email offline_access", + redirectUri: `http://127.0.0.1:${callbackPort}/auth/callback`, + }, + }); + + try { + const res = await fetch( + `http://127.0.0.1:${callbackPort}/auth/callback?code=test-code&state=test-state`, + ); + const body = await res.text(); + + assert.equal(res.status, 200); + assert.match(body, /OAuth callback received/); + assert.match(body, /multivibe-oauth-callback/); + assert.match(body, /Copy callback URL/); + } finally { + await runtime.close(); + } +}); diff --git a/test/store-encryption.test.js b/test/store-encryption.test.js new file mode 100644 index 0000000..e899c1c --- /dev/null +++ b/test/store-encryption.test.js @@ -0,0 +1,34 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import path from "node:path"; +import { readFile } from "node:fs/promises"; +import { createTempDir } from "./helpers.js"; + +test("account store encrypts persisted state when a key is configured", async () => { + const tmp = await createTempDir(); + const filePath = path.join(tmp, "accounts.enc.json"); + const { AccountStore } = await import("../dist/store.js"); + + const store = new AccountStore(filePath, "super-secret-key"); + await store.init(); + await store.upsertAccount({ + id: "acct-1", + provider: "openai", + accessToken: "access-secret", + refreshToken: "refresh-secret", + enabled: true, + state: {}, + }); + await store.flushIfDirty(); + + const raw = await readFile(filePath, "utf8"); + assert.doesNotMatch(raw, /access-secret|refresh-secret/); + assert.match(raw, /"alg"\s*:\s*"aes-256-gcm"/); + + const reloaded = new AccountStore(filePath, "super-secret-key"); + await reloaded.init(); + const accounts = await reloaded.listAccounts(); + assert.equal(accounts.length, 1); + assert.equal(accounts[0].accessToken, "access-secret"); + assert.equal(accounts[0].refreshToken, "refresh-secret"); +}); diff --git a/test/traces.test.js b/test/traces.test.js new file mode 100644 index 0000000..048a89e --- /dev/null +++ b/test/traces.test.js @@ -0,0 +1,70 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import path from "node:path"; +import { readFile } from "node:fs/promises"; +import { createTempDir } from "./helpers.js"; + +test("trace manager keeps a bounded in-memory window and compacts persisted traces", async () => { + const tmp = await createTempDir(); + const { createTraceManager } = await import("../dist/traces.js"); + const manager = createTraceManager({ + filePath: path.join(tmp, "traces.jsonl"), + historyFilePath: path.join(tmp, "traces-history.jsonl"), + retentionMax: 3, + }); + + for (let i = 0; i < 5; i += 1) { + await manager.appendTrace({ + at: Date.now() + i, + route: "/responses", + status: 200, + stream: false, + latencyMs: 10 + i, + model: `gpt-${i}`, + }); + } + + const window = await manager.readTraceWindow(); + assert.equal(window.length, 3); + assert.deepEqual( + window.map((entry) => entry.model), + ["gpt-2", "gpt-3", "gpt-4"], + ); + + await manager.compactTraceStorageIfNeeded(); + const persisted = (await readFile(path.join(tmp, "traces.jsonl"), "utf8")) + .trim() + .split("\n") + .filter(Boolean) + .map((line) => JSON.parse(line)); + assert.equal(persisted.length, 3); + assert.deepEqual( + persisted.map((entry) => entry.model), + ["gpt-2", "gpt-3", "gpt-4"], + ); +}); + +test("trace manager preserves session ids for trace list entries", async () => { + const tmp = await createTempDir(); + const { createTraceManager } = await import("../dist/traces.js"); + const manager = createTraceManager({ + filePath: path.join(tmp, "traces.jsonl"), + historyFilePath: path.join(tmp, "traces-history.jsonl"), + }); + + await manager.appendTrace({ + at: Date.now(), + route: "/responses", + sessionId: "sess_test_123", + status: 200, + stream: true, + latencyMs: 42, + model: "gpt-5.4", + }); + + const [trace] = await manager.readTraceWindow(); + assert.equal(trace?.sessionId, "sess_test_123"); + + const list = await manager.readTraceListWindow(); + assert.equal(list[0]?.sessionId, "sess_test_123"); +}); diff --git a/web/src/App.tsx b/web/src/App.tsx index ed9f11b..8f8e712 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -3,6 +3,7 @@ import "./styles.css"; import { estimateCostUsd } from "./model-pricing"; import { api, tokenDefault } from "./lib/api"; import { + EMPTY_TRACE_USAGE_STATS, EMPTY_TRACE_PAGINATION, EMPTY_TRACE_STATS, TRACE_PAGE_SIZE, @@ -16,6 +17,7 @@ import type { TracePagination, TraceRangePreset, TraceStats, + TraceUsageStats, } from "./types"; import { AccountsTab } from "./components/tabs/AccountsTab"; import { DocsTab } from "./components/tabs/DocsTab"; @@ -33,6 +35,7 @@ export default function App() { const [accounts, setAccounts] = useState([]); const [traces, setTraces] = useState([]); const [traceStats, setTraceStats] = useState(EMPTY_TRACE_STATS); + const [traceUsageStats, setTraceUsageStats] = useState(EMPTY_TRACE_USAGE_STATS); const [tracePagination, setTracePagination] = useState(EMPTY_TRACE_PAGINATION); const [models, setModels] = useState([]); const [aliases, setAliases] = useState([]); @@ -161,12 +164,14 @@ export default function App() { if (typeof sinceMs === "number") params.set("sinceMs", String(sinceMs)); if (typeof untilMs === "number") params.set("untilMs", String(untilMs)); - const [tr, statsRes] = await Promise.all([ + const [tr, statsRes, usageRes] = await Promise.all([ api(`/admin/traces?${params.toString()}`), api(`/admin/stats/traces?${params.toString()}`), + api(`/admin/stats/usage?${params.toString()}`), ]); setTraces((tr.traces ?? []) as Trace[]); setTraceStats((statsRes.stats ?? tr.stats ?? EMPTY_TRACE_STATS) as TraceStats); + setTraceUsageStats((usageRes ?? EMPTY_TRACE_USAGE_STATS) as TraceUsageStats); setTracePagination((tr.pagination ?? { ...EMPTY_TRACE_PAGINATION, page: safePage }) as TracePagination); setExpandedTraceId(null); setExpandedTrace(null); @@ -445,6 +450,7 @@ export default function App() { (null); const [oauthDialog, setOauthDialog] = useState(null); + useEffect(() => { + if (!oauthDialog) return; + + const onMessage = (event: MessageEvent) => { + const data = event.data; + if (!data || typeof data !== "object") return; + if ((data as { type?: string }).type !== "multivibe-oauth-callback") return; + const callbackUrl = (data as { callbackUrl?: string }).callbackUrl; + if (typeof callbackUrl !== "string" || !callbackUrl.trim()) return; + + try { + const received = new URL(callbackUrl); + const expected = new URL(oauthDialog.expectedRedirectUri); + if (received.origin !== expected.origin || received.pathname !== expected.pathname) { + return; + } + } catch { + return; + } + + setOauthDialog((current) => + current ? { ...current, callbackInput: callbackUrl.trim() } : current, + ); + }; + + window.addEventListener("message", onMessage); + return () => window.removeEventListener("message", onMessage); + }, [oauthDialog]); + const closeModal = () => { setShowAddAccount(false); setProvider("openai"); @@ -583,8 +612,8 @@ export function AccountsTab(props: Props) {
Complete the OpenAI login in the opened browser tab. When the browser reaches - the callback page, copy the full URL and paste it here. Do not paste access or - refresh tokens. + the callback page, the full URL should autofill here. If it does not, copy the + full URL and paste it here. Do not paste access or refresh tokens.
+ + + )} +
+ ); -
-

Latency p50/p95 (hourly)

+ const cards: Record React.ReactNode; toolbar?: React.ReactNode }> = { + tokensOverTime: { + title: "Tokens over time (hourly)", + render: () => ( +
+ + + + + + + + + + + + +
+ ), + }, + modelUsage: { + title: "Model usage", + render: () => ( +
+ + + + + + + + + + +
+ ), + }, + modelCost: { + title: "Model cost (USD)", + render: () => ( +
+ + + + + + usd(Number(v) || 0)} /> + + + + +
+ ), + }, + errorTrend: { + title: "Error trend (hourly)", + render: () => ( +
+ + + + + + + + + + + +
+ ), + }, + costOverTime: { + title: "Cost over time (hourly)", + render: () => ( +
+ + + + + + usd(Number(v) || 0)} /> + + + + +
+ ), + }, + latency: { + title: "Latency p50/p95 (hourly)", + fullSpan: true, + render: () => (
@@ -187,13 +367,14 @@ export function TracingTab(props: Props) {
-
- -
-

Model split by token volume

+ ), + }, + tokenSplit: { + title: "Model split by token volume", + render: () => (
- +
+ ), + }, + usageByAccount: { + title: "Usage by account", + render: () => ( +
+ + + + + + + + + + + + + {topAccounts.map((entry) => { + const accountLabel = sanitized + ? maskEmail(entry.account.email) || maskId(entry.accountId) + : entry.account.email ?? entry.accountId; + return ( + + + + + + + + + ); + })} + {!topAccounts.length && ( + + + + )} + +
AccountReqSuccessTokensCostAvg latency
{accountLabel}{entry.requests}{entry.successRate.toFixed(1)}%{formatTokenCount(entry.tokens.total)}{usd(entry.costUsd)}{Math.round(entry.avgLatencyMs)}ms
No account usage in this range.
+
+ ), + }, + usageByRoute: { + title: "Usage by route", + render: () => ( +
+ + + + + + + + + + + + + {topRoutes.map((entry) => ( + + + + + + + + + ))} + {!topRoutes.length && ( + + + + )} + +
RouteReqErrorsStreamTokensAvg latency
{routeLabel(entry.route)}{entry.requests}{entry.errors}{entry.streamingRate.toFixed(1)}%{formatTokenCount(entry.tokens.total)}{Math.round(entry.avgLatencyMs)}ms
No route usage in this range.
+
+ ), + }, + topSessions: { + title: "Top sessions", + toolbar: ( + <> + + + + ), + render: () => ( + <> +

Session IDs are shown by tail only.

+
+ + + + + + + + + + + + + {topSessions.map((entry) => ( + + + + + + + + + ))} + {!topSessions.length && ( + + + + )} + +
SessionReqTokensCostAvg latencyLast seen
{formatSessionTail(entry.sessionId)}{entry.requests}{formatTokenCount(entry.tokens.total)}{usd(entry.costUsd)}{Math.round(entry.avgLatencyMs)}ms{fmt(entry.lastAt)}
No session-tagged traces in this range.
+
+ + ), + }, + }; + + return ( + <> +
+ + + + + +
+ +
+ + + + + +
+ +
+

Analytics card order is saved in this browser.

+
+ + +
+
+ +
+ {orderedCardIds.map((cardId, index) => { + const card = cards[cardId]; + return ( +
+
+

{card.title}

+ {renderCardControls(cardId, index, card.toolbar)} +
+ {card.render()} +
+ ); + })}
@@ -229,18 +603,40 @@ export function TracingTab(props: Props) { - Page {tracePagination.page} / {tracePagination.totalPages} ({tracePagination.total} traces) + + Page {tracePagination.page} / {tracePagination.totalPages} ({tracePagination.total} traces, {tracePagination.pageSize} per page) + +
+
+ {statusEntries.map(([status, count]) => { + const share = + traceUsageStats.totals.requests > 0 + ? (count / traceUsageStats.totals.requests) * 100 + : 0; + return ( + + {status}: {count} ({share.toFixed(1)}%) + + ); + })} + {!statusEntries.length && No traces} +
+

+ Matched {traceUsageStats.tracesMatched} of {traceUsageStats.tracesEvaluated} retained traces in the selected range. +

+
+ @@ -259,10 +655,12 @@ export function TracingTab(props: Props) { const accountLabel = sanitized ? maskEmail(t.accountEmail) || maskId(t.accountId) : t.accountEmail ?? t.accountId ?? "-"; + const sessionLabel = formatSessionTail(t.sessionId); return ( void toggleExpandedTrace(t.id)} className="trace-row"> + {isExpanded && ( -
TimeSession Route Model Account
{fmt(t.at)}{sessionLabel || "-"} {routeLabel(t.route)} {t.model ?? "-"} @@ -289,7 +687,7 @@ export function TracingTab(props: Props) {
+
{expandedTraceLoading &&
Loading trace details...
} {!expandedTraceLoading && expandedTrace && expandedTrace.id === t.id && ( diff --git a/web/src/lib/ui.ts b/web/src/lib/ui.ts index 8c156b0..844699f 100644 --- a/web/src/lib/ui.ts +++ b/web/src/lib/ui.ts @@ -1,6 +1,6 @@ -import type { TracePagination, TraceStats } from "../types"; +import type { TracePagination, TraceStats, TraceUsageStats, UsageSummary } from "../types"; -export const TRACE_PAGE_SIZE = 100; +export const TRACE_PAGE_SIZE = 50; export const CHART_COLORS = ["#1f7a8c", "#2da4b8", "#4c956c", "#f4a259", "#e76f51", "#8a5a44", "#355070", "#43aa8b"]; export const EMPTY_TRACE_STATS: TraceStats = { @@ -27,6 +27,35 @@ export const EMPTY_TRACE_PAGINATION: TracePagination = { hasNext: false, }; +const EMPTY_USAGE_SUMMARY: UsageSummary = { + requests: 0, + ok: 0, + errors: 0, + successRate: 0, + stream: 0, + streamingRate: 0, + latencyMsTotal: 0, + avgLatencyMs: 0, + requestsWithUsage: 0, + tokens: { + prompt: 0, + completion: 0, + total: 0, + }, + costUsd: 0, + statusCounts: {}, +}; + +export const EMPTY_TRACE_USAGE_STATS: TraceUsageStats = { + filters: {}, + totals: EMPTY_USAGE_SUMMARY, + byAccount: [], + byRoute: [], + bySession: [], + tracesEvaluated: 0, + tracesMatched: 0, +}; + export const fmt = (ts?: number) => (!ts ? "-" : new Date(ts).toLocaleString()); export const clampPct = (v: number) => Math.max(0, Math.min(100, v)); export const compactNumber = (v: number) => @@ -66,3 +95,9 @@ export function maskId(v?: string) { if (!v) return "acc-xxxx"; return "*"; } + +export function formatSessionTail(v?: string) { + const value = String(v ?? "").trim(); + if (!value) return "-"; + return value.length <= 8 ? value : `...${value.slice(-8)}`; +} diff --git a/web/src/styles.css b/web/src/styles.css index 426514e..a3991b0 100644 --- a/web/src/styles.css +++ b/web/src/styles.css @@ -199,6 +199,12 @@ button.danger { color: #fff; } +.btn.small, +button.small { + padding: 6px 9px; + font-size: 12px; +} + .tabs { display: flex; gap: 8px; @@ -391,6 +397,44 @@ small { display: block; color: var(--muted); } margin-bottom: 12px; } +.tracing-layout-actions { + display: flex; + justify-content: space-between; + align-items: center; + gap: 12px; +} + +.tracing-layout { + grid-template-columns: repeat(2, minmax(0, 1fr)); + align-items: start; +} + +.tracing-card.full-span { + grid-column: 1 / -1; +} + +.tracing-card-head { + display: flex; + justify-content: space-between; + align-items: flex-start; + gap: 12px; + margin-bottom: 12px; +} + +.tracing-card-head h2 { + margin-bottom: 0; +} + +.tracing-card-toolbar { + justify-content: flex-end; +} + +.trace-summary { + display: grid; + gap: 10px; + margin-bottom: 12px; +} + .chart-wrap { width: 100%; min-height: 260px; @@ -459,6 +503,14 @@ details summary { grid-template-columns: 1fr; } + .tracing-layout { + grid-template-columns: 1fr; + } + + .tracing-card.full-span { + grid-column: auto; + } + .topbar { align-items: flex-start; flex-direction: column; @@ -467,4 +519,10 @@ details summary { .modal-grid { grid-template-columns: 1fr; } + + .tracing-layout-actions, + .tracing-card-head { + align-items: flex-start; + flex-direction: column; + } } diff --git a/web/src/types.ts b/web/src/types.ts index fe9b810..2606ce3 100644 --- a/web/src/types.ts +++ b/web/src/types.ts @@ -15,6 +15,7 @@ export type Trace = { id: string; at: number; route: string; + sessionId?: string; accountId?: string; accountEmail?: string; model?: string; @@ -32,6 +33,52 @@ export type Trace = { hasRequestBody?: boolean; }; +export type UsageSummary = { + requests: number; + ok: number; + errors: number; + successRate: number; + stream: number; + streamingRate: number; + latencyMsTotal: number; + avgLatencyMs: number; + requestsWithUsage: number; + tokens: { + prompt: number; + completion: number; + total: number; + }; + costUsd: number; + statusCounts: Record; + firstAt?: number; + lastAt?: number; +}; + +export type TraceUsageStats = { + filters: { + accountId?: string; + route?: string; + sinceMs?: number; + untilMs?: number; + }; + totals: UsageSummary; + byAccount: Array< + UsageSummary & { + accountId: string; + account: { + id: string; + provider?: "openai" | "mistral"; + email?: string; + enabled?: boolean; + }; + } + >; + byRoute: Array; + bySession: Array; + tracesEvaluated: number; + tracesMatched: number; +}; + export type TraceStats = { totals: { requests: number;