diff --git a/.gitignore b/.gitignore
index 18f75a7..8be3fbd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,4 @@ dist
 web-dist
 .env
 web/*.tsbuildinfo
-data/accounts.json
-data/oauth-state.json
-data/requests-trace.jsonl
+data
diff --git a/README.md b/README.md
index 889332b..6abdba6 100644
--- a/README.md
+++ b/README.md
@@ -112,8 +112,9 @@ Because this is often deployed remotely (Unraid/VPS), onboarding uses a manual r
 2. For OpenAI accounts, enter the account email
 3. Click **Start OAuth**
 4. Complete login in browser
-5. Copy the full redirect URL shown after the callback completes
-6. Paste that URL in the dashboard and click **Complete OAuth**
+5. Wait for the local callback page to open on `localhost:1455`
+6. The dashboard should autofill the callback URL, or you can copy it from that page
+7. Click **Complete OAuth**
 
 Mistral accounts still use manual token entry in the dashboard.
 
@@ -281,6 +282,7 @@ Model alias admin endpoints:
 | `OAUTH_TOKEN_URL`               | `https://auth.openai.com/oauth/token`     | OAuth token endpoint                                                |
 | `OAUTH_SCOPE`                   | `openid profile email offline_access`     | OAuth scope                                                         |
 | `OAUTH_REDIRECT_URI`            | `http://localhost:1455/auth/callback`     | Redirect URI                                                        |
+| `OAUTH_CALLBACK_BIND_HOST`      | _(empty)_                                 | Override bind host for the local OAuth callback helper server (for example `0.0.0.0` in Docker) |
 | `MISTRAL_COMPACT_UPSTREAM_PATH` | `/v1/responses/compact`                   | Mistral upstream path for compact responses                         |
 
 ---
diff --git a/docker-compose.yml b/docker-compose.yml
index 07b9dd9..9c68467 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,6 +3,7 @@ services:
     build: .
     container_name: multivibe
     ports:
+      - "4010:4010"
       - "1455:1455"
     environment:
       - PORT=1455
@@ -22,6 +23,7 @@ services:
       - OAUTH_TOKEN_URL=https://auth.openai.com/oauth/token
       - OAUTH_SCOPE=openid profile email offline_access
       - OAUTH_REDIRECT_URI=http://localhost:1455/auth/callback
+      - OAUTH_CALLBACK_BIND_HOST=0.0.0.0
     volumes:
       - ./data:/data
     restart: unless-stopped
diff --git a/package.json b/package.json
index d2d56c0..835d2e5 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,8 @@
     "build:api": "tsc -p tsconfig.json",
     "build:web": "npm --prefix web run build",
     "build": "npm run build:web && npm run build:api",
-    "start": "node dist/server.js"
+    "start": "node dist/server.js",
+    "test": "node --test --test-force-exit test/*.test.js"
   },
   "dependencies": {
     "@foxglove/wasm-zstd": "^1.0.1",
diff --git a/src/account-utils.ts b/src/account-utils.ts
index ed513da..d35db51 100644
--- a/src/account-utils.ts
+++ b/src/account-utils.ts
@@ -1,37 +1,63 @@
 import { OAuthConfig } from "./oauth.js";
 import { mergeTokenIntoAccount, refreshAccessToken } from "./oauth.js";
-import { normalizeProvider, rememberError } from "./quota.js";
+import {
+  clearAuthFailureState,
+  normalizeProvider,
+  rememberError,
+} from "./quota.js";
 import type { Account } from "./types.js";
+import {
+  TOKEN_REFRESH_COOLDOWN_MS,
+  TOKEN_REFRESH_MARGIN_MS,
+} from "./config.js";
+
+const refreshInFlight = new Map<string, Promise<Account>>();
 
 export async function ensureValidToken(
   account: Account,
   oauthConfig: OAuthConfig,
 ): Promise<Account> {
   if (normalizeProvider(account) !== "openai") return account;
-  if (!account.expiresAt || Date.now() < account.expiresAt - 5 * 60_000)
+  if (!account.expiresAt || Date.now() < account.expiresAt - TOKEN_REFRESH_MARGIN_MS)
     return account;
   if (!account.refreshToken) return account;
-
-  try {
-    const refreshed = await refreshAccessToken(
-      oauthConfig,
-      account.refreshToken,
-    );
-    const merged = mergeTokenIntoAccount(account, refreshed);
-    merged.state = {
-      ...merged.state,
-      needsTokenRefresh: false,
-    };
-    return merged;
-  } catch (err: any) {
-    rememberError(
-      account,
-      `refresh token failed: ${err?.message ?? String(err)}`,
-    );
-    account.state = {
-      ...account.state,
-      needsTokenRefresh: true,
-    };
+  const refreshToken = account.refreshToken;
+  if (
+    typeof account.state?.refreshBlockedUntil === "number" &&
+    Date.now() < account.state.refreshBlockedUntil
+  ) {
     return account;
   }
+
+  const existing = refreshInFlight.get(account.id);
+  if (existing) return existing;
+
+  const run = (async () => {
+    try {
+      const refreshed = await refreshAccessToken(
+        oauthConfig,
+        refreshToken,
+      );
+      const merged = mergeTokenIntoAccount(account, refreshed);
+      clearAuthFailureState(merged);
+      return merged;
+    } catch (err: any) {
+      const message = err?.message ?? String(err);
+      rememberError(account, `refresh token failed: ${message}`);
+      const failureCount = (account.state?.refreshFailureCount ?? 0) + 1;
+      account.state = {
+        ...account.state,
+        needsTokenRefresh: true,
+        refreshFailureCount: failureCount,
+        refreshBlockedUntil:
+          Date.now() + TOKEN_REFRESH_COOLDOWN_MS * Math.min(failureCount, 6),
+      };
+      return account;
+    } finally {
+      refreshInFlight.delete(account.id);
+    }
+  })();
+
+  refreshInFlight.set(account.id, run);
+  return run;
 }
diff --git a/src/config.ts b/src/config.ts
index d24e2f5..988eeca 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -1,5 +1,6 @@
 import os from "node:os";
 
+export const HOST = process.env.HOST ?? "127.0.0.1";
 export const PORT = Number(process.env.PORT ?? 1455);
 export const STORE_PATH = process.env.STORE_PATH ?? "/data/accounts.json";
 export const OAUTH_STATE_PATH =
@@ -28,17 +29,19 @@ export const ZAI_UPSTREAM_PATH =
 export const ZAI_COMPACT_UPSTREAM_PATH =
   process.env.ZAI_COMPACT_UPSTREAM_PATH ?? "/v1/chat/completions";
 export const ADMIN_TOKEN = process.env.ADMIN_TOKEN ?? "";
+export const STORE_ENCRYPTION_KEY =
+  process.env.STORE_ENCRYPTION_KEY ?? "";
 export const MAX_ACCOUNT_RETRY_ATTEMPTS = Math.max(
   1,
   Number(process.env.MAX_ACCOUNT_RETRY_ATTEMPTS ?? 5),
 );
-export const MAX_UPSTREAM_RETRIES = Math.max(
+export const MAX_GET_RETRIES = Math.max(
   0,
-  Number(process.env.MAX_UPSTREAM_RETRIES ?? 3),
+  Number(process.env.MAX_GET_RETRIES ?? 2),
 );
-export const UPSTREAM_BASE_DELAY_MS = Math.max(
+export const RETRY_BASE_DELAY_MS = Math.max(
   100,
-  Number(process.env.UPSTREAM_BASE_DELAY_MS ?? 1000),
+  Number(process.env.RETRY_BASE_DELAY_MS ?? 250),
 );
 export const PI_USER_AGENT = `pi (${os.platform()} ${os.release()}; ${os.arch()})`;
 
@@ -57,6 +60,39 @@ export const MODELS_CACHE_MS = Number(
 export const TOKEN_REFRESH_MARGIN_MS = Number(
   process.env.TOKEN_REFRESH_MARGIN_MS ?? 60_000,
 );
+export const TOKEN_REFRESH_COOLDOWN_MS = Number(
+  process.env.TOKEN_REFRESH_COOLDOWN_MS ?? 5 * 60_000,
+);
+export const UPSTREAM_REQUEST_TIMEOUT_MS = Number(
+  process.env.UPSTREAM_REQUEST_TIMEOUT_MS ?? 60_000,
+);
+export const MODEL_DISCOVERY_TIMEOUT_MS = Number(
+  process.env.MODEL_DISCOVERY_TIMEOUT_MS ?? 8_000,
+);
+export const OAUTH_REQUEST_TIMEOUT_MS = Number(
+  process.env.OAUTH_REQUEST_TIMEOUT_MS ?? 15_000,
+);
+export const OAUTH_CALLBACK_BIND_HOST =
+  process.env.OAUTH_CALLBACK_BIND_HOST ?? "";
+export const MODEL_COMPATIBILITY_TTL_MS = Number(
+  process.env.MODEL_COMPATIBILITY_TTL_MS ?? 6 * 60 * 60_000,
+);
+export const SERVER_HEADERS_TIMEOUT_MS = Number(
+  process.env.SERVER_HEADERS_TIMEOUT_MS ?? 30_000,
+);
+export const SERVER_KEEP_ALIVE_TIMEOUT_MS = Number(
+  process.env.SERVER_KEEP_ALIVE_TIMEOUT_MS ?? 5_000,
+);
+export const SERVER_REQUEST_TIMEOUT_MS = Number(
+  process.env.SERVER_REQUEST_TIMEOUT_MS ?? 90_000,
+);
+export const SHUTDOWN_GRACE_MS = Number(
+  process.env.SHUTDOWN_GRACE_MS ?? 10_000,
+);
+export const TRACE_COMPACTION_INTERVAL = Math.max(
+  1,
+  Number(process.env.TRACE_COMPACTION_INTERVAL ?? 100),
+);
 
 export const ACCOUNT_FLUSH_INTERVAL_MS = Number(
   process.env.ACCOUNT_FLUSH_INTERVAL_MS ?? 5_000,
diff --git a/src/crypto.ts b/src/crypto.ts
new file mode 100644
index 0000000..39ab686
--- /dev/null
+++ b/src/crypto.ts
@@ -0,0 +1,109 @@
+import { createCipheriv, createDecipheriv, createHash, randomBytes } from "node:crypto";
+
+/** Serialized envelope written by encryptJson; iv, tag and data are base64 strings. */
+type Envelope = {
+  v: 1;
+  alg: "aes-256-gcm";
+  iv: string;
+  tag: string;
+  data: string;
+};
+
+/** Nonce length in bytes used for every new envelope. */
+const IV_BYTES = 12;
+/** Only full-length (128-bit) GCM authentication tags are produced and accepted. */
+const AUTH_TAG_BYTES = 16;
+
+/**
+ * Derives the 32-byte AES-256 key as the SHA-256 digest of `secret`.
+ *
+ * NOTE(review): this is a single unsalted hash, not a password KDF, so
+ * `secret` should already be high-entropy. Changing the derivation would make
+ * previously written envelopes undecryptable.
+ */
+function deriveKey(secret: string): Buffer {
+  return createHash("sha256").update(secret, "utf8").digest();
+}
+
+/** Runtime shape check for a parsed envelope (version, algorithm, string fields). */
+function isEnvelope(value: unknown): value is Envelope {
+  if (typeof value !== "object" || value === null) return false;
+  const candidate = value as Record<string, unknown>;
+  return (
+    candidate.v === 1 &&
+    candidate.alg === "aes-256-gcm" &&
+    typeof candidate.iv === "string" &&
+    typeof candidate.tag === "string" &&
+    typeof candidate.data === "string"
+  );
+}
+
+/**
+ * Encrypts `value` (serialized with JSON.stringify) under a key derived from
+ * `secret`, using AES-256-GCM with a fresh random nonce per call.
+ *
+ * @returns the envelope as pretty-printed JSON text.
+ * @throws TypeError when `value` has no JSON representation (e.g. `undefined`).
+ */
+export function encryptJson<T>(value: T, secret: string): string {
+  const serialized: string | undefined = JSON.stringify(value);
+  if (serialized === undefined) {
+    throw new TypeError("value is not JSON-serializable");
+  }
+  const iv = randomBytes(IV_BYTES);
+  const cipher = createCipheriv("aes-256-gcm", deriveKey(secret), iv, {
+    authTagLength: AUTH_TAG_BYTES,
+  });
+  const ciphertext = Buffer.concat([cipher.update(serialized, "utf8"), cipher.final()]);
+  const envelope: Envelope = {
+    v: 1,
+    alg: "aes-256-gcm",
+    iv: iv.toString("base64"),
+    tag: cipher.getAuthTag().toString("base64"),
+    data: ciphertext.toString("base64"),
+  };
+  return JSON.stringify(envelope, null, 2);
+}
+
+/**
+ * Reverses encryptJson: validates the envelope, authenticates and decrypts the
+ * payload, then parses the plaintext as JSON.
+ *
+ * @throws Error("unsupported encrypted payload") when the envelope is malformed
+ *   or carries a tag that is not exactly 16 bytes.
+ * @throws when `raw` is not JSON, or when authentication fails (wrong secret
+ *   or tampered data) — the latter surfaces from `decipher.final()`.
+ */
+export function decryptJson<T>(raw: string, secret: string): T {
+  const parsed: unknown = JSON.parse(raw);
+  if (!isEnvelope(parsed)) {
+    throw new Error("unsupported encrypted payload");
+  }
+  const tag = Buffer.from(parsed.tag, "base64");
+  // GCM accepts truncated tags unless told otherwise; a shortened tag makes
+  // forgeries far cheaper, so insist on the full length that encryptJson writes.
+  if (tag.length !== AUTH_TAG_BYTES) {
+    throw new Error("unsupported encrypted payload");
+  }
+  const decipher = createDecipheriv(
+    "aes-256-gcm",
+    deriveKey(secret),
+    Buffer.from(parsed.iv, "base64"),
+    { authTagLength: AUTH_TAG_BYTES },
+  );
+  decipher.setAuthTag(tag);
+  const decrypted = Buffer.concat([
+    decipher.update(Buffer.from(parsed.data, "base64")),
+    decipher.final(),
+  ]);
+  return JSON.parse(decrypted.toString("utf8")) as T;
+}
+
+/**
+ * Cheap sniff test: true when `raw` begins with the `{"v":1,"alg":"aes-256-gcm"`
+ * prefix (whitespace-tolerant) that encryptJson emits. It does not validate
+ * the rest of the envelope.
+ */
+export function looksEncryptedJson(raw: string): boolean {
+  return /^\s*\{\s*"v"\s*:\s*1\s*,\s*"alg"\s*:\s*"aes-256-gcm"/.test(raw);
+}
diff --git a/src/oauth-callback-server.ts b/src/oauth-callback-server.ts
new file mode 100644
index 0000000..a095a38
--- /dev/null
+++ b/src/oauth-callback-server.ts
@@ -0,0 +1,191 @@
+import http from "node:http";
+
+/**
+ * True when `hostname` names the local loopback interface.
+ *
+ * `URL#hostname` keeps the square brackets around IPv6 literals, so a redirect
+ * URI such as `http://[::1]:1455/…` yields "[::1]"; both spellings are accepted.
+ */
+function isLoopbackHostname(hostname: string): boolean {
+  return (
+    hostname === "127.0.0.1" ||
+    hostname === "localhost" ||
+    hostname === "::1" ||
+    hostname === "[::1]"
+  );
+}
+
+/**
+ * Static HTML for the callback landing page. Its inline script shows
+ * `window.location.href` in a read-only text box, offers a copy button, and
+ * posts the URL to `window.opener` as a "multivibe-oauth-callback" message.
+ *
+ * NOTE(review): the message is posted with target origin "*", so whichever
+ * page opened this window receives the full callback URL, query string
+ * included. Consider restricting the target origin once the dashboard origin
+ * is available to this module.
+ */
+function callbackPageHtml(): string {
+  return `<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>MultiVibe OAuth Callback</title>
+    <style>
+      :root {
+        color-scheme: light;
+        font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+      }
+      body {
+        margin: 0;
+        min-height: 100vh;
+        display: grid;
+        place-items: center;
+        background: #f5f7fb;
+        color: #0f172a;
+      }
+      main {
+        width: min(680px, calc(100vw - 32px));
+        padding: 24px;
+        border-radius: 18px;
+        background: #ffffff;
+        box-shadow: 0 18px 50px rgba(15, 23, 42, 0.12);
+      }
+      h1 {
+        margin: 0 0 10px;
+        font-size: 24px;
+      }
+      p {
+        margin: 0 0 14px;
+        line-height: 1.5;
+      }
+      textarea {
+        width: 100%;
+        min-height: 148px;
+        margin: 12px 0 16px;
+        padding: 12px;
+        border: 1px solid #cbd5e1;
+        border-radius: 12px;
+        box-sizing: border-box;
+        font: 13px/1.5 ui-monospace, SFMono-Regular, Menlo, monospace;
+        resize: vertical;
+      }
+      .row {
+        display: flex;
+        flex-wrap: wrap;
+        gap: 10px;
+        align-items: center;
+      }
+      button {
+        border: 0;
+        border-radius: 999px;
+        padding: 10px 16px;
+        background: #0f172a;
+        color: #ffffff;
+        font: inherit;
+        cursor: pointer;
+      }
+      code {
+        font: 13px ui-monospace, SFMono-Regular, Menlo, monospace;
+      }
+      .muted {
+        color: #475569;
+        font-size: 14px;
+      }
+    </style>
+  </head>
+  <body>
+    <main>
+      <h1>OAuth callback received</h1>
+      <p>The full callback URL is below. It has also been sent back to the dashboard window when possible.</p>
+      <textarea id="callback-url" readonly></textarea>
+      <div class="row">
+        <button id="copy-button" type="button">Copy callback URL</button>
+        <span class="muted" id="status">You can paste this into the dashboard if it does not autofill.</span>
+      </div>
+      <p class="muted">Expected path: <code>/auth/callback</code></p>
+    </main>
+    <script>
+      (function () {
+        var callbackUrl = window.location.href;
+        var textarea = document.getElementById("callback-url");
+        var status = document.getElementById("status");
+        var copyButton = document.getElementById("copy-button");
+        if (textarea) textarea.value = callbackUrl;
+        if (window.opener && typeof window.opener.postMessage === "function") {
+          window.opener.postMessage(
+            { type: "multivibe-oauth-callback", callbackUrl: callbackUrl },
+            "*",
+          );
+          if (status) status.textContent = "Sent to the dashboard window. You can still copy it manually.";
+        }
+        if (copyButton) {
+          copyButton.addEventListener("click", function () {
+            navigator.clipboard.writeText(callbackUrl).then(
+              function () {
+                if (status) status.textContent = "Callback URL copied.";
+              },
+              function () {
+                if (textarea) {
+                  textarea.focus();
+                  textarea.select();
+                }
+                if (status) status.textContent = "Clipboard access failed. Copy from the text box.";
+              },
+            );
+          });
+        }
+      })();
+    </script>
+  </body>
+</html>`;
+}
+
+/**
+ * Builds (without starting) the helper HTTP server that serves the callback
+ * landing page for `redirectUri`.
+ *
+ * @returns null when `redirectUri` cannot be parsed, is not plain `http:`, does
+ *   not point at a loopback host, or has no explicit port; otherwise a server
+ *   that answers `GET <redirect path>` with the landing page and everything
+ *   else with 404. The caller is responsible for calling `listen()`.
+ */
+export function createOAuthCallbackServer(redirectUri: string): http.Server | null {
+  let url: URL;
+  try {
+    url = new URL(redirectUri);
+  } catch {
+    return null;
+  }
+
+  if (url.protocol !== "http:" || !isLoopbackHostname(url.hostname) || !url.port) {
+    return null;
+  }
+
+  const expectedPath = url.pathname || "/";
+
+  return http.createServer((req, res) => {
+    // Only the path is compared, so resolve the request target against a fixed
+    // base instead of the client-supplied Host header: a malformed Host (or a
+    // target such as "//") makes `new URL` throw, and an exception escaping the
+    // request listener would surface as an uncaught exception.
+    let requestPath: string | undefined;
+    try {
+      requestPath = new URL(req.url ?? "/", "http://localhost").pathname;
+    } catch {
+      requestPath = undefined;
+    }
+
+    if (req.method !== "GET" || requestPath !== expectedPath) {
+      res.statusCode = 404;
+      res.setHeader("content-type", "text/plain; charset=utf-8");
+      res.end("not found");
+      return;
+    }
+
+    res.statusCode = 200;
+    res.setHeader("content-type", "text/html; charset=utf-8");
+    res.end(callbackPageHtml());
+  });
+}
diff --git a/src/oauth.ts b/src/oauth.ts
index b19f1fd..e52b50b 100644
--- a/src/oauth.ts
+++ b/src/oauth.ts
@@ -1,5 +1,6 @@
 import { createHash, randomBytes, randomUUID } from "node:crypto";
 import type { Account, OAuthFlowState } from "./types.js";
+import { OAUTH_REQUEST_TIMEOUT_MS } from "./config.js";
 
 export type OAuthConfig = {
   authorizationUrl: string;
@@ -89,10 +90,12 @@ export function parseAuthorizationInput(input: string): { code?: string; state?:
 }
 
 async function postForm(url: string, body: URLSearchParams): Promise<TokenResponse> {
+  const signal = AbortSignal.timeout(OAUTH_REQUEST_TIMEOUT_MS);
   const res = await fetch(url, {
     method: "POST",
     headers: { "content-type": "application/x-www-form-urlencoded" },
     body,
+    signal,
   });
 
   const text = await res.text();
diff --git a/src/quota.ts b/src/quota.ts
index 9a8e2a2..26d769c 100644
--- a/src/quota.ts
+++ b/src/quota.ts
@@ -1,16 +1,21 @@
 import type { Account, ProviderId, UsageSnapshot } from "./types.js";
+import { MODEL_COMPATIBILITY_TTL_MS } from "./config.js";
 
 export const USAGE_CACHE_TTL_MS = Number(process.env.USAGE_CACHE_TTL_MS ?? 300_000);
 const USAGE_TIMEOUT_MS = Number(process.env.USAGE_TIMEOUT_MS ?? 10_000);
 const BLOCK_FALLBACK_MS = Number(process.env.BLOCK_FALLBACK_MS ?? 30 * 60_000);
-const DEFAULT_ROUTING_WINDOW_MS = Number(process.env.ROUTING_WINDOW_MS ?? 5 * 60 * 1000);
+const DEFAULT_ROUTING_WINDOW_MS = Number(process.env.ROUTING_WINDOW_MS ?? 0);
+const AUTH_FALLBACK_MS = Number(process.env.AUTH_FALLBACK_MS ?? 60 * 60_000);
 
 type RouteCache = {
-  bucket: number;
   accountId?: string;
+  bucketByWindowMs: Map<number, number>;
 };
 
-const routeCache: RouteCache = { bucket: -1, accountId: undefined };
+const routeCache: RouteCache = {
+  accountId: undefined,
+  bucketByWindowMs: new Map(),
+};
 
 export function normalizeProvider(account?: Account): ProviderId {
   if (account?.provider === "mistral") return "mistral";
@@ -58,6 +63,55 @@ export function rememberError(account: Account, message: string) {
   account.state = { ...account.state, lastError: message, recentErrors: next };
 }
 
+/** Type guard: `reason` is a string starting with "auth failure:" (case-insensitive). */
+function isAuthFailureReason(reason: unknown): reason is string {
+  if (typeof reason !== "string") return false;
+  return /^auth failure:/i.test(reason);
+}
+
+/**
+ * Type guard: `message` is a string starting with one of the auth-related
+ * prefixes "auth failure:", "refresh token failed:" or "usage probe failed 401".
+ */
+function isAuthRelatedErrorMessage(message: unknown): message is string {
+  if (typeof message !== "string") return false;
+  return /^(auth failure:|refresh token failed:|usage probe failed 401\b)/i.test(
+    message,
+  );
+}
+
+/**
+ * Resets auth-failure bookkeeping on `account.state` in place; a no-op when the
+ * account has no state object.
+ *
+ * - `blockedUntil` / `blockedReason` are cleared only when the block reason is
+ *   an auth failure; other blocks are left untouched.
+ * - `needsTokenRefresh`, `refreshFailureCount` and `refreshBlockedUntil` are reset.
+ * - Auth-related entries are dropped from `lastError` and `recentErrors`
+ *   (`recentErrors` becomes `undefined` when nothing is left).
+ */
+export function clearAuthFailureState(account: Account) {
+  const previous = account.state;
+  if (!previous) return;
+
+  const authBlocked = isAuthFailureReason(previous.blockedReason);
+  const keptErrors = (previous.recentErrors ?? []).filter(
+    (entry) => !isAuthRelatedErrorMessage(entry?.message),
+  );
+  const authLastError = isAuthRelatedErrorMessage(previous.lastError);
+
+  account.state = {
+    ...previous,
+    blockedUntil: authBlocked ? undefined : previous.blockedUntil,
+    blockedReason: authBlocked ? undefined : previous.blockedReason,
+    needsTokenRefresh: false,
+    refreshFailureCount: 0,
+    refreshBlockedUntil: undefined,
+    lastError: authLastError ? undefined : previous.lastError,
+    recentErrors: keptErrors.length > 0 ? keptErrors : undefined,
+  };
+}
+
 export function usageUntouched(usage?: UsageSnapshot): boolean {
   return usage?.primary?.usedPercent === 0 && usage?.secondary?.usedPercent === 0;
 }
@@ -95,9 +137,59 @@ export function accountUsable(a: Account): boolean {
   return !(typeof until === "number" && Date.now() < until);
 }
 
+/**
+ * Canonical lookup key for a model name: trimmed, lower-cased, and reduced to
+ * the text after the last "/" (so "Vendor/Model-X" becomes "model-x").
+ * Returns "" for a missing or blank name.
+ */
+function normalizeModelKey(model?: string): string {
+  const lowered = (model ?? "").trim().toLowerCase();
+  const lastSlash = lowered.lastIndexOf("/");
+  return lastSlash === -1 ? lowered : lowered.slice(lastSlash + 1);
+}
+
+/**
+ * Whether `account` may be tried for `model`.
+ *
+ * Answers `true` unless a compatibility record exists for the normalized model
+ * key, is no older than MODEL_COMPATIBILITY_TTL_MS, and says unsupported.
+ */
+export function accountSupportsModel(account: Account, model?: string): boolean {
+  const key = normalizeModelKey(model);
+  const record = key ? account.state?.modelAvailability?.[key] : undefined;
+  if (!record) return true;
+
+  const expired = Date.now() - record.checkedAt > MODEL_COMPATIBILITY_TTL_MS;
+  return expired || record.supported;
+}
+
+/**
+ * Records on `account.state.modelAvailability` whether `model` worked for this
+ * account, stamped with the current time. Does nothing when the model name
+ * normalizes to an empty key.
+ */
+export function markModelCompatibility(
+  account: Account,
+  model: string | undefined,
+  supported: boolean,
+  reason?: string,
+) {
+  const key = normalizeModelKey(model);
+  if (!key) return;
+
+  const modelAvailability = {
+    ...(account.state?.modelAvailability ?? {}),
+    [key]: { supported, checkedAt: Date.now(), reason },
+  };
+  account.state = { ...account.state, modelAvailability };
+}
+
 export function chooseAccount(accounts: Account[]): Account | null {
   const now = Date.now();
-  const windowMs = Number.isFinite(DEFAULT_ROUTING_WINDOW_MS) && DEFAULT_ROUTING_WINDOW_MS > 0 ? DEFAULT_ROUTING_WINDOW_MS : 5 * 60 * 1000;
+  const windowMs =
+    Number.isFinite(DEFAULT_ROUTING_WINDOW_MS) && DEFAULT_ROUTING_WINDOW_MS > 0
+      ? DEFAULT_ROUTING_WINDOW_MS
+      : 0;
 
   const available = accounts.filter((a) => {
     if (!a.enabled) return false;
@@ -106,11 +188,13 @@ export function chooseAccount(accounts: Account[]): Account | null {
   });
   if (!available.length) return null;
 
-  const bucket = nowBucket(now, windowMs);
-
-  if (routeCache.bucket === bucket && routeCache.accountId) {
-    const sticky = available.find((a) => a.id === routeCache.accountId);
-    if (sticky) return sticky;
+  if (windowMs > 0) {
+    const bucket = nowBucket(now, windowMs);
+    const stickyBucket = routeCache.bucketByWindowMs.get(windowMs);
+    if (stickyBucket === bucket && routeCache.accountId) {
+      const sticky = available.find((a) => a.id === routeCache.accountId);
+      if (sticky) return sticky;
+    }
   }
 
   const untouched = available.filter((a) => {
@@ -122,6 +206,14 @@ export function chooseAccount(accounts: Account[]): Account | null {
   const pool = untouched.length ? untouched : available;
 
   const sorted = [...pool].sort((a, b) => {
+    const ap = a.priority ?? Number.MAX_SAFE_INTEGER;
+    const bp = b.priority ?? Number.MAX_SAFE_INTEGER;
+    if (ap !== bp) return ap - bp;
+
+    const al = a.state?.lastSelectedAt ?? 0;
+    const bl = b.state?.lastSelectedAt ?? 0;
+    if (al !== bl) return al - bl;
+
     const sa = scoreAccount(a);
     const sb = scoreAccount(b);
     if (sa !== sb) return sa - sb;
@@ -130,16 +222,14 @@ export function chooseAccount(accounts: Account[]): Account | null {
     const br = b.usage?.secondary?.resetAt ?? Number.MAX_SAFE_INTEGER;
     if (ar !== br) return ar - br;
 
-    const ap = a.priority ?? Number.MAX_SAFE_INTEGER;
-    const bp = b.priority ?? Number.MAX_SAFE_INTEGER;
-    if (ap !== bp) return ap - bp;
-
     return a.id.localeCompare(b.id);
   });
 
   const winner = sorted[0] ?? null;
-  routeCache.bucket = bucket;
   routeCache.accountId = winner?.id;
+  if (windowMs > 0 && winner) {
+    routeCache.bucketByWindowMs.set(windowMs, nowBucket(now, windowMs));
+  }
 
   return winner;
 }
@@ -178,6 +268,7 @@ export async function refreshUsageIfNeeded(account: Account, chatgptBaseUrl: str
     if (!res.ok) throw new Error(`usage probe failed ${res.status}`);
     const json = await res.json();
     account.usage = parseOpenAIUsage(json);
+    clearAuthFailureState(account);
     account.state = { ...account.state, lastError: undefined };
     return account;
   } catch (err: any) {
@@ -198,6 +289,33 @@ export function markQuotaHit(account: Account, message: string) {
   rememberError(account, message);
 }
 
+/**
+ * Blocks `account` for AUTH_FALLBACK_MS after an authentication failure, flags
+ * it as needing a token refresh, and records `message` as both the block reason
+ * and the latest error.
+ */
+export function markAuthFailure(account: Account, message: string) {
+  const blockedUntil = Date.now() + AUTH_FALLBACK_MS;
+  account.state = {
+    ...account.state,
+    blockedUntil,
+    blockedReason: message,
+    needsTokenRefresh: true,
+  };
+  rememberError(account, message);
+}
+
+/**
+ * Marks the model named in `message` as unsupported for `account` and records
+ * the error. The model name is the text captured by /for ([^:]+):/; when the
+ * message does not match, only the error is recorded.
+ */
+export function markModelUnsupported(account: Account, message: string) {
+  const [, model] = /for ([^:]+):/.exec(message) ?? [];
+  markModelCompatibility(account, model, false, message);
+  rememberError(account, message);
+}
+
 // z.ai business error code categories for smarter handling
 const ZAI_AUTH_ERRORS = new Set([1000, 1001, 1002, 1003, 1004]);
 const ZAI_ACCOUNT_ERRORS = new Set([1110, 1111, 1112, 1113, 1120, 1121]);
diff --git a/src/routes/admin/index.ts b/src/routes/admin/index.ts
index 2c39cbf..9fd1ff8 100644
--- a/src/routes/admin/index.ts
+++ b/src/routes/admin/index.ts
@@ -2,7 +2,11 @@ import express from "express";
 import { randomUUID } from "node:crypto";
 import { AccountStore, OAuthStateStore } from "../../store.js";
 import type { Account, ModelAlias } from "../../types.js";
-import { normalizeProvider, refreshUsageIfNeeded } from "../../quota.js";
+import {
+  clearAuthFailureState,
+  normalizeProvider,
+  refreshUsageIfNeeded,
+} from "../../quota.js";
 import {
   accountFromOAuth,
   buildAuthorizationUrl,
@@ -60,6 +64,138 @@ function sanitizeAliasId(value: unknown): string {
     .replace(/^-+|-+$/g, "");
 }
 
+/** Request-body keys accepted by the account create/update endpoints. */
+const ACCOUNT_MUTABLE_KEYS = new Set([
+  "id",
+  "provider",
+  "email",
+  "accessToken",
+  "refreshToken",
+  "expiresAt",
+  "chatgptAccountId",
+  "enabled",
+  "priority",
+]);
+
+/**
+ * Returns an "unknown fields: …" message naming every key of `body` that is
+ * missing from `allowed`, or `undefined` when every key is allowed.
+ */
+function rejectUnknownKeys(
+  body: Record<string, unknown>,
+  allowed: Set<string>,
+): string | undefined {
+  const unknown = Object.keys(body).filter((key) => !allowed.has(key));
+  if (!unknown.length) return undefined;
+  return `unknown fields: ${unknown.join(", ")}`;
+}
+
+/**
+ * Strict numeric parse for request fields: accepts finite numbers and
+ * non-blank numeric strings. Plain `Number(value)` would also coerce `true`,
+ * `""`, `[]` and `null` to finite numbers and let them pass validation.
+ */
+function toFiniteNumber(value: unknown): number | undefined {
+  if (typeof value === "number") return Number.isFinite(value) ? value : undefined;
+  if (typeof value !== "string" || !value.trim()) return undefined;
+  const parsed = Number(value);
+  return Number.isFinite(parsed) ? parsed : undefined;
+}
+
+/**
+ * Validates an account create/update payload and converts it to a typed patch.
+ *
+ * - Keys outside ACCOUNT_MUTABLE_KEYS are rejected.
+ * - `id` is only read when `allowId` is true; otherwise it is ignored.
+ * - String fields are trimmed; a blank `email`, `refreshToken` or
+ *   `chatgptAccountId` becomes `undefined`, as does `null` for the latter two
+ *   and for `expiresAt`.
+ * - `expiresAt` (>= 0) and `priority` must be finite numbers or numeric strings.
+ *
+ * @returns `{ patch }` on success, or `{ error }` for the first problem found.
+ */
+function parseAccountPatch(
+  body: Record<string, unknown>,
+  allowId: boolean,
+): { patch?: Partial<Account>; error?: string } {
+  if (typeof body !== "object" || body === null || Array.isArray(body)) {
+    return { error: "body must be a JSON object" };
+  }
+  const error = rejectUnknownKeys(body, ACCOUNT_MUTABLE_KEYS);
+  if (error) return { error };
+
+  const patch: Partial<Account> = {};
+  if (allowId && typeof body.id !== "undefined") {
+    if (typeof body.id !== "string" || !body.id.trim()) {
+      return { error: "id must be a non-empty string" };
+    }
+    patch.id = body.id.trim();
+  }
+  if (typeof body.provider !== "undefined") {
+    if (body.provider !== "openai" && body.provider !== "mistral") {
+      return { error: "provider must be openai or mistral" };
+    }
+    patch.provider = body.provider;
+  }
+  if (typeof body.email !== "undefined") {
+    if (typeof body.email !== "string") return { error: "email must be a string" };
+    patch.email = body.email.trim() || undefined;
+  }
+  if (typeof body.accessToken !== "undefined") {
+    if (typeof body.accessToken !== "string" || !body.accessToken.trim()) {
+      return { error: "accessToken must be a non-empty string" };
+    }
+    patch.accessToken = body.accessToken.trim();
+  }
+  if (typeof body.refreshToken !== "undefined") {
+    if (body.refreshToken !== null && typeof body.refreshToken !== "string") {
+      return { error: "refreshToken must be a string" };
+    }
+    patch.refreshToken =
+      typeof body.refreshToken === "string" && body.refreshToken.trim()
+        ? body.refreshToken.trim()
+        : undefined;
+  }
+  if (typeof body.expiresAt !== "undefined") {
+    // null explicitly clears the expiry.
+    if (body.expiresAt === null) {
+      patch.expiresAt = undefined;
+    } else {
+      const expiresAt = toFiniteNumber(body.expiresAt);
+      if (expiresAt === undefined || expiresAt < 0) {
+        return { error: "expiresAt must be a positive number" };
+      }
+      patch.expiresAt = expiresAt;
+    }
+  }
+  if (typeof body.chatgptAccountId !== "undefined") {
+    if (
+      body.chatgptAccountId !== null &&
+      typeof body.chatgptAccountId !== "string"
+    ) {
+      return { error: "chatgptAccountId must be a string" };
+    }
+    patch.chatgptAccountId =
+      typeof body.chatgptAccountId === "string" &&
+      body.chatgptAccountId.trim()
+        ? body.chatgptAccountId.trim()
+        : undefined;
+  }
+  if (typeof body.enabled !== "undefined") {
+    if (typeof body.enabled !== "boolean") return { error: "enabled must be a boolean" };
+    patch.enabled = body.enabled;
+  }
+  if (typeof body.priority !== "undefined") {
+    // null keeps mapping to 0, exactly as Number(null) does.
+    const priority = body.priority === null ? 0 : toFiniteNumber(body.priority);
+    if (priority === undefined) {
+      return { error: "priority must be a finite number" };
+    }
+    patch.priority = priority;
+  }
+  return { patch };
+}
+
 function normalizeAliasTargets(value: unknown): string[] {
   if (!Array.isArray(value)) return [];
   return Array.from(
@@ -409,6 +509,7 @@ export function createAdminRouter(options: AdminRoutesOptions) {
     const globalAgg = createUsageAggregate();
     const byAccount = new Map<string, ReturnType<typeof createUsageAggregate>>();
     const byRoute = new Map<string, ReturnType<typeof createUsageAggregate>>();
+    const bySession = new Map<string, ReturnType<typeof createUsageAggregate>>();
 
     for (const trace of filtered) {
       addTraceToAggregate(globalAgg, trace);
@@ -421,6 +522,16 @@ export function createAdminRouter(options: AdminRoutesOptions) {
       const routeKey = trace.route ?? "unknown";
       if (!byRoute.has(routeKey)) byRoute.set(routeKey, createUsageAggregate());
       addTraceToAggregate(byRoute.get(routeKey)!, trace);
+
+      const sessionKey =
+        typeof trace.sessionId === "string" && trace.sessionId.trim()
+          ? trace.sessionId.trim()
+          : "";
+      if (sessionKey) {
+        if (!bySession.has(sessionKey))
+          bySession.set(sessionKey, createUsageAggregate());
+        addTraceToAggregate(bySession.get(sessionKey)!, trace);
+      }
     }
 
     const accounts = await store.listAccounts();
@@ -453,6 +564,10 @@ export function createAdminRouter(options: AdminRoutesOptions) {
       .map(([route, agg]) => ({ route, ...finalizeAggregate(agg) }))
       .sort((a, b) => b.requests - a.requests);
 
+    const bySessionOut = Array.from(bySession.entries())
+      .map(([sessionId, agg]) => ({ sessionId, ...finalizeAggregate(agg) }))
+      .sort((a, b) => b.requests - a.requests);
+
     res.json({
       ok: true,
       filters: {
@@ -464,6 +579,7 @@ export function createAdminRouter(options: AdminRoutesOptions) {
       totals: finalizeAggregate(globalAgg),
       byAccount: byAccountOut,
       byRoute: byRouteOut,
+      bySession: bySessionOut,
       tracesEvaluated: traces.length,
       tracesMatched: filtered.length,
     });
@@ -487,28 +603,34 @@ export function createAdminRouter(options: AdminRoutesOptions) {
   });
 
   router.post("/accounts", async (req, res) => {
-    const body = req.body ?? {};
-    if (!body.accessToken)
+    const body = (req.body ?? {}) as Record<string, unknown>;
+    const parsed = parseAccountPatch(body, true);
+    if (parsed.error) return res.status(400).json({ error: parsed.error });
+    if (!parsed.patch?.accessToken) {
       return res.status(400).json({ error: "accessToken required" });
+    }
     const account: Account = {
-      id: body.id ?? randomUUID(),
-      provider: body.provider === "mistral" ? "mistral" : "openai",
-      email: body.email,
-      accessToken: body.accessToken,
-      refreshToken: body.refreshToken,
-      expiresAt: body.expiresAt,
-      chatgptAccountId: body.chatgptAccountId,
-      enabled: body.enabled ?? true,
-      priority: body.priority ?? 0,
-      usage: body.usage,
-      state: body.state,
+      id: parsed.patch.id ?? randomUUID(),
+      provider: parsed.patch.provider ?? "openai",
+      email: parsed.patch.email,
+      accessToken: parsed.patch.accessToken,
+      refreshToken: parsed.patch.refreshToken,
+      expiresAt: parsed.patch.expiresAt,
+      chatgptAccountId: parsed.patch.chatgptAccountId,
+      enabled: parsed.patch.enabled ?? true,
+      priority: parsed.patch.priority ?? 0,
+      usage: undefined,
+      state: {},
     };
     await store.upsertAccount(account);
     res.json({ ok: true, account: redact(account) });
   });
 
   router.patch("/accounts/:id", async (req, res) => {
-    const updated = await store.patchAccount(req.params.id, req.body ?? {});
+    const body = (req.body ?? {}) as Record<string, unknown>;
+    const parsed = parseAccountPatch(body, false);
+    if (parsed.error) return res.status(400).json({ error: parsed.error });
+    const updated = await store.patchAccount(req.params.id, parsed.patch ?? {});
     if (!updated) return res.status(404).json({ error: "not found" });
     res.json({ ok: true, account: redact(updated) });
   });
@@ -625,6 +747,7 @@ export function createAdminRouter(options: AdminRoutesOptions) {
       } else {
         account = accountFromOAuth(flow, tokenData);
       }
+      clearAuthFailureState(account);
       account = await refreshUsageIfNeeded(account, openaiBaseUrl, true);
       await store.upsertAccount(account);
       await oauthStore.update(flow.id, {
diff --git a/src/routes/proxy/index.ts b/src/routes/proxy/index.ts
index 02cd16d..1e655c4 100644
--- a/src/routes/proxy/index.ts
+++ b/src/routes/proxy/index.ts
@@ -1,15 +1,16 @@
 import {
   MAX_ACCOUNT_RETRY_ATTEMPTS,
-  MAX_UPSTREAM_RETRIES,
+  MAX_GET_RETRIES,
   MODELS_CACHE_MS,
   MODELS_CLIENT_VERSION,
+  MODEL_DISCOVERY_TIMEOUT_MS,
   PI_USER_AGENT,
   PROXY_MODELS,
+  RETRY_BASE_DELAY_MS,
   TRACE_INCLUDE_BODY,
-  TOKEN_REFRESH_MARGIN_MS,
-  UPSTREAM_BASE_DELAY_MS,
   UPSTREAM_PATH,
   UPSTREAM_COMPACT_PATH,
+  UPSTREAM_REQUEST_TIMEOUT_MS,
   ZAI_BASE_URL,
   ZAI_UPSTREAM_PATH,
   ZAI_COMPACT_UPSTREAM_PATH,
@@ -31,7 +32,12 @@ import {
 } from "../../responses/payloads.js";
 import {
   chooseAccountForProvider,
+  accountSupportsModel,
+  clearAuthFailureState,
   isQuotaErrorText,
+  markModelCompatibility,
+  markAuthFailure,
+  markModelUnsupported,
   markQuotaHit,
   normalizeProvider,
   refreshUsageIfNeeded,
@@ -39,6 +45,7 @@ import {
   parseZaiErrorCode,
   shouldBlockAccountForZaiError,
   getZaiBlockDuration,
+  USAGE_CACHE_TTL_MS,
 } from "../../quota.js";
 import {
   ensureNonEmptyChatCompletion,
@@ -66,6 +73,7 @@ type ProxyRoutesOptions = {
   zaiUpstreamPath: string;
   zaiCompactUpstreamPath: string;
   oauthConfig: OAuthConfig;
+  upstreamRequestTimeoutMs?: number;
 };
 
 const modelsCache: { at: number; models: ExposedModel[] } = {
@@ -86,6 +94,11 @@ const modelsValidationCache: {
 
 const MODELS_VALIDATION_CACHE_MS = 60_000; // Refresh every 60 seconds
 
+export function resetDiscoveredModelsCacheForTest() { // Test hook: empties the module-level modelsCache.
+  modelsCache.at = 0; // zero timestamp; presumably makes the cache look stale to its age check — confirm
+  modelsCache.models = [];
+}
+
 type ExposedModel = {
   id: string;
   object: "model";
@@ -253,7 +266,7 @@ async function discoverModels(
         const url = `${openaiBaseUrl}/backend-api/codex/models?client_version=${encodeURIComponent(
           MODELS_CLIENT_VERSION,
         )}`;
-        const r = await fetch(url, { headers });
+        const r = await fetchCodexWithRetry(url, { headers });
         if (r.ok) {
           const json: any = await r.json();
           const upstream = Array.isArray(json?.models) ? json.models : [];
@@ -278,7 +291,9 @@ async function discoverModels(
           authorization: `Bearer ${mistralAccount.accessToken}`,
           accept: "application/json",
         };
-        const r = await fetch(`${mistralBaseUrl}/v1/models`, { headers });
+        const r = await fetchCodexWithRetry(`${mistralBaseUrl}/v1/models`, {
+          headers,
+        });
         if (r.ok) {
           const json: any = await r.json();
           const upstream = Array.isArray(json?.data) ? json.data : [];
@@ -468,10 +483,295 @@ function takeNextSSEFrame(buffer: string): SSEFrame {
   };
 }
 
+function frameSignalsResponseCompleted(frame: string): boolean { // True when the SSE frame names response.completed.
+  return (
+    /(?:^|\r?\n)event:\s*response\.completed\b/.test(frame) || // an `event:` field at the start of a line
+    frame.includes('"response.completed"') // or the double-quoted name anywhere in the frame text
+  );
+}
+
+function frameSignalsOutputTextDone(frame: string): boolean { // True when the SSE frame names response.output_text.done.
+  return (
+    /(?:^|\r?\n)event:\s*response\.output_text\.done\b/.test(frame) || // an `event:` field at the start of a line
+    frame.includes('"response.output_text.done"') // or the double-quoted name anywhere in the frame text
+  );
+}
+
+function frameSignalsResponseTerminal(frame: string): boolean { // True for a response.completed or response.output_text.done frame.
+  return (
+    frameSignalsResponseCompleted(frame) || frameSignalsOutputTextDone(frame)
+  );
+}
+
+function extractSSEDataPayload(frame: string): any | undefined { // Parsed JSON of the frame's first `data:` line, or undefined.
+  try {
+    const dataLine = frame
+      .split(/\r?\n/)
+      .find((line) => line.trim().startsWith("data:"));
+    if (!dataLine) return undefined;
+    return JSON.parse(dataLine.trim().slice(5).trim()); // trim before slicing so an indented line loses exactly "data:"
+  } catch {
+    return undefined; // a payload that is not valid JSON is treated like a missing one
+  }
+}
+
 function sleep(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
 
+function createRequestSignal( // Builds a signal that aborts after timeoutMs or when `upstreamAbort` aborts.
+  timeoutMs: number,
+  upstreamAbort?: AbortSignal,
+): { signal: AbortSignal; clearTimeout: () => void } {
+  const controller = new AbortController();
+  let timer: NodeJS.Timeout | undefined = setTimeout(() => {
+    controller.abort(new Error(`request timed out after ${timeoutMs}ms`));
+  }, timeoutMs);
+  const clearTimeoutOnly = () => { // returned as `clearTimeout`: stops the timer but leaves the parent-abort link in place
+    if (!timer) return;
+    clearTimeout(timer);
+    timer = undefined;
+  };
+  const onAbort = () => controller.abort(upstreamAbort?.reason);
+  controller.signal.addEventListener( // registered before any abort() call so cleanup also runs for a pre-aborted parent
+    "abort",
+    () => {
+      clearTimeoutOnly();
+      if (upstreamAbort) upstreamAbort.removeEventListener("abort", onAbort);
+    },
+    { once: true },
+  );
+  if (upstreamAbort) {
+    if (upstreamAbort.aborted) {
+      controller.abort(upstreamAbort.reason); // fires the listener above, which clears the pending timer
+    } else {
+      upstreamAbort.addEventListener("abort", onAbort, { once: true });
+    }
+  }
+  return {
+    signal: controller.signal,
+    clearTimeout: clearTimeoutOnly,
+  };
+}
+
+async function readChunkWithInactivityTimeout( // One reader.read() that rejects on timeout or when abortSignal aborts.
+  reader: ReadableStreamDefaultReader<Uint8Array>,
+  timeoutMs: number,
+  abortSignal?: AbortSignal,
+): Promise<ReadableStreamReadResult<Uint8Array>> {
+  return new Promise((resolve, reject) => {
+    let settled = false; // set by whichever of timeout / abort / read finishes first; the others then do nothing
+    let timer: NodeJS.Timeout | undefined = setTimeout(() => {
+      if (settled) return;
+      settled = true;
+      cleanup();
+      void reader.cancel().catch(() => {}); // best effort: a failing cancel is ignored
+      reject(new Error(`response stream timed out after ${timeoutMs}ms`));
+    }, timeoutMs);
+
+    const cleanup = () => { // clears the timer and detaches the abort listener
+      if (timer) {
+        clearTimeout(timer);
+        timer = undefined;
+      }
+      if (abortSignal) abortSignal.removeEventListener("abort", onAbort);
+    };
+
+    const onAbort = () => {
+      if (settled) return;
+      settled = true;
+      cleanup();
+      void reader.cancel().catch(() => {});
+      const reason = abortSignal?.reason;
+      reject(reason instanceof Error ? reason : new Error(String(reason ?? "aborted"))); // always reject with an Error
+    };
+
+    if (abortSignal) {
+      if (abortSignal.aborted) { // already aborted: reject now and never start the read
+        onAbort();
+        return;
+      }
+      abortSignal.addEventListener("abort", onAbort, { once: true });
+    }
+
+    reader.read().then(
+      (result) => {
+        if (settled) return; // timeout or abort already rejected; this late result is dropped
+        settled = true;
+        cleanup();
+        resolve(result);
+      },
+      (error) => {
+        if (settled) return;
+        settled = true;
+        cleanup();
+        reject(error);
+      },
+    );
+  });
+}
+
+async function readResponseTextWithInactivityTimeout( // Reads the whole response body as text; each chunk read is bounded by timeoutMs.
+  response: Response,
+  timeoutMs: number,
+  abortSignal?: AbortSignal,
+): Promise<string> {
+  if (!response.body) return ""; // no body stream, so there is nothing to read
+  const reader = response.body.getReader();
+  return readReaderTextWithInactivityTimeout(
+    reader,
+    new TextDecoder(),
+    timeoutMs,
+    abortSignal,
+  );
+}
+
+async function readReaderTextWithInactivityTimeout( // Drains `reader` into a string; each chunk read is bounded by timeoutMs.
+  reader: ReadableStreamDefaultReader<Uint8Array>,
+  decoder: TextDecoder,
+  timeoutMs: number,
+  abortSignal?: AbortSignal,
+  initialText = "", // text the caller already decoded; the result starts with it
+): Promise<string> {
+  let text = initialText;
+
+  while (true) {
+    const { value, done } = await readChunkWithInactivityTimeout(
+      reader,
+      timeoutMs,
+      abortSignal,
+    );
+    if (done) break;
+    text += decoder.decode(value, { stream: true }); // stream mode: a character split across chunks is decoded once complete
+  }
+
+  text += decoder.decode(); // final flush of whatever the decoder still buffers
+  return text;
+}
+
+async function peekResponseTextStart( // Reads only the first body chunk and hands back the reader/decoder to continue with.
+  response: Response,
+  timeoutMs: number,
+  abortSignal?: AbortSignal,
+): Promise<{
+  reader: ReadableStreamDefaultReader<Uint8Array> | null;
+  decoder: TextDecoder;
+  initialText: string;
+}> {
+  const decoder = new TextDecoder();
+  if (!response.body) {
+    return { reader: null, decoder, initialText: "" }; // no body: a null reader tells the caller there is nothing to continue
+  }
+  const reader = response.body.getReader();
+  const { value, done } = await readChunkWithInactivityTimeout(
+    reader,
+    timeoutMs,
+    abortSignal,
+  );
+  if (done) { // the stream ended before yielding any chunk
+    return {
+      reader,
+      decoder,
+      initialText: decoder.decode(),
+    };
+  }
+
+  return {
+    reader,
+    decoder,
+    initialText: decoder.decode(value, { stream: true }), // stream mode so the same decoder can continue with later chunks
+  };
+}
+
+function looksLikeSSEPayload(text: string): boolean { // True if some line of `text` starts with "event:" or "data:".
+  return /(?:^|\r?\n)(event:|data:)\s*/.test(text);
+}
+
+async function readResponsesSSETextUntilTerminalFromReader(
+  reader: ReadableStreamDefaultReader<Uint8Array>,
+  decoder: TextDecoder,
+  timeoutMs: number,
+  abortSignal?: AbortSignal,
+  initialText = "",
+): Promise<string> {
+  // Collects whole SSE frames until a terminal frame (response.completed or
+  // response.output_text.done) or end of stream, and returns the collected text.
+  // `initialText` is text already decoded ahead of the reader's next chunk. It is
+  // parsed through `sseBuffer` only, so its frames appear once in the result and
+  // a terminal frame inside it ends the read without waiting for another chunk.
+  let text = "";
+  let sseBuffer = initialText;
+  let completed = false;
+
+  // Moves every complete frame from sseBuffer into text, stopping at a terminal frame.
+  const drainFrames = () => {
+    while (!completed) {
+      const next = takeNextSSEFrame(sseBuffer);
+      if (!next) break;
+      sseBuffer = next.rest;
+      text += `${next.frame}\n\n`;
+      completed = frameSignalsResponseTerminal(next.frame);
+    }
+  };
+
+  // The pre-supplied text may already contain the terminal frame; check before reading.
+  drainFrames();
+  while (!completed) {
+    const { value, done } = await readChunkWithInactivityTimeout(
+      reader,
+      timeoutMs,
+      abortSignal,
+    );
+    if (done) break;
+    sseBuffer += decoder.decode(value, { stream: true });
+    drainFrames();
+  }
+
+  if (!completed) {
+    // Stream ended without a terminal frame: flush the decoder and keep any
+    // trailing partial frame so the caller still receives the whole body.
+    sseBuffer += decoder.decode();
+    drainFrames();
+    if (!completed && sseBuffer.trim()) text += sseBuffer;
+  }
+
+  // A terminal frame was seen, so the rest of the stream is not needed.
+  if (completed) void reader.cancel().catch(() => {});
+  return text;
+}
+
+async function readResponsesSSETextUntilTerminal( // Response-level wrapper around readResponsesSSETextUntilTerminalFromReader.
+  response: Response,
+  timeoutMs: number,
+  abortSignal?: AbortSignal,
+): Promise<string> {
+  if (!response.body) return ""; // no body stream, so there is nothing to read
+  return readResponsesSSETextUntilTerminalFromReader(
+    response.body.getReader(),
+    new TextDecoder(),
+    timeoutMs,
+    abortSignal,
+  );
+}
+
+function isAbortError(error: unknown): boolean { // True for Errors that look like an abort or a timeout.
+  return (
+    error instanceof Error &&
+    (error.name === "AbortError" || /timed out|aborted/i.test(error.message)) // the message test also covers the timeout Errors created in this file
+  );
+}
+
+function isDownstreamClientDisconnect( // True if the failure is attributed to the downstream client going away.
+  error: unknown,
+  abortSignal?: AbortSignal,
+): boolean {
+  return (
+    Boolean(abortSignal?.aborted) || // an aborted signal counts regardless of what `error` is
+    (error instanceof Error &&
+      /downstream client disconnected/i.test(error.message)) // same text proxyWithRotation uses as its abort reason
+  );
+}
+
 function isRetryableUpstreamError(status: number, errorText: string): boolean {
   if (
     status === 429 ||
@@ -486,34 +786,67 @@ function isRetryableUpstreamError(status: number, errorText: string): boolean {
   );
 }
 
+// Heuristic: does this upstream response indicate that the account's credentials were rejected?
+function isAuthFailure(status: number, errorText: string): boolean {
+  if (status === 401) return true;
+  if (status >= 200 && status < 300) return false; // a success body is model output, not an error message
+  // "auth"/"oauth" must stand alone, so unrelated words such as "author" no longer flag the account.
+  return /token_expired|invalid[_ -]?token|refresh[_ -]?token|authenticat|authoriz|(?<![a-z])o?auth(?![a-z])/i.test(errorText);
+}
+
+function isModelUnsupported(status: number, errorText: string): boolean { // True for a 400/404 whose text says the model is unavailable.
+  if (status !== 400 && status !== 404) return false; // any other status is never classified as model-unsupported
+  return /model.+not supported|unsupported model|does not exist|not available|unknown model/i.test(
+    errorText,
+  );
+}
+
 async function fetchCodexWithRetry(
   url: string,
   init: RequestInit,
+  signal?: AbortSignal,
 ): Promise<Response> {
   let lastError: Error | undefined;
-  for (let attempt = 0; attempt <= MAX_UPSTREAM_RETRIES; attempt++) {
+  const maxAttempts = Math.max(0, MAX_GET_RETRIES);
+  for (let attempt = 0; attempt <= maxAttempts; attempt++) {
     try {
-      const response = await fetch(url, init);
+      const requestSignal = createRequestSignal(
+        MODEL_DISCOVERY_TIMEOUT_MS,
+        signal,
+      );
+      const response = await fetch(url, {
+        ...init,
+        signal: requestSignal.signal,
+      });
+      requestSignal.clearTimeout();
       if (response.ok) return response;
       const errorText = await response
         .clone()
         .text()
         .catch(() => "");
       if (
-        attempt < MAX_UPSTREAM_RETRIES &&
+        attempt < maxAttempts &&
         isRetryableUpstreamError(response.status, errorText)
       ) {
-        await sleep(UPSTREAM_BASE_DELAY_MS * 2 ** attempt);
+        await sleep(
+          Math.floor(
+            RETRY_BASE_DELAY_MS * 2 ** attempt * (0.5 + Math.random()),
+          ),
+        );
         continue;
       }
       return response;
     } catch (error: any) {
       lastError = error instanceof Error ? error : new Error(String(error));
       if (
-        attempt < MAX_UPSTREAM_RETRIES &&
-        !lastError.message.includes("usage limit")
+        attempt < maxAttempts &&
+        !lastError.message.includes("usage limit") &&
+        !isAbortError(lastError)
       ) {
-        await sleep(UPSTREAM_BASE_DELAY_MS * 2 ** attempt);
+        await sleep(
+          Math.floor(
+            RETRY_BASE_DELAY_MS * 2 ** attempt * (0.5 + Math.random()),
+          ),
+        );
         continue;
       }
       throw lastError;
@@ -534,6 +867,7 @@ export function createProxyRouter(options: ProxyRoutesOptions) {
       zaiUpstreamPath,
       zaiCompactUpstreamPath,
       oauthConfig,
+      upstreamRequestTimeoutMs = UPSTREAM_REQUEST_TIMEOUT_MS,
     } = options;
   const { recordTrace } = traceManager;
   const router = express.Router();
@@ -568,6 +902,12 @@ export function createProxyRouter(options: ProxyRoutesOptions) {
   // Start background model cache refresh
   startBackgroundModelRefresh(store, openaiBaseUrl, mistralBaseUrl, zaiBaseUrl);
 
+  function refreshUsageInBackground(account: any, usageBaseUrl: string) { // Fire-and-forget usage refresh; the caller does not wait for it.
+    void refreshUsageIfNeeded(account, usageBaseUrl)
+      .then((refreshed) => store.upsertAccount(refreshed)) // persist whatever account object the refresh returns
+      .catch(() => undefined); // failures of the refresh or the upsert are deliberately ignored
+  }
+
   async function proxyWithRotation(
     req: express.Request,
     res: express.Response,
@@ -583,6 +923,16 @@ export function createProxyRouter(options: ProxyRoutesOptions) {
       (req.originalUrl || "").includes("responses/compact");
     const clientRequestedStream = Boolean(req.body?.stream);
     const sessionId = getSessionId(req);
+    const clientAbort = new AbortController();
+    const abortFromClient = () => {
+      if (!clientAbort.signal.aborted) {
+        clientAbort.abort(new Error("downstream client disconnected"));
+      }
+    };
+    req.on("aborted", abortFromClient);
+    res.on("close", () => {
+      if (!res.writableEnded) abortFromClient();
+    });
 
 let accounts = store.getCachedAccounts();
     if (!accounts.length)
@@ -595,7 +945,10 @@ let accounts = store.getCachedAccounts();
         let usageBaseUrl = openaiBaseUrl;
         if (provider === "mistral") usageBaseUrl = mistralBaseUrl;
         else if (provider === "zai") usageBaseUrl = zaiBaseUrl;
-        await refreshUsageIfNeeded(valid, usageBaseUrl);
+        const usageFetchedAt = valid.usage?.fetchedAt ?? 0;
+        if (Date.now() - usageFetchedAt >= USAGE_CACHE_TTL_MS) {
+          refreshUsageInBackground(valid, usageBaseUrl);
+        }
         return valid;
       }),
     );
@@ -617,20 +970,24 @@ let accounts = store.getCachedAccounts();
       });
     }
 
-    const discoveredModels = await discoverModels(store, openaiBaseUrl, mistralBaseUrl, zaiBaseUrl);
     const modelAliases = store.getCachedModelAliases();
     const routingCandidates = buildRoutingCandidates(
       requestModel,
-      discoveredModels,
+      modelsCache.models,
       modelAliases,
     );
     const tried = new Set<string>();
     const maxAttempts = Math.min(accounts.length, MAX_ACCOUNT_RETRY_ATTEMPTS);
     let providerTried = false;
+    let lastModelUnsupported:
+      | { status: number; text: string; contentType: string }
+      | undefined;
 
     for (const candidate of routingCandidates) {
       const providerAccounts = accounts.filter(
-        (a) => normalizeProvider(a) === candidate.provider,
+        (a) =>
+          normalizeProvider(a) === candidate.provider &&
+          accountSupportsModel(a, candidate.resolvedModel ?? requestModel),
       );
       if (!providerAccounts.length) continue;
       providerTried = true;
@@ -661,9 +1018,6 @@ let accounts = store.getCachedAccounts();
         delete payloadToUpstream.tool_choice;
         delete payloadToUpstream.parallel_tool_calls;
       }
-      if (isResponsesCompactPath && payloadToUpstream && typeof payloadToUpstream === "object") {
-        delete payloadToUpstream.store;
-      }
       if (candidate.resolvedModel) payloadToUpstream.model = candidate.resolvedModel;
       const requestBody = TRACE_INCLUDE_BODY ? req.body : undefined;
       const tracedModel =
@@ -699,17 +1053,50 @@ let accounts = store.getCachedAccounts();
           upstreamBaseUrl = zaiBaseUrl;
           upstreamPath = isResponsesCompactPath ? zaiCompactUpstreamPath : zaiUpstreamPath;
         }
-        const upstream = await fetchCodexWithRetry(
-          `${upstreamBaseUrl}${upstreamPath}`,
-          {
-            method: "POST",
-            headers,
-            body: JSON.stringify(payloadToUpstream),
-          },
+        const requestSignal = createRequestSignal(
+          upstreamRequestTimeoutMs,
+          clientAbort.signal,
         );
+        const upstream = await fetch(`${upstreamBaseUrl}${upstreamPath}`, {
+          method: "POST",
+          headers,
+          body: JSON.stringify(payloadToUpstream),
+          signal: requestSignal.signal,
+        });
+        requestSignal.clearTimeout();
 
         const contentType = upstream.headers.get("content-type") ?? "";
-        const isStream = contentType.includes("text/event-stream");
+        let isStream = contentType.includes("text/event-stream");
+        let prefetchedText = "";
+        let prefetchedReader: ReadableStreamDefaultReader<Uint8Array> | null = null;
+        let prefetchedDecoder: TextDecoder | null = null;
+
+        if (
+          upstream.ok &&
+          clientRequestedStream &&
+          !shouldReturnChatCompletions &&
+          !isStream &&
+          upstream.body
+        ) {
+          const peeked = await peekResponseTextStart(
+            upstream,
+            upstreamRequestTimeoutMs,
+            clientAbort.signal,
+          );
+          prefetchedText = peeked.initialText;
+          prefetchedReader = peeked.reader;
+          prefetchedDecoder = peeked.decoder;
+          if (looksLikeSSEPayload(prefetchedText)) isStream = true;
+        }
+        if (upstream.ok) {
+          clearAuthFailureState(selected);
+          markModelCompatibility(
+            selected,
+            candidate.resolvedModel ?? requestModel,
+            true,
+          );
+          await store.upsertAccount(selected);
+        }
 
         if (isStream) {
           if (shouldReturnChatCompletions && clientRequestedStream) {
@@ -728,7 +1115,11 @@ let accounts = store.getCachedAccounts();
             let doneSent = false;
 
             while (true) {
-              const { value, done } = await reader.read();
+              const { value, done } = await readChunkWithInactivityTimeout(
+                reader,
+                upstreamRequestTimeoutMs,
+                clientAbort.signal,
+              );
               if (done) break;
 
               const chunk = decoder.decode(value, { stream: true });
@@ -786,6 +1177,7 @@ let accounts = store.getCachedAccounts();
             recordTrace({
               at: Date.now(),
               route: req.path,
+              sessionId,
               accountId: selected.id,
               accountEmail: selected.email,
               model: tracedModel,
@@ -799,7 +1191,11 @@ let accounts = store.getCachedAccounts();
           }
 
           if (shouldReturnChatCompletions) {
-            const txt = await upstream.text();
+            const txt = await readResponsesSSETextUntilTerminal(
+              upstream,
+              upstreamRequestTimeoutMs,
+              clientAbort.signal,
+            );
             const parsedChat = parseResponsesSSEToChatCompletion(
               txt,
               req.body?.model ?? payloadToUpstream?.model ?? "unknown",
@@ -813,6 +1209,7 @@ let accounts = store.getCachedAccounts();
             recordTrace({
               at: Date.now(),
               route: req.path,
+              sessionId,
               accountId: selected.id,
               accountEmail: selected.email,
               model: tracedModel,
@@ -829,13 +1226,18 @@ let accounts = store.getCachedAccounts();
           }
 
           if (!clientRequestedStream) {
-            const txt = await upstream.text();
+            const txt = await readResponsesSSETextUntilTerminal(
+              upstream,
+              upstreamRequestTimeoutMs,
+              clientAbort.signal,
+            );
             const respObj = parseResponsesSSEToResponseObject(txt);
             res.status(upstream.ok ? 200 : upstream.status).json(respObj);
             const upstreamError = !upstream.ok ? txt.slice(0, 500) : undefined;
             recordTrace({
               at: Date.now(),
               route: req.path,
+              sessionId,
               accountId: selected.id,
               accountEmail: selected.email,
               model: tracedModel,
@@ -852,73 +1254,58 @@ let accounts = store.getCachedAccounts();
 
           res.status(upstream.status);
           setForwardHeaders(upstream, res);
-          if (!upstream.body) return res.end();
-          const reader = upstream.body.getReader();
-          const decoder = new TextDecoder();
+          res.flushHeaders();
+          const reader = prefetchedReader ?? upstream.body?.getReader() ?? null;
+          const decoder = prefetchedDecoder ?? new TextDecoder();
+          if (!reader) return res.end();
           let sseBuffer = "";
           let accumulatedUsage: any = null;
 
-          while (true) {
-            const { value, done } = await reader.read();
-            if (done) break;
-            sseBuffer += decoder.decode(value, { stream: true });
+          const consumeChunkText = (chunkText: string) => {
+            if (!chunkText) return;
+            res.write(chunkText);
+            sseBuffer += chunkText;
 
             while (true) {
               const next = takeNextSSEFrame(sseBuffer);
               if (!next) break;
               sseBuffer = next.rest;
 
-              if (next.frame.includes("response.completed")) {
-                try {
-                  const dataLine = next.frame
-                    .split(/\r?\n/)
-                    .find((line) => line.trim().startsWith("data:"));
-                  if (dataLine) {
-                    const payload = JSON.parse(dataLine.slice(5).trim());
-                    if (payload?.response?.usage) {
-                      accumulatedUsage = payload.response.usage;
-                    }
-                  }
-                } catch {}
+              const payload = extractSSEDataPayload(next.frame);
+              if (payload?.type === "response.completed") {
+                if (payload?.response?.usage) {
+                  accumulatedUsage = payload.response.usage;
+                }
+                continue;
+              }
+              if (
+                payload?.type === "response.output_text.done" &&
+                typeof payload?.text === "string"
+              ) {
+                continue;
               }
-
-              const filtered = sanitizeResponsesSSEFrame(next.frame);
-              if (filtered !== null) res.write(`${filtered}\n\n`);
             }
-          }
+          };
 
-          sseBuffer += decoder.decode();
-          while (true) {
-            const next = takeNextSSEFrame(sseBuffer);
-            if (!next) break;
-            sseBuffer = next.rest;
-
-            if (next.frame.includes("response.completed")) {
-              try {
-                const dataLine = next.frame
-                  .split(/\r?\n/)
-                  .find((line) => line.trim().startsWith("data:"));
-                if (dataLine) {
-                  const payload = JSON.parse(dataLine.slice(5).trim());
-                  if (payload?.response?.usage) {
-                    accumulatedUsage = payload.response.usage;
-                  }
-                }
-              } catch {}
-            }
+          consumeChunkText(prefetchedText);
 
-            const filtered = sanitizeResponsesSSEFrame(next.frame);
-            if (filtered !== null) res.write(`${filtered}\n\n`);
-          }
-          if (sseBuffer.trim()) {
-            const filtered = sanitizeResponsesSSEFrame(sseBuffer);
-            if (filtered !== null) res.write(`${filtered}\n\n`);
+          while (true) {
+            const { value, done } = await readChunkWithInactivityTimeout(
+              reader,
+              upstreamRequestTimeoutMs,
+              clientAbort.signal,
+            );
+            if (done) break;
+            consumeChunkText(decoder.decode(value, { stream: true }));
           }
+
+          consumeChunkText(decoder.decode());
           res.end();
 
           recordTrace({
             at: Date.now(),
             route: req.path,
+            sessionId,
             accountId: selected.id,
             accountEmail: selected.email,
             model: tracedModel,
@@ -933,7 +1320,11 @@ let accounts = store.getCachedAccounts();
 
         let bufferedText: string | undefined = undefined;
         if (shouldReturnChatCompletions && clientRequestedStream) {
-          let raw = await upstream.text();
+          let raw = await readResponseTextWithInactivityTimeout(
+            upstream,
+            upstreamRequestTimeoutMs,
+            clientAbort.signal,
+          );
           const upstreamEmptyBody = !raw;
           if (!raw)
             raw = JSON.stringify({
@@ -960,6 +1351,7 @@ let accounts = store.getCachedAccounts();
             recordTrace({
               at: Date.now(),
               route: req.path,
+              sessionId,
               accountId: selected.id,
               accountEmail: selected.email,
               model: tracedModel,
@@ -981,7 +1373,7 @@ let accounts = store.getCachedAccounts();
               req.body?.model ?? payloadToUpstream?.model ?? "unknown",
             );
             res.status(200);
-            res.set("Content-Type", "text.event-stream");
+            res.set("Content-Type", "text/event-stream");
             res.set("Cache-Control", "no-cache");
             res.set("Connection", "keep-alive");
             res.write(chatCompletionObjectToSSE(converted));
@@ -990,6 +1382,7 @@ let accounts = store.getCachedAccounts();
             recordTrace({
               at: Date.now(),
               route: req.path,
+              sessionId,
               accountId: selected.id,
               accountEmail: selected.email,
               model: tracedModel,
@@ -1006,7 +1399,21 @@ let accounts = store.getCachedAccounts();
           }
         }
 
-        let text = bufferedText ?? (await upstream.text());
+        let text =
+          bufferedText ??
+          (prefetchedReader && prefetchedDecoder
+            ? await readReaderTextWithInactivityTimeout(
+                prefetchedReader,
+                prefetchedDecoder,
+                upstreamRequestTimeoutMs,
+                clientAbort.signal,
+                prefetchedText,
+              )
+            : await readResponseTextWithInactivityTimeout(
+                upstream,
+                upstreamRequestTimeoutMs,
+                clientAbort.signal,
+              ));
         const upstreamEmptyBody = !text;
         if (!text)
           text = JSON.stringify({
@@ -1061,6 +1468,7 @@ let accounts = store.getCachedAccounts();
             recordTrace({
               at: Date.now(),
               route: req.path,
+              sessionId,
               accountId: selected.id,
               accountEmail: selected.email,
               model: tracedModel,
@@ -1095,6 +1503,7 @@ let accounts = store.getCachedAccounts();
             recordTrace({
               at: Date.now(),
               route: req.path,
+              sessionId,
               accountId: selected.id,
               accountEmail: selected.email,
               model: tracedModel,
@@ -1145,6 +1554,7 @@ let accounts = store.getCachedAccounts();
             recordTrace({
               at: Date.now(),
               route: req.path,
+              sessionId,
               accountId: selected.id,
               accountEmail: selected.email,
               model: tracedModel,
@@ -1175,6 +1585,7 @@ let accounts = store.getCachedAccounts();
             recordTrace({
               at: Date.now(),
               route: req.path,
+              sessionId,
               accountId: selected.id,
               accountEmail: selected.email,
               model: tracedModel,
@@ -1196,6 +1607,7 @@ let accounts = store.getCachedAccounts();
           recordTrace({
             at: Date.now(),
             route: req.path,
+            sessionId,
             accountId: selected.id,
             accountEmail: selected.email,
             model: tracedModel,
@@ -1212,15 +1624,24 @@ let accounts = store.getCachedAccounts();
           return;
         }
 
-        res.status(upstream.status);
-        setForwardHeaders(upstream, res);
-        res.type(contentType || "application/json").send(text);
-
         const usage = extractUsageFromPayload(parsed);
+        const quotaFailure =
+          upstream.status === 429 || isQuotaErrorText(text);
+        const authFailure = isAuthFailure(upstream.status, text);
+        const modelUnsupported = isModelUnsupported(upstream.status, text);
+        const shouldRotateAccount =
+          !upstream.ok &&
+          (quotaFailure || authFailure || modelUnsupported);
+
+        if (!shouldRotateAccount) {
+          res.status(upstream.status);
+          res.type(contentType || "application/json").send(text);
+        }
 
         recordTrace({
           at: Date.now(),
           route: req.path,
+          sessionId,
           accountId: selected.id,
           accountEmail: selected.email,
           model: tracedModel,
@@ -1252,11 +1673,31 @@ let accounts = store.getCachedAccounts();
           continue;
         }
 
-        if (upstream.status === 429 || isQuotaErrorText(text)) {
+        if (quotaFailure) {
           markQuotaHit(selected, `quota/rate-limit: ${upstream.status}`);
           await store.upsertAccount(selected);
           continue;
         }
+        if (authFailure) {
+          markAuthFailure(selected, `auth failure: ${upstream.status}`);
+          await store.upsertAccount(selected);
+          continue;
+        }
+        if (modelUnsupported) {
+          const failedModel =
+            candidate.resolvedModel ?? requestModel ?? "unknown-model";
+          lastModelUnsupported = {
+            status: upstream.status,
+            text,
+            contentType,
+          };
+          markModelUnsupported(
+            selected,
+            `model unsupported for ${failedModel}: ${upstream.status}`,
+          );
+          await store.upsertAccount(selected);
+          continue;
+        }
 
         rememberError(
           selected,
@@ -1266,26 +1707,67 @@ let accounts = store.getCachedAccounts();
         return;
       } catch (err: any) {
         const msg = err?.message ?? String(err);
-        rememberError(selected, msg);
-        await store.upsertAccount(selected);
+        const downstreamClientDisconnected = isDownstreamClientDisconnect(
+          err,
+          clientAbort.signal,
+        );
+        const status = downstreamClientDisconnected ? 499 : 599;
+        if (!downstreamClientDisconnected) {
+          rememberError(selected, msg);
+          await store.upsertAccount(selected);
+        }
         recordTrace({
           at: Date.now(),
           route: req.path,
+          sessionId,
           accountId: selected.id,
           accountEmail: selected.email,
           model: tracedModel,
-          status: 599,
+          status,
           stream: false,
           latencyMs: Date.now() - startedAt,
           error: msg,
           requestBody,
+          isError: downstreamClientDisconnected ? false : undefined,
         });
+        if (downstreamClientDisconnected) return;
+        if (isAbortError(err)) {
+          if (clientRequestedStream) {
+            if (!res.writableEnded) {
+              if (shouldReturnChatCompletions) {
+                res.write("data: [DONE]\n\n");
+              }
+              res.end();
+            }
+            return;
+          }
+          if (res.headersSent) {
+            if (!res.writableEnded) {
+              if (shouldReturnChatCompletions && clientRequestedStream) {
+                res.write("data: [DONE]\n\n");
+              }
+              res.end();
+            }
+            return;
+          }
+          return res.status(504).json({ error: "upstream request timed out" });
+        }
+        if (res.headersSent && !res.writableEnded) {
+          res.end();
+          return;
+        }
       }
     }
     }
     if (!providerTried) {
       return res.status(503).json({ error: "no provider accounts configured for requested model" });
     }
+    if (lastModelUnsupported) {
+      return res
+        .status(lastModelUnsupported.status)
+        .type(lastModelUnsupported.contentType || "application/json")
+        .send(lastModelUnsupported.text);
+    }
     res.status(429).json({ error: "all accounts exhausted or unavailable" });
   }
 
diff --git a/src/runtime.ts b/src/runtime.ts
new file mode 100644
index 0000000..57abc59
--- /dev/null
+++ b/src/runtime.ts
@@ -0,0 +1,312 @@
+import express from "express";
+import http from "node:http";
+import path from "node:path";
+import fs from "node:fs/promises";
+import { fileURLToPath } from "node:url";
+import { AccountStore, OAuthStateStore } from "./store.js";
+import { createTraceManager } from "./traces.js";
+import { createAdminRouter } from "./routes/admin/index.js";
+import { createProxyRouter } from "./routes/proxy/index.js";
+import { createOAuthCallbackServer } from "./oauth-callback-server.js";
+import { oauthConfig as defaultOAuthConfig } from "./oauth-config.js";
+import type { OAuthConfig } from "./oauth.js";
+import {
+  ADMIN_TOKEN,
+  CHATGPT_BASE_URL,
+  HOST,
+  MISTRAL_BASE_URL,
+  MISTRAL_COMPACT_UPSTREAM_PATH,
+  MISTRAL_UPSTREAM_PATH,
+  OAUTH_CALLBACK_BIND_HOST,
+  OAUTH_STATE_PATH,
+  PORT,
+  SERVER_HEADERS_TIMEOUT_MS,
+  SERVER_KEEP_ALIVE_TIMEOUT_MS,
+  SERVER_REQUEST_TIMEOUT_MS,
+  SHUTDOWN_GRACE_MS,
+  STORE_ENCRYPTION_KEY,
+  STORE_PATH,
+  TRACE_FILE_PATH,
+  TRACE_STATS_HISTORY_PATH,
+  UPSTREAM_PATH,
+} from "./config.js";
+
+type RuntimeOptions = { // per-call overrides for createRuntime; omitted fields use the fallbacks noted below
+  host?: string; // host the main server listens on; falls back to HOST
+  port?: number; // port the main server listens on; falls back to PORT
+  adminToken?: string; // token checked on /admin routes; falls back to ADMIN_TOKEN
+  storePath?: string; // account store file; falls back to STORE_PATH
+  oauthStatePath?: string; // OAuth state file; falls back to OAUTH_STATE_PATH
+  traceFilePath?: string; // falls back to TRACE_FILE_PATH
+  traceStatsHistoryPath?: string; // falls back to TRACE_STATS_HISTORY_PATH
+  openaiBaseUrl?: string; // falls back to CHATGPT_BASE_URL
+  mistralBaseUrl?: string; // falls back to MISTRAL_BASE_URL
+  mistralUpstreamPath?: string; // falls back to MISTRAL_UPSTREAM_PATH
+  mistralCompactUpstreamPath?: string; // falls back to MISTRAL_COMPACT_UPSTREAM_PATH
+  oauthConfig?: OAuthConfig; // falls back to the oauthConfig exported by ./oauth-config.js
+  oauthCallbackBindHost?: string; // falls back to OAUTH_CALLBACK_BIND_HOST
+  installSignalHandlers?: boolean; // treated as true when omitted
+  encryptionKey?: string; // falls back to STORE_ENCRYPTION_KEY; an empty value is passed to the stores as undefined
+  upstreamRequestTimeoutMs?: number; // no fallback here; forwarded unchanged to the proxy router
+};
+
+/**
+ * Reports whether `host` is one of the literal loopback spellings
+ * `127.0.0.1`, `::1` or `localhost`; the comparison is an exact string match.
+ */
+function isLoopbackHost(host: string): boolean {
+  return ["127.0.0.1", "::1", "localhost"].includes(host);
+}
+
+/**
+ * Builds the app, stores, routers and HTTP servers from `options` (else the configured
+ * defaults); nothing listens until `start()`. Throws if a non-loopback host has no admin token.
+ */
+export async function createRuntime(options: RuntimeOptions = {}) {
+  const host = options.host ?? HOST;
+  const port = options.port ?? PORT;
+  const adminToken = options.adminToken ?? ADMIN_TOKEN;
+  const storePath = options.storePath ?? STORE_PATH;
+  const oauthStatePath = options.oauthStatePath ?? OAUTH_STATE_PATH;
+  const traceFilePath = options.traceFilePath ?? TRACE_FILE_PATH;
+  const traceStatsHistoryPath = options.traceStatsHistoryPath ?? TRACE_STATS_HISTORY_PATH;
+  const openaiBaseUrl = options.openaiBaseUrl ?? CHATGPT_BASE_URL;
+  const mistralBaseUrl = options.mistralBaseUrl ?? MISTRAL_BASE_URL;
+  const mistralUpstreamPath = options.mistralUpstreamPath ?? MISTRAL_UPSTREAM_PATH;
+  const mistralCompactUpstreamPath =
+    options.mistralCompactUpstreamPath ?? MISTRAL_COMPACT_UPSTREAM_PATH;
+  const oauthConfig = options.oauthConfig ?? defaultOAuthConfig;
+  const oauthCallbackBindHost = options.oauthCallbackBindHost ?? OAUTH_CALLBACK_BIND_HOST;
+  const encryptionKey = options.encryptionKey ?? STORE_ENCRYPTION_KEY;
+  const upstreamRequestTimeoutMs = options.upstreamRequestTimeoutMs;
+
+  if (!isLoopbackHost(host) && !adminToken) {
+    throw new Error("ADMIN_TOKEN is required when binding off loopback");
+  }
+
+  const app = express();
+  app.disable("x-powered-by");
+  app.use(express.json({ limit: "20mb" }));
+  const oauthCallbackServer = createOAuthCallbackServer(oauthConfig.redirectUri);
+
+  const store = new AccountStore(storePath, encryptionKey || undefined);
+  const oauthStore = new OAuthStateStore(
+    oauthStatePath,
+    encryptionKey || undefined,
+  );
+  await store.init();
+  await oauthStore.init();
+  await fs.mkdir(path.dirname(traceFilePath), { recursive: true });
+
+  const traceManager = createTraceManager({
+    filePath: traceFilePath,
+    historyFilePath: traceStatsHistoryPath,
+  });
+
+  let ready = false;
+  let shuttingDown = false;
+
+  function adminGuard(
+    req: express.Request,
+    res: express.Response,
+    next: express.NextFunction,
+  ) {
+    if (!adminToken) return next(); // no token configured (only allowed on loopback): guard is a no-op
+    const token =
+      req.header("x-admin-token") ||
+      req.header("authorization")?.replace(/^Bearer\s+/i, "");
+    if (token !== adminToken) return res.status(401).json({ error: "unauthorized" });
+    next();
+  }
+
+  app.get("/health", (_req, res) =>
+    res.json({
+      ok: true,
+      ready,
+      shuttingDown,
+      version: process.env.APP_VERSION ?? "unknown",
+      gitSha: process.env.APP_GIT_SHA ?? "unknown",
+      buildId: process.env.APP_BUILD_ID ?? "unknown",
+    }),
+  );
+
+  app.get("/ready", (_req, res) => {
+    if (!ready || shuttingDown) {
+      return res.status(503).json({ ok: false, ready, shuttingDown });
+    }
+    return res.json({ ok: true, ready: true });
+  });
+
+  const adminRouter = createAdminRouter({
+    store,
+    oauthStore,
+    traceManager,
+    oauthConfig,
+    openaiBaseUrl,
+    mistralBaseUrl,
+    storagePaths: {
+      accountsPath: storePath,
+      oauthStatePath,
+      tracePath: traceFilePath,
+      traceStatsHistoryPath,
+    },
+  });
+
+  const proxyRouter = createProxyRouter({
+    store,
+    traceManager,
+    openaiBaseUrl,
+    mistralBaseUrl,
+    mistralUpstreamPath,
+    mistralCompactUpstreamPath,
+    oauthConfig,
+    upstreamRequestTimeoutMs,
+  });
+
+  app.use("/admin", adminGuard, adminRouter);
+  app.use("/v1", proxyRouter);
+
+  const __dirname = path.dirname(fileURLToPath(import.meta.url));
+  const webDist = path.resolve(__dirname, "../web-dist");
+  app.use(express.static(webDist));
+  app.get("*", (req, res, next) => { // fallback: any other GET is answered with the web UI's index.html
+    if (
+      req.path.startsWith("/admin/") ||
+      req.path.startsWith("/v1/") ||
+      req.path === "/health" ||
+      req.path === "/ready"
+    ) {
+      return next();
+    }
+    res.sendFile(path.join(webDist, "index.html"), (err) => {
+      if (err) next(err);
+    });
+  });
+
+  app.use(
+    (
+      err: unknown,
+      _req: express.Request,
+      res: express.Response,
+      _next: express.NextFunction,
+    ) => {
+      console.error(err);
+      if (res.headersSent) return;
+      res.status(500).json({ error: "internal server error" });
+    },
+  );
+
+  const server = http.createServer(app);
+  server.headersTimeout = SERVER_HEADERS_TIMEOUT_MS;
+  server.keepAliveTimeout = SERVER_KEEP_ALIVE_TIMEOUT_MS;
+  server.requestTimeout = SERVER_REQUEST_TIMEOUT_MS;
+
+  async function start() {
+    try {
+      await new Promise<void>((resolve, reject) => {
+        server.once("error", reject);
+        server.listen(port, host, () => {
+          server.off("error", reject);
+          resolve();
+        });
+      });
+
+      if (oauthCallbackServer) {
+        const callbackUrl = new URL(oauthConfig.redirectUri);
+        await new Promise<void>((resolve, reject) => {
+          oauthCallbackServer.once("error", reject);
+          oauthCallbackServer.listen(
+            Number(callbackUrl.port),
+            oauthCallbackBindHost || callbackUrl.hostname, // empty override falls back to the redirect URI host
+            () => {
+              oauthCallbackServer.off("error", reject);
+              resolve();
+            },
+          );
+        });
+      }
+
+      ready = true;
+    } catch (err) { // roll back: close both servers, then rethrow the listen error
+      server.closeIdleConnections();
+      server.closeAllConnections();
+      await new Promise<void>((resolve) => server.close(() => resolve()));
+      if (oauthCallbackServer) {
+        oauthCallbackServer.closeAllConnections?.();
+        await new Promise<void>((resolve) => oauthCallbackServer.close(() => resolve()));
+      }
+      throw err;
+    }
+  }
+
+  async function shutdown() {
+    if (shuttingDown) return;
+    shuttingDown = true;
+    ready = false;
+    await new Promise<void>((resolve) => {
+      const force = setTimeout(() => { // grace period elapsed: drop the remaining connections
+        server.closeAllConnections();
+        resolve();
+      }, SHUTDOWN_GRACE_MS);
+      server.close(() => {
+        clearTimeout(force);
+        resolve();
+      });
+      server.closeIdleConnections();
+    });
+    if (oauthCallbackServer?.listening) {
+      await new Promise<void>((resolve) => {
+        const force = setTimeout(() => {
+          oauthCallbackServer.closeAllConnections?.();
+          resolve();
+        }, SHUTDOWN_GRACE_MS);
+        oauthCallbackServer.close(() => {
+          clearTimeout(force);
+          resolve();
+        });
+      });
+    }
+    await store.flushIfDirty();
+    await traceManager.compactTraceStorageIfNeeded();
+  }
+
+  if (options.installSignalHandlers ?? true) {
+    const handleSignal = () => {
+      shutdown().then(
+        () => process.exit(0),
+        (err) => {
+          console.error(err);
+          process.exit(1); // a failed shutdown must not be reported as a clean exit
+        },
+      );
+    };
+    process.once("SIGTERM", handleSignal);
+    process.once("SIGINT", handleSignal);
+  }
+
+  return {
+    app,
+    server,
+    store,
+    oauthStore,
+    traceManager,
+    oauthCallbackServer,
+    start,
+    shutdown,
+    state: () => ({ ready, shuttingDown }),
+    config: {
+      host,
+      port,
+      storePath,
+      oauthStatePath,
+      traceFilePath,
+      traceStatsHistoryPath,
+      openaiBaseUrl,
+      mistralBaseUrl,
+      mistralUpstreamPath,
+      mistralCompactUpstreamPath,
+      oauthConfig,
+      oauthCallbackBindHost,
+    },
+  };
+}
diff --git a/src/server.ts b/src/server.ts
index f5b504d..1c23497 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -1,130 +1,17 @@
-import express from "express";
-import path from "node:path";
-import fs from "node:fs/promises";
-import { fileURLToPath } from "node:url";
-import { AccountStore, OAuthStateStore, cleanupOrphanedTmpFiles } from "./store.js";
-import { createTraceManager } from "./traces.js";
-import { createAdminRouter } from "./routes/admin/index.js";
-import { createProxyRouter } from "./routes/proxy/index.js";
-import { installResponsesWebsocketProxy } from "./websocket-responses.js";
-import { oauthConfig } from "./oauth-config.js";
-import {
-  ADMIN_TOKEN,
-  CHATGPT_BASE_URL,
-  MISTRAL_BASE_URL,
-  MISTRAL_UPSTREAM_PATH,
-  MISTRAL_COMPACT_UPSTREAM_PATH,
-  ZAI_BASE_URL,
-  ZAI_UPSTREAM_PATH,
-  ZAI_COMPACT_UPSTREAM_PATH,
-  STORE_PATH,
-  TRACE_FILE_PATH,
-  TRACE_STATS_HISTORY_PATH,
-  UPSTREAM_PATH,
-  OAUTH_STATE_PATH,
-  PORT,
-} from "./config.js";
-import { createBodyParserMiddleware } from "./middleware/decompression.js";
-import http from "node:http";
+import { createRuntime } from "./runtime.js";
 
-const app = express();
-app.use(createBodyParserMiddleware());
-
-const dataDir = path.dirname(STORE_PATH);
-await cleanupOrphanedTmpFiles(dataDir);
-
-const store = new AccountStore(STORE_PATH);
-const oauthStore = new OAuthStateStore(OAUTH_STATE_PATH);
-await store.init();
-await oauthStore.init();
-await fs.mkdir(path.dirname(TRACE_FILE_PATH), { recursive: true });
-
-const traceManager = createTraceManager({
-  filePath: TRACE_FILE_PATH,
-  historyFilePath: TRACE_STATS_HISTORY_PATH,
-});
-
-const adminRouter = createAdminRouter({
-  store,
-  oauthStore,
-  traceManager,
-  oauthConfig,
-  openaiBaseUrl: CHATGPT_BASE_URL,
-  mistralBaseUrl: MISTRAL_BASE_URL,
-  zaiBaseUrl: ZAI_BASE_URL,
-  storagePaths: {
-    accountsPath: STORE_PATH,
-    oauthStatePath: OAUTH_STATE_PATH,
-    tracePath: TRACE_FILE_PATH,
-    traceStatsHistoryPath: TRACE_STATS_HISTORY_PATH,
-  },
-});
-
-const proxyRouter = createProxyRouter({
-  store,
-  traceManager,
-  openaiBaseUrl: CHATGPT_BASE_URL,
-  mistralBaseUrl: MISTRAL_BASE_URL,
-  mistralUpstreamPath: MISTRAL_UPSTREAM_PATH,
-  mistralCompactUpstreamPath: MISTRAL_COMPACT_UPSTREAM_PATH,
-  zaiBaseUrl: ZAI_BASE_URL,
-  zaiUpstreamPath: ZAI_UPSTREAM_PATH,
-  zaiCompactUpstreamPath: ZAI_COMPACT_UPSTREAM_PATH,
-  oauthConfig,
-});
-
-function adminGuard(
-  req: express.Request,
-  res: express.Response,
-  next: express.NextFunction,
-) {
-  if (!ADMIN_TOKEN) return next();
-  const token =
-    req.header("x-admin-token") ||
-    req.header("authorization")?.replace(/^Bearer\s+/i, "");
-  if (token !== ADMIN_TOKEN)
-    return res.status(401).json({ error: "unauthorized" });
-  next();
-}
-
-const __dirname = path.dirname(fileURLToPath(import.meta.url));
-const webDist = path.resolve(__dirname, "../web-dist");
-
-app.get("/health", (_req, res) =>
-  res.json({
-    ok: true,
-    version: process.env.APP_VERSION ?? "unknown",
-    gitSha: process.env.APP_GIT_SHA ?? "unknown",
-    buildId: process.env.APP_BUILD_ID ?? "unknown",
-  }),
-);
-
-app.use("/admin", adminGuard, adminRouter);
-app.use("/v1", proxyRouter);
-
-app.use(express.static(webDist));
-app.get("*", (req, res, next) => {
-  if (
-    req.path.startsWith("/admin/") ||
-    req.path.startsWith("/v1/") ||
-    req.path === "/health"
-  )
-    return next();
-  res.sendFile(path.join(webDist, "index.html"), (err) => {
-    if (err) next();
-  });
-});
-
-const server = http.createServer(app);
-
-installResponsesWebsocketProxy({
-  server,
-  port: PORT,
-});
-
-server.listen(PORT, () => {
-  console.log(`multivibe listening on :${PORT}`);
+async function main() {
+  const runtime = await createRuntime({ installSignalHandlers: true });
+  await runtime.start();
+  console.log(
+    `multivibe listening on ${runtime.config.host}:${runtime.config.port}`,
+  );
   console.log(
-    `store=${STORE_PATH} oauth=${OAUTH_STATE_PATH} trace=${TRACE_FILE_PATH} traceStats=${TRACE_STATS_HISTORY_PATH} redirect=${oauthConfig.redirectUri} openaiUpstream=${CHATGPT_BASE_URL}${UPSTREAM_PATH} mistralUpstream=${MISTRAL_BASE_URL}${MISTRAL_UPSTREAM_PATH} zaiUpstream=${ZAI_BASE_URL}${ZAI_UPSTREAM_PATH}`,
+    `store=${runtime.config.storePath} oauth=${runtime.config.oauthStatePath} trace=${runtime.config.traceFilePath} traceStats=${runtime.config.traceStatsHistoryPath} redirect=${runtime.config.oauthConfig.redirectUri} openaiUpstream=${runtime.config.openaiBaseUrl} mistralUpstream=${runtime.config.mistralBaseUrl}${runtime.config.mistralUpstreamPath}`,
   );
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
 });
diff --git a/src/store.ts b/src/store.ts
index 5febc24..1c84225 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -9,22 +9,34 @@ import type {
   StoreFile,
 } from "./types.js";
 import { ACCOUNT_FLUSH_INTERVAL_MS } from "./config.js";
+import { decryptJson, encryptJson, looksEncryptedJson } from "./crypto.js";
 
 const DEFAULT_FILE: StoreFile = { accounts: [], modelAliases: [] };
 const DEFAULT_OAUTH_FILE: OAuthStateFile = { states: [] };
 
-async function ensureFile(filePath: string, seed: object) {
+async function ensureFile(
+  filePath: string,
+  seed: object,
+  encryptionKey?: string,
+) {
   await fs.mkdir(path.dirname(filePath), { recursive: true });
   try {
     await fs.access(filePath);
   } catch {
-    await writeJsonAtomic(filePath, seed);
+    await writeJsonAtomic(filePath, seed, encryptionKey);
   }
 }
 
-async function writeJsonAtomic(filePath: string, data: unknown): Promise<void> {
+async function writeJsonAtomic(
+  filePath: string,
+  data: unknown,
+  encryptionKey?: string,
+): Promise<void> {
   const tmp = `${filePath}.tmp-${randomUUID()}`;
-  await fs.writeFile(tmp, JSON.stringify(data, null, 2));
+  const payload = encryptionKey
+    ? encryptJson(data, encryptionKey)
+    : JSON.stringify(data, null, 2);
+  await fs.writeFile(tmp, payload, { mode: 0o600 });
   await fs.rename(tmp, filePath);
 }
 
@@ -38,27 +50,58 @@ export async function cleanupOrphanedTmpFiles(dataDir: string): Promise<void> {
   );
 }
 
+/**
+ * Reads `filePath` as UTF-8 JSON, routing content that `looksEncryptedJson` flags
+ * through `decryptJson`; throws when such content is found but no key was given.
+ */
+async function readJsonFile<T>(
+  filePath: string,
+  encryptionKey?: string,
+): Promise<T> {
+  const contents = await fs.readFile(filePath, "utf8");
+  if (!looksEncryptedJson(contents)) return JSON.parse(contents) as T;
+  if (encryptionKey) return decryptJson<T>(contents, encryptionKey);
+  throw new Error(`encrypted file requires STORE_ENCRYPTION_KEY: ${filePath}`);
+}
+
 export class AccountStore {
   private inMemoryAccounts: Account[] = [];
   private inMemoryModelAliases: ModelAlias[] = [];
   private dirty = false;
   private flushTimer: NodeJS.Timeout | null = null;
+  private lastLoadedMtimeMs = 0;
 
-  constructor(private filePath: string) {}
+  constructor(
+    private filePath: string,
+    private encryptionKey?: string,
+  ) {}
 
   async init() {
-    await ensureFile(this.filePath, DEFAULT_FILE);
+    await ensureFile(this.filePath, DEFAULT_FILE, this.encryptionKey);
     await this.reloadFromDisk();
   }
 
   private async reloadFromDisk() {
-    const raw = await fs.readFile(this.filePath, "utf8");
-    const data = JSON.parse(raw) as StoreFile;
+    const data = await readJsonFile<StoreFile>(this.filePath, this.encryptionKey);
     this.inMemoryAccounts = Array.isArray(data.accounts) ? data.accounts : [];
     this.inMemoryModelAliases = Array.isArray(data.modelAliases)
       ? data.modelAliases
       : [];
     this.dirty = false;
+    const stat = await fs.stat(this.filePath);
+    this.lastLoadedMtimeMs = stat.mtimeMs;
+  }
+
+  /** Reloads from disk when the file's mtime is newer than the last load; does nothing while dirty. */
+  private async reloadFromDiskIfChanged() {
+    if (!this.dirty) {
+      try {
+        const { mtimeMs } = await fs.stat(this.filePath);
+        if (mtimeMs > this.lastLoadedMtimeMs) await this.reloadFromDisk();
+      } catch {
+        // best-effort external reload: errors from stat or reload are deliberately ignored
+      }
+    }
   }
 
   private scheduleFlush() {
@@ -74,8 +117,12 @@ export class AccountStore {
     await writeJsonAtomic(this.filePath, {
       accounts: this.inMemoryAccounts,
       modelAliases: this.inMemoryModelAliases,
-    });
+    }, this.encryptionKey);
     this.dirty = false;
+    try {
+      const stat = await fs.stat(this.filePath);
+      this.lastLoadedMtimeMs = stat.mtimeMs;
+    } catch {}
     if (this.flushTimer) {
       clearTimeout(this.flushTimer);
       this.flushTimer = null;
@@ -136,6 +183,7 @@ export class AccountStore {
   }
 
   async listAccounts(): Promise<Account[]> {
+    await this.reloadFromDiskIfChanged();
     return this.getCachedAccounts();
   }
 
@@ -151,6 +199,7 @@ export class AccountStore {
   }
 
   async listModelAliases(): Promise<ModelAlias[]> {
+    await this.reloadFromDiskIfChanged();
     return this.getCachedModelAliases();
   }
 
@@ -191,19 +240,21 @@ export class AccountStore {
 }
 
 export class OAuthStateStore {
-  constructor(private filePath: string) {}
+  constructor(
+    private filePath: string,
+    private encryptionKey?: string,
+  ) {}
 
   async init() {
-    await ensureFile(this.filePath, DEFAULT_OAUTH_FILE);
+    await ensureFile(this.filePath, DEFAULT_OAUTH_FILE, this.encryptionKey);
   }
 
   private async read(): Promise<OAuthStateFile> {
-    const raw = await fs.readFile(this.filePath, "utf8");
-    return JSON.parse(raw) as OAuthStateFile;
+    return readJsonFile<OAuthStateFile>(this.filePath, this.encryptionKey);
   }
 
   private async write(data: OAuthStateFile): Promise<void> {
-    await writeJsonAtomic(this.filePath, data);
+    await writeJsonAtomic(this.filePath, data, this.encryptionKey);
   }
 
   async create(state: OAuthFlowState) {
diff --git a/src/traces.ts b/src/traces.ts
index e45cba3..051bac7 100644
--- a/src/traces.ts
+++ b/src/traces.ts
@@ -2,11 +2,13 @@ import { estimateCostUsd } from "./model-pricing.js";
 import fs from "node:fs/promises";
 import { randomUUID } from "node:crypto";
 import path from "node:path";
+import { TRACE_COMPACTION_INTERVAL } from "./config.js";
 
 export type TraceEntry = {
   id: string;
   at: number;
   route: string;
+  sessionId?: string;
   accountId?: string;
   accountEmail?: string;
   model?: string;
@@ -87,6 +89,7 @@ export type UsageAggregate = {
   promptTokens: number;
   completionTokens: number;
   totalTokens: number;
+  costUsd: number;
   statusCounts: Record<string, number>;
   firstAt?: number;
   lastAt?: number;
@@ -189,6 +192,10 @@ function normalizeTrace(raw: any): TraceEntry | null {
         : `${at}-${route}-${status}`,
     at,
     route,
+    sessionId:
+      typeof raw.sessionId === "string" && raw.sessionId.trim()
+        ? raw.sessionId.trim()
+        : undefined,
     accountId: typeof raw.accountId === "string" ? raw.accountId : undefined,
     accountEmail:
       typeof raw.accountEmail === "string" ? raw.accountEmail : undefined,
@@ -258,6 +265,7 @@ function createUsageAggregate(): UsageAggregate {
     promptTokens: 0,
     completionTokens: 0,
     totalTokens: 0,
+    costUsd: 0,
     statusCounts: {},
   };
 }
@@ -266,6 +274,14 @@ function addTraceToAggregate(agg: UsageAggregate, trace: TraceEntry) {
   const status = Number(trace.status);
   const statusKey = Number.isFinite(status) ? String(status) : "unknown";
   const tokens = usageToTokens(trace.usage);
+  const costUsd =
+    typeof trace.costUsd === "number"
+      ? trace.costUsd
+      : estimateCostUsd(
+          trace.model,
+          trace.tokensInput ?? 0,
+          trace.tokensOutput ?? 0,
+        ) ?? 0;
 
   agg.requests += 1;
   if (status >= 200 && status < 400) agg.ok += 1;
@@ -281,6 +297,7 @@ function addTraceToAggregate(agg: UsageAggregate, trace: TraceEntry) {
     agg.completionTokens += tokens.completionTokens;
     agg.totalTokens += tokens.totalTokens;
   }
+  agg.costUsd += costUsd;
 
   if (typeof trace.at === "number") {
     agg.firstAt =
@@ -320,6 +337,7 @@ function finalizeAggregate(agg: UsageAggregate) {
       completion: agg.completionTokens,
       total: agg.totalTokens,
     },
+    costUsd: Math.round(agg.costUsd * 1_000_000) / 1_000_000,
     statusCounts: agg.statusCounts,
     firstAt: agg.firstAt,
     lastAt: agg.lastAt,
@@ -529,6 +547,8 @@ export function createTraceManager(config: TraceManagerConfig) {
   const statsBuckets = new Map<number, TraceBucketAggregate>();
   let totalStored = 0;
   let cacheInit: Promise<void> | null = null;
+  let appendSinceCompaction = 0;
+  let compactionQueued = false;
 
   async function ensureParentDir(file: string) {
     await fs.mkdir(path.dirname(file), { recursive: true });
@@ -538,12 +558,40 @@ export function createTraceManager(config: TraceManagerConfig) {
     try {
       const raw = await fs.readFile(filePath, "utf8");
       const parsed: TraceEntry[] = [];
-      for (const line of raw.split("\n")) {
-        if (!line.trim()) continue;
-        try {
-          const normalized = normalizeTrace(JSON.parse(line));
-          if (normalized) parsed.push(normalized);
-        } catch {}
+      const fileHandle = await fs.open(filePath, 'r');
+      let position = 0;
+      let buffer = Buffer.alloc(65536); // 64KB buffer
+      let remaining = '';
+
+      try {
+        while (true) {
+          const { bytesRead } = await fileHandle.read(buffer, 0, buffer.length, position);
+          if (bytesRead === 0) break;
+
+          position += bytesRead;
+          const chunk = remaining + buffer.toString('utf8', 0, bytesRead);
+          const lines = chunk.split('\n');
+          remaining = lines.pop() || '';
+
+          for (const line of lines) {
+            if (!line.trim()) continue;
+            try {
+              const normalized = normalizeTrace(JSON.parse(line));
+              if (normalized) parsed.push(normalized);
+            } catch {}
+          }
+        }
+
+        // Process any remaining data
+        if (remaining.trim()) {
+          try {
+            const normalized = normalizeTrace(JSON.parse(remaining));
+            if (normalized) parsed.push(normalized);
+          } catch {}
+        }
+
+      } finally {
+        await fileHandle.close();
       }
       return parsed.slice(-retentionMax);
     } catch {
@@ -627,6 +675,12 @@ export function createTraceManager(config: TraceManagerConfig) {
     await fs.rename(tmp, filePath);
   }
 
+  // Appends `entry` to the trace file as one JSON line; JSON longer than 1024 * 1024 characters is skipped.
+  async function appendTraceLine(entry: TraceEntry): Promise<void> {
+    const serialized = JSON.stringify(entry);
+    if (serialized.length <= 1024 * 1024) await fs.appendFile(filePath, `${serialized}\n`, "utf8");
+  }
+
   function toStatsHistoryEntry(entry: TraceEntry): TraceEntry {
     const {
       requestBody: _requestBody,
@@ -853,17 +907,33 @@ export function createTraceManager(config: TraceManagerConfig) {
     };
   }
 
+  /**
+   * Queues one `writeTraceWindow` call with the newest `retentionMax` cached entries when
+   * the cache exceeds retention or TRACE_COMPACTION_INTERVAL appends have accumulated.
+   */
+  function queueCompactionIfNeeded() {
+    const due = traceCache.length > retentionMax || appendSinceCompaction >= TRACE_COMPACTION_INTERVAL;
+    if (compactionQueued || !due) return;
+    compactionQueued = true;
+    traceWriteQueue = traceWriteQueue
+      .then(() => writeTraceWindow(traceCache.slice(-retentionMax)))
+      .then(() => { appendSinceCompaction = 0; })
+      // Log and absorb failures so the shared write queue is never left rejected.
+      .catch((err) => console.error("trace compaction failed", err))
+      .finally(() => { compactionQueued = false; });
+  }
+
   async function appendTrace(
     entry: Omit<
       TraceEntry,
-      "id" | "isError" | "tokensInput" | "tokensOutput" | "tokensTotal"
-    >,
+      "id" | "tokensInput" | "tokensOutput" | "tokensTotal" | "isError"
+    > & { isError?: boolean },
   ) {
     const normalizedTokens = normalizeTokenFields(entry.usage);
     const finalEntry: TraceEntry = {
       ...entry,
       id: randomUUID(),
-      isError: entry.status >= 400,
+      isError: entry.isError ?? entry.status >= 400,
       tokensInput: normalizedTokens.tokensInput,
       tokensOutput: normalizedTokens.tokensOutput,
       tokensTotal: normalizedTokens.tokensTotal,
@@ -881,8 +951,9 @@ export function createTraceManager(config: TraceManagerConfig) {
       if (traceCache.length > retentionMax) {
         traceCache.splice(0, traceCache.length - retentionMax);
       }
-      await ensureParentDir(filePath);
-      await fs.appendFile(filePath, line, "utf8");
+      appendSinceCompaction += 1;
+      await appendTraceLine(finalEntry);
+      queueCompactionIfNeeded();
     });
     traceWriteQueue = run.catch(() => undefined);
     await Promise.all([run, appendStatsHistory(finalEntry)]);
diff --git a/src/types.ts b/src/types.ts
index ccd1096..1d3e3a6 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -24,6 +24,16 @@ export type AccountState = {
   recentErrors?: AccountError[];
   needsTokenRefresh?: boolean;
   lastUsageRefreshAt?: number;
+  refreshBlockedUntil?: number;
+  refreshFailureCount?: number;
+  modelAvailability?: Record<
+    string,
+    {
+      supported: boolean;
+      checkedAt: number;
+      reason?: string;
+    }
+  >;
 };
 
 export type Account = {
diff --git a/test/admin-validation.test.js b/test/admin-validation.test.js
new file mode 100644
index 0000000..f89f95f
--- /dev/null
+++ b/test/admin-validation.test.js
@@ -0,0 +1,38 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import path from "node:path";
+import { createTempDir, startRuntime, writeJson } from "./helpers.js";
+
+// Posting an account payload that carries an unexpected key must be refused:
+// the endpoint is expected to answer 400 with an "unknown fields" error.
+test("admin account endpoints reject unknown fields", async () => {
+  const dir = await createTempDir();
+  const storePath = path.join(dir, "accounts.json");
+  const oauthStatePath = path.join(dir, "oauth-state.json");
+  await writeJson(storePath, { accounts: [], modelAliases: [] });
+  await writeJson(oauthStatePath, { states: [] });
+  const harness = await startRuntime({
+    storePath,
+    oauthStatePath,
+    traceFilePath: path.join(dir, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(dir, "traces-history.jsonl"),
+  });
+
+  // `hackedField` is the extra key this test expects the endpoint to reject.
+  const payload = { id: "x", accessToken: "token", enabled: true, hackedField: true };
+  const headers = { "content-type": "application/json", "x-admin-token": "test-admin" };
+
+  try {
+    const response = await fetch(`${harness.baseUrl}/admin/accounts`, {
+      method: "POST",
+      headers,
+      body: JSON.stringify(payload),
+    });
+
+    assert.equal(response.status, 400);
+    const { error } = await response.json();
+    assert.match(error, /unknown fields/i);
+  } finally {
+    await harness.close();
+  }
+});
diff --git a/test/helpers.js b/test/helpers.js
new file mode 100644
index 0000000..b331fcb
--- /dev/null
+++ b/test/helpers.js
@@ -0,0 +1,79 @@
+import { mkdtemp, mkdir, writeFile } from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import http from "node:http";
+
+export async function createTempDir(prefix = "multivibe-test-") { // resolves with the path of a new, uniquely named directory under os.tmpdir()
+  return mkdtemp(path.join(os.tmpdir(), prefix)); // mkdtemp appends random characters to `prefix`
+}
+
+export async function writeJson(filePath, value) { // writes `value` to `filePath` as 2-space-indented JSON
+  await mkdir(path.dirname(filePath), { recursive: true }); // create missing parent directories first
+  await writeFile(filePath, JSON.stringify(value, null, 2));
+}
+
+/**
+ * Serves `handler` on an ephemeral 127.0.0.1 port and resolves with the server, its
+ * base URL and a `close()` that force-drops connections still open after 250 ms.
+ */
+export async function startHttpServer(handler) {
+  const server = http.createServer(handler);
+  await new Promise((listening) => server.listen(0, "127.0.0.1", listening));
+  const bound = server.address();
+  const port = bound && typeof bound === "object" ? bound.port : 0;
+  const close = () =>
+    new Promise((resolve, reject) => {
+      const forceTimer = setTimeout(() => {
+        server.closeAllConnections();
+        resolve();
+      }, 250);
+      server.close((err) => {
+        clearTimeout(forceTimer);
+        if (err) reject(err); else resolve();
+      });
+    });
+  return { server, url: `http://127.0.0.1:${port}`, close };
+}
+
+// Returns a port number that was free a moment ago: a throwaway server binds an
+// ephemeral port via startHttpServer and is closed again before the number is
+// handed back, so nothing keeps the port reserved for the caller.
+export async function getAvailablePort() {
+  const probe = await startHttpServer((_req, res) => res.writeHead(204).end());
+  const portText = new URL(probe.url).port;
+  await probe.close();
+  return Number(portText);
+}
+
+/**
+ * Starts a runtime built from ../dist on 127.0.0.1 with an ephemeral port, admin token
+ * "test-admin" and no signal handlers; keys in `options` override these defaults.
+ */
+export async function startRuntime(options = {}) {
+  const { createRuntime } = await import("../dist/runtime.js");
+  const callbackPort = await getAvailablePort();
+  const fallbackOAuthConfig = {
+    authorizationUrl: "https://auth.openai.com/oauth/authorize",
+    tokenUrl: "https://auth.openai.com/oauth/token",
+    clientId: "test-client",
+    scope: "openid profile email offline_access",
+    redirectUri: `http://127.0.0.1:${callbackPort}/auth/callback`,
+  };
+  const runtime = await createRuntime({
+    host: "127.0.0.1",
+    port: 0,
+    adminToken: "test-admin",
+    installSignalHandlers: false,
+    oauthConfig: options.oauthConfig ?? fallbackOAuthConfig,
+    ...options,
+  });
+  await runtime.start();
+  const bound = runtime.server.address();
+  const port = bound && typeof bound === "object" ? bound.port : 0;
+  async function close() {
+    runtime.server.closeIdleConnections();
+    runtime.server.closeAllConnections();
+    await runtime.shutdown();
+  }
+  return { runtime, baseUrl: `http://127.0.0.1:${port}`, close };
+}
diff --git a/test/manual-stress.mjs b/test/manual-stress.mjs
new file mode 100644
index 0000000..be663bf
--- /dev/null
+++ b/test/manual-stress.mjs
@@ -0,0 +1,339 @@
+import assert from "node:assert/strict";
+import http from "node:http";
+import os from "node:os";
+import path from "node:path";
+import { mkdtemp, writeFile } from "node:fs/promises";
+import { createRuntime } from "../dist/runtime.js";
+
+async function startHttpServer(handler) { // serve `handler` on an ephemeral 127.0.0.1 port; resolves to { server, url, close }
+  const server = http.createServer(handler);
+  await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve));
+  const address = server.address();
+  const port = typeof address === "object" && address ? address.port : 0;
+  const close = () => new Promise((resolve) => {
+    server.close(() => resolve());
+    server.closeAllConnections(); // drop sockets still open (keep-alive, stalled streams) so close() cannot hang
+  });
+  return { server, url: `http://127.0.0.1:${port}`, close };
+}
+
+async function writeJson(filePath, value) { // serialize `value` as 2-space-indented JSON and write it to `filePath`
+  await writeFile(filePath, JSON.stringify(value, null, 2));
+}
+
+// Creates a temp dir seeded with a one-account store and an empty OAuth state; returns those paths plus two trace paths.
+async function createBaseFiles() {
+  const workDir = await mkdtemp(path.join(os.tmpdir(), "multivibe-stress-"));
+  const inWorkDir = (name) => path.join(workDir, name);
+  const paths = {
+    storePath: inWorkDir("accounts.json"),
+    oauthStatePath: inWorkDir("oauth-state.json"),
+    traceFilePath: inWorkDir("traces.jsonl"),
+    traceStatsHistoryPath: inWorkDir("traces-history.jsonl"),
+  };
+  const seedAccount = {
+    id: "acct-1",
+    provider: "openai",
+    accessToken: "acct-1-token",
+    enabled: true,
+    usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+    state: {},
+  };
+  await writeJson(paths.storePath, { accounts: [seedAccount], modelAliases: [] });
+  await writeJson(paths.oauthStatePath, { states: [] });
+  return paths;
+}
+
+function oauthConfig(port) { // OAuth client settings passed to createRuntime; only the redirect port varies per runtime
+  return {
+    authorizationUrl: "https://auth.openai.com/oauth/authorize",
+    tokenUrl: "https://auth.openai.com/oauth/token",
+    clientId: "test-client",
+    scope: "openid profile email offline_access",
+    redirectUri: `http://127.0.0.1:${port}/auth/callback`, // loopback callback URL on `port`
+  };
+}
+
+// Boots a runtime on an ephemeral loopback port wired to `upstreamUrl` and the temp `files`; returns it with its base URL.
+async function startRuntimeFor(upstreamUrl, files, upstreamRequestTimeoutMs, redirectPort) {
+  const runtime = await createRuntime({
+    host: "127.0.0.1",
+    port: 0,
+    adminToken: "test-admin",
+    installSignalHandlers: false,
+    storePath: files.storePath,
+    oauthStatePath: files.oauthStatePath,
+    traceFilePath: files.traceFilePath,
+    traceStatsHistoryPath: files.traceStatsHistoryPath,
+    openaiBaseUrl: upstreamUrl,
+    upstreamRequestTimeoutMs,
+    oauthConfig: oauthConfig(redirectPort),
+  });
+  await runtime.start();
+  return { runtime, baseUrl: `http://127.0.0.1:${runtime.server.address().port}` };
+}
+
+// Runs `worker` over every entry of `items` using `limit` concurrent runners; rejects as soon as any worker call rejects.
+async function runPool(items, limit, worker) {
+  let index = 0;
+  const runners = Array.from({ length: limit }, async () => {
+    // Bounds check instead of an `undefined` sentinel, so undefined entries no longer stop a runner early.
+    while (index < items.length) {
+      await worker(items[index++]);
+    }
+  });
+  await Promise.all(runners);
+}
+
+// Builds one SSE `response.completed` frame whose single assistant message carries `text`.
+function responseCompletedFrame(text) {
+  // Synthetic usage: output tokens simply mirror the text length.
+  const outputTokens = text.length;
+  const payload = {
+    type: "response.completed",
+    response: {
+      object: "response",
+      status: "completed",
+      output: [
+        {
+          type: "message",
+          role: "assistant",
+          content: [{ type: "output_text", text }],
+        },
+      ],
+      usage: {
+        input_tokens: 10,
+        output_tokens: outputTokens,
+        total_tokens: 10 + outputTokens,
+      },
+    },
+  };
+  // The trailing blank line terminates the frame.
+  return `event: response.completed\ndata: ${JSON.stringify(payload)}\n\n`;
+}
+
+const files = await createBaseFiles();
+
+let requestCounter = 0;
+const successUpstream = await startHttpServer(async (req, res) => {
+  if (req.method === "GET" && req.url === "/backend-api/wham/usage") {
+    res.writeHead(200, { "content-type": "application/json" });
+    res.end(
+      JSON.stringify({
+        rate_limit: {
+          primary_window: { used_percent: 0 },
+          secondary_window: { used_percent: 0 },
+        },
+      }),
+    );
+    return;
+  }
+  if (
+    req.method === "GET" &&
+    req.url &&
+    req.url.startsWith("/backend-api/codex/models")
+  ) {
+    res.writeHead(200, { "content-type": "application/json" });
+    res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+    return;
+  }
+  if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+    requestCounter += 1;
+    const mode = requestCounter % 4;
+
+    if (mode === 0) {
+      res.writeHead(200, { "content-type": "text/event-stream" });
+      res.flushHeaders();
+      res.write(
+        'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n',
+      );
+      res.write(
+        'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":" world"}\n\n',
+      );
+      res.write(responseCompletedFrame("hello world"));
+      setTimeout(() => {
+        if (!res.writableEnded) res.end(": linger\n\n");
+      }, 120);
+      return;
+    }
+
+    if (mode === 1) {
+      res.writeHead(200, { "content-type": "text/event-stream" });
+      res.flushHeaders();
+      res.write(
+        'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n',
+      );
+      res.write(
+        'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":" world"}\n\n',
+      );
+      res.write(
+        'event: response.output_text.done\ndata: {"type":"response.output_text.done","text":"hello world"}\n\n',
+      );
+      setTimeout(() => {
+        if (!res.writableEnded) res.end(": linger\n\n");
+      }, 120);
+      return;
+    }
+
+    if (mode === 2) {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          object: "response",
+          status: "completed",
+          output: [
+            {
+              type: "message",
+              role: "assistant",
+              content: [{ type: "output_text", text: "json path" }],
+            },
+          ],
+          usage: { input_tokens: 10, output_tokens: 8, total_tokens: 18 },
+        }),
+      );
+      return;
+    }
+
+    res.writeHead(200, { "content-type": "text/event-stream" });
+    res.flushHeaders();
+    let i = 0;
+    const timer = setInterval(() => {
+      i += 1;
+      if (i <= 4) {
+        res.write(
+          "event: response.output_text.delta\ndata: " +
+            JSON.stringify({
+              type: "response.output_text.delta",
+              delta: String(i),
+            }) +
+            "\n\n",
+        );
+        return;
+      }
+      clearInterval(timer);
+      res.write(responseCompletedFrame("1234"));
+      setTimeout(() => {
+        if (!res.writableEnded) res.end(": linger\n\n");
+      }, 120);
+    }, 4);
+    return;
+  }
+  res.writeHead(404).end();
+});
+
+const successRuntime = await startRuntimeFor(successUpstream.url, files, 70, 20001);
+const successStats = { total: 0, stream: 0, buffered: 0 };
+
+await runPool(Array.from({ length: 120 }, (_, i) => i), 12, async (i) => {
+  const wantStream = i % 2 === 0;
+  const startedAt = Date.now();
+  const res = await fetch(`${successRuntime.baseUrl}/v1/responses`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({
+      model: "gpt-5.4",
+      stream: wantStream,
+      input: `hello-${i}`,
+    }),
+  });
+  assert.equal(res.status, 200);
+  const elapsedMs = Date.now() - startedAt;
+  assert.ok(elapsedMs < 220, `request ${i} took too long: ${elapsedMs}ms`);
+  if (wantStream) {
+    const body = await res.text();
+    assert.ok(
+      body.includes("response.completed") ||
+        body.includes("response.output_text.done"),
+    );
+    successStats.stream += 1;
+  } else {
+    const body = await res.json();
+    const text = body?.output?.[0]?.content?.[0]?.text;
+    assert.ok(
+      text === "hello world" || text === "1234" || text === "json path",
+      `unexpected buffered text: ${text}`,
+    );
+    successStats.buffered += 1;
+  }
+  successStats.total += 1;
+});
+
+await successRuntime.runtime.shutdown();
+await successUpstream.close();
+
+const stallUpstream = await startHttpServer(async (req, res) => {
+  if (req.method === "GET" && req.url === "/backend-api/wham/usage") {
+    res.writeHead(200, { "content-type": "application/json" });
+    res.end(
+      JSON.stringify({
+        rate_limit: {
+          primary_window: { used_percent: 0 },
+          secondary_window: { used_percent: 0 },
+        },
+      }),
+    );
+    return;
+  }
+  if (
+    req.method === "GET" &&
+    req.url &&
+    req.url.startsWith("/backend-api/codex/models")
+  ) {
+    res.writeHead(200, { "content-type": "application/json" });
+    res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+    return;
+  }
+  if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+    res.writeHead(200, { "content-type": "text/event-stream" });
+    res.flushHeaders();
+    res.write(
+      'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n',
+    );
+    return;
+  }
+  res.writeHead(404).end();
+});
+
+const stallRuntime = await startRuntimeFor(stallUpstream.url, files, 60, 20002);
+const timeoutStats = { buffered504: 0, streamingClosed: 0 };
+
+for (let i = 0; i < 10; i++) {
+  const res = await fetch(`${stallRuntime.baseUrl}/v1/responses`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({
+      model: "gpt-5.4",
+      stream: false,
+      input: `stall-buffered-${i}`,
+    }),
+  });
+  assert.equal(res.status, 504);
+  timeoutStats.buffered504 += 1;
+}
+
+for (let i = 0; i < 10; i++) {
+  const startedAt = Date.now();
+  const res = await fetch(`${stallRuntime.baseUrl}/v1/responses`, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify({
+      model: "gpt-5.4",
+      stream: true,
+      input: `stall-stream-${i}`,
+    }),
+  });
+  assert.equal(res.status, 200);
+  const body = await res.text();
+  const elapsedMs = Date.now() - startedAt;
+  assert.ok(
+    elapsedMs < 180,
+    `streaming stall ${i} took too long: ${elapsedMs}ms`,
+  );
+  assert.ok(body.includes("response.output_text.delta"));
+  timeoutStats.streamingClosed += 1;
+}
+
+await stallRuntime.runtime.shutdown();
+await stallUpstream.close();
+
+console.log(JSON.stringify({ successStats, timeoutStats }, null, 2));
diff --git a/test/proxy-behavior.test.js b/test/proxy-behavior.test.js
new file mode 100644
index 0000000..4f2dad0
--- /dev/null
+++ b/test/proxy-behavior.test.js
@@ -0,0 +1,1633 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import path from "node:path";
+import http from "node:http";
+import { readFile } from "node:fs/promises";
+import { createTempDir, startHttpServer, startRuntime, writeJson } from "./helpers.js";
+import { resetDiscoveredModelsCacheForTest } from "../dist/routes/proxy/index.js";
+
+// Canned completed-response body used by the fake upstreams: one assistant message carrying `text`, fixed usage counts.
+function responseObject(text = "OK") {
+  const assistantMessage = {
+    type: "message",
+    role: "assistant",
+    content: [{ type: "output_text", text }],
+  };
+  return {
+    object: "response",
+    status: "completed",
+    output: [assistantMessage],
+    usage: {
+      input_tokens: 10,
+      output_tokens: 5,
+      total_tokens: 15,
+    },
+  };
+}
+
+test("proxy fails over on model incompatibility and records capability state", async () => {
+  const seenAccounts = [];
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      const auth = req.headers.authorization ?? "";
+      seenAccounts.push(auth);
+      if (auth === "Bearer acct-1-token") {
+        res.writeHead(400, { "content-type": "application/json" });
+        res.end(
+          JSON.stringify({
+            detail:
+              "The 'gpt-5.4' model is not supported when using Codex with a ChatGPT account.",
+          }),
+        );
+        return;
+      }
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify(responseObject("OK")));
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  const storePath = path.join(tmp, "accounts.json");
+  const oauthStatePath = path.join(tmp, "oauth-state.json");
+  const traceFilePath = path.join(tmp, "traces.jsonl");
+  const traceStatsHistoryPath = path.join(tmp, "traces-history.jsonl");
+  await writeJson(storePath, {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        priority: 0,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+      {
+        id: "acct-2",
+        provider: "openai",
+        accessToken: "acct-2-token",
+        enabled: true,
+        priority: 0,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(oauthStatePath, { states: [] });
+
+  const runtime = await startRuntime({
+    storePath,
+    oauthStatePath,
+    traceFilePath,
+    traceStatsHistoryPath,
+    openaiBaseUrl: upstream.url,
+  });
+
+  try {
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false,
+        input: "reply with ok",
+      }),
+    });
+    assert.equal(res.status, 200);
+    const body = await res.json();
+    assert.equal(body.object, "response");
+    assert.equal(seenAccounts.length, 2);
+    assert.deepEqual(seenAccounts, [
+      "Bearer acct-1-token",
+      "Bearer acct-2-token",
+    ]);
+
+    await runtime.runtime.store.flushIfDirty();
+    const store = JSON.parse(await readFile(storePath, "utf8"));
+    const account1 = store.accounts.find((account) => account.id === "acct-1");
+    assert.equal(account1.state.blockedUntil, undefined);
+    assert.equal(account1.state.blockedReason, undefined);
+    assert.match(account1.state.lastError, /model unsupported/i);
+    assert.equal(
+      account1.state.modelAvailability["gpt-5.4"].supported,
+      false,
+    );
+  } finally {
+    await runtime.close();
+    await upstream.close();
+  }
+});
+
+test("unsupported model responses do not globally block accounts and return upstream 400", async () => {
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(400, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          detail:
+            "The 'None' model is not supported when using Codex with a ChatGPT account.",
+        }),
+      );
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  const storePath = path.join(tmp, "accounts.json");
+  const oauthStatePath = path.join(tmp, "oauth-state.json");
+  await writeJson(storePath, {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+      {
+        id: "acct-2",
+        provider: "openai",
+        accessToken: "acct-2-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(oauthStatePath, { states: [] });
+
+  const runtime = await startRuntime({
+    storePath,
+    oauthStatePath,
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url,
+  });
+
+  try {
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({}),
+    });
+    assert.equal(res.status, 400);
+    const body = await res.json();
+    assert.match(body.detail, /None/);
+
+    await runtime.runtime.store.flushIfDirty();
+    const store = JSON.parse(await readFile(storePath, "utf8"));
+    for (const account of store.accounts) {
+      assert.equal(account.state.blockedUntil, undefined);
+      assert.equal(account.state.blockedReason, undefined);
+      assert.match(account.state.lastError, /model unsupported/i);
+    }
+  } finally {
+    await runtime.close();
+    await upstream.close();
+  }
+});
+
+test("proxy request routing does not block on cold model discovery", async () => {
+  resetDiscoveredModelsCacheForTest();
+  let modelCalls = 0;
+  let responseCalls = 0;
+
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) {
+      modelCalls += 1;
+      setTimeout(() => {
+        res.writeHead(200, { "content-type": "application/json" });
+        res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      }, 150);
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      responseCalls += 1;
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify(responseObject("OK")));
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url,
+  });
+
+  try {
+    const startedAt = Date.now();
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false,
+        input: "reply with ok",
+      }),
+    });
+    assert.equal(res.status, 200);
+    assert.equal(responseCalls, 1);
+    assert.equal(modelCalls, 0);
+    assert.ok(Date.now() - startedAt < 150);
+  } finally {
+    await runtime.close();
+    await upstream.close();
+  }
+});
+
+test("stale usage refresh does not block proxy responses", async () => {
+  let usageCalls = 0;
+  let usageCompleted = false;
+  let responseCalls = 0;
+
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") {
+      usageCalls += 1;
+      setTimeout(() => {
+        usageCompleted = true;
+        res.writeHead(200, { "content-type": "application/json" });
+        res.end(
+          JSON.stringify({
+            rate_limit: {
+              primary_window: { used_percent: 0 },
+              secondary_window: { used_percent: 0 },
+            },
+          }),
+        );
+      }, 150);
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      responseCalls += 1;
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify(responseObject("OK")));
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: 0, primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url,
+  });
+
+  try {
+    const startedAt = Date.now();
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false,
+        input: "reply with ok",
+      }),
+    });
+    assert.equal(res.status, 200);
+    assert.equal(responseCalls, 1);
+    assert.equal(usageCalls, 1);
+    assert.equal(usageCompleted, false);
+    assert.ok(Date.now() - startedAt < 150);
+    await new Promise((resolve) => setTimeout(resolve, 200));
+    assert.equal(usageCompleted, true);
+  } finally {
+    await runtime.close();
+    await upstream.close();
+  }
+});
+
+test("proxy does not blindly retry generic upstream 500s for POST responses", async () => {
+  let responseCalls = 0;
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      responseCalls += 1;
+      res.writeHead(500, { "content-type": "application/json" });
+      res.end(JSON.stringify({ error: "boom" }));
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url,
+  });
+
+  try {
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false,
+        input: "reply with ok",
+      }),
+    });
+    assert.equal(res.status, 500);
+    assert.equal(responseCalls, 1);
+  } finally {
+    await runtime.close();
+    await upstream.close();
+  }
+});
+
+test("successful proxy responses clear stale auth failure state", async () => {
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify(responseObject("OK")));
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  const storePath = path.join(tmp, "accounts.json");
+  await writeJson(storePath, {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {
+          blockedUntil: Date.now() + 60_000,
+          blockedReason: "auth failure: 401",
+          needsTokenRefresh: true,
+          refreshFailureCount: 3,
+          refreshBlockedUntil: Date.now() + 60_000,
+          lastError: "refresh token failed: token endpoint failed 401",
+          recentErrors: [
+            { at: Date.now(), message: "usage probe failed 401" },
+            { at: Date.now() - 1_000, message: "auth failure: 401" },
+            { at: Date.now() - 2_000, message: "quota/rate-limit: 429" },
+          ],
+        },
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath,
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url,
+  });
+
+  try {
+    await runtime.runtime.store.upsertAccount({
+      ...(await runtime.runtime.store.listAccounts())[0],
+      state: {
+        blockedUntil: undefined,
+        blockedReason: undefined,
+        needsTokenRefresh: true,
+        refreshFailureCount: 3,
+        refreshBlockedUntil: Date.now() + 60_000,
+        lastError: "refresh token failed: token endpoint failed 401",
+        recentErrors: [
+          { at: Date.now(), message: "usage probe failed 401" },
+          { at: Date.now() - 1_000, message: "auth failure: 401" },
+          { at: Date.now() - 2_000, message: "quota/rate-limit: 429" },
+        ],
+      },
+    });
+
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false,
+        input: "reply with ok",
+      }),
+    });
+
+    assert.equal(res.status, 200);
+
+    await runtime.runtime.store.flushIfDirty();
+    const store = JSON.parse(await readFile(storePath, "utf8"));
+    const account = store.accounts.find((entry) => entry.id === "acct-1");
+    assert.equal(account.state.needsTokenRefresh, false);
+    assert.equal(account.state.refreshFailureCount, 0);
+    assert.equal(account.state.refreshBlockedUntil, undefined);
+    assert.equal(account.state.lastError, undefined);
+    assert.equal(account.state.blockedUntil, undefined);
+    assert.equal(account.state.blockedReason, undefined);
+    assert.deepEqual(account.state.recentErrors, [
+      {
+        at: account.state.recentErrors[0].at,
+        message: "quota/rate-limit: 429",
+      },
+    ]);
+  } finally {
+    await runtime.close();
+    await upstream.close();
+  }
+});
+
+test("proxy keeps a response alive while upstream chunks continue arriving", async () => {
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) {
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(200, { "content-type": "text/event-stream" });
+      res.flushHeaders();
+      let sent = 0;
+      const timer = setInterval(() => {
+        sent += 1;
+        if (sent <= 3) {
+          res.write(
+            `event: response.output_text.delta\ndata: ${JSON.stringify({
+              type: "response.output_text.delta",
+              delta: `part-${sent}`,
+            })}\n\n`,
+          );
+          return;
+        }
+        clearInterval(timer);
+        res.end(
+          `event: response.completed\ndata: ${JSON.stringify({
+            type: "response.completed",
+            response: responseObject("slow but valid"),
+          })}\n\n`,
+        );
+      }, 10);
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url,
+    upstreamRequestTimeoutMs: 80,
+  });
+
+  try {
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false,
+        input: "reply with ok",
+      }),
+    });
+    assert.equal(res.status, 200);
+    const body = await res.json();
+    assert.equal(body.output[0].content[0].text, "slow but valid");
+  } finally {
+    await runtime.close();
+    await upstream.close();
+  }
+});
+
+test("proxy returns 504 immediately on upstream timeout instead of retrying another account", async () => { // Two accounts are stored; the fake upstream answers after 80ms while the runtime is started with a 25ms timeout option.
+  const seenAccounts = []; // Authorization header of every POST that reaches the fake upstream
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      seenAccounts.push(req.headers.authorization ?? ""); // record which account token was used for this attempt
+      setTimeout(() => {
+        res.writeHead(200, { "content-type": "application/json" });
+        res.end(JSON.stringify(responseObject("too late")));
+      }, 80); // the reply is delayed 80ms, longer than the 25ms upstreamRequestTimeoutMs passed below
+      return;
+    }
+    res.writeHead(404).end(); // any other route is unknown to the stub
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        priority: 0,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+      {
+        id: "acct-2",
+        provider: "openai",
+        accessToken: "acct-2-token",
+        enabled: true,
+        priority: 1,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // point the runtime at the fake upstream
+    upstreamRequestTimeoutMs: 25, // presumably the proxy's upstream timeout in ms (verify in startRuntime); shorter than the 80ms stub delay
+  });
+
+  try {
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false,
+        input: "reply with ok",
+      }),
+    });
+    assert.equal(res.status, 504);
+    assert.equal(seenAccounts.length, 1); // exactly one upstream attempt: the second account must not be tried
+    assert.equal(seenAccounts[0], "Bearer acct-1-token"); // and that attempt used acct-1's token
+    assert.deepEqual(await res.json(), { error: "upstream request timed out" });
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
+
+test("proxy returns 504 when an upstream response stalls after headers", async () => { // The fake upstream sends headers plus one SSE delta, then stays silent for 200ms; the non-streaming client expects a 504 JSON error.
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(200, { "content-type": "text/event-stream" });
+      res.flushHeaders(); // push the headers out before any body data
+      res.write(
+        'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n',
+      );
+      setTimeout(() => {
+        if (!res.writableEnded) res.end();
+      }, 200); // nothing more is written for 200ms; then the response is ended if it has not been already
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // point the runtime at the fake upstream
+    upstreamRequestTimeoutMs: 25, // far shorter than the 200ms of upstream silence
+  });
+
+  try {
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false, // the client asks for a non-streamed reply although the upstream answers with SSE
+        input: "reply with ok",
+      }),
+    });
+
+    assert.equal(res.status, 504);
+    assert.deepEqual(await res.json(), { error: "upstream request timed out" });
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
+
+test("downstream client disconnects stay in traces without poisoning account errors", async () => { // The client aborts its own request after 10ms; the test then inspects the persisted account store and the trace file.
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      setTimeout(() => {
+        if (res.writableEnded) return; // NOTE(review): writableEnded only becomes true after res.end(); it does not reflect a peer disconnect - confirm intent
+        res.writeHead(200, { "content-type": "application/json" });
+        res.end(JSON.stringify(responseObject("too late")));
+      }, 80); // the reply comes 80ms later, well after the client aborts at 10ms
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  const storePath = path.join(tmp, "accounts.json"); // kept in variables because the files are read back below
+  const oauthStatePath = path.join(tmp, "oauth-state.json");
+  const traceFilePath = path.join(tmp, "traces.jsonl");
+  await writeJson(storePath, {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(oauthStatePath, { states: [] });
+
+  const runtime = await startRuntime({
+    storePath,
+    oauthStatePath,
+    traceFilePath,
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // note: no upstreamRequestTimeoutMs override is passed in this test
+  });
+
+  try {
+    await new Promise((resolve, reject) => {
+      const req = http.request( // raw http.request (not fetch) so the request can be destroyed mid-flight
+        `${runtime.baseUrl}/v1/responses`,
+        {
+          method: "POST",
+          headers: { "content-type": "application/json" },
+        },
+        (res) => {
+          res.resume(); // drain any response that does arrive
+        },
+      );
+      req.on("error", (err) => {
+        if (err.code === "ECONNRESET" || err.message === "socket hang up") { // the error expected from destroying our own request
+          resolve();
+          return;
+        }
+        reject(err); // any other error fails the test
+      });
+      req.write(
+        JSON.stringify({
+          model: "gpt-5.4",
+          stream: false,
+          input: "reply with ok",
+        }),
+      );
+      req.end();
+      setTimeout(() => req.destroy(), 10); // simulate the downstream client disconnecting 10ms after sending
+    });
+
+    await new Promise((resolve) => setTimeout(resolve, 120)); // wait past the upstream's 80ms delay before inspecting state
+    await runtime.runtime.store.flushIfDirty(); // presumably writes pending store changes to storePath - verify against the store implementation
+
+    const store = JSON.parse(await readFile(storePath, "utf8"));
+    const account = store.accounts.find((entry) => entry.id === "acct-1");
+    assert.equal(account.state?.lastError, undefined); // no error may be recorded on the account
+    assert.equal(account.state?.recentErrors, undefined);
+
+    const traces = (await readFile(traceFilePath, "utf8")) // trace file is JSON Lines: one JSON object per non-empty line
+      .trim()
+      .split("\n")
+      .filter(Boolean)
+      .map((line) => JSON.parse(line));
+    const trace = traces.at(-1); // most recent trace entry
+    assert.equal(trace.status, 499);
+    assert.equal(trace.isError, false); // the disconnect is traced but not flagged as an error
+    assert.equal(trace.error, "downstream client disconnected");
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
+
+test("proxy closes a stalled streamed response without crashing after headers are sent", async () => { // First upstream call stalls forever after one delta; a second request checks that the proxy still serves normally afterwards.
+  let calls = 0; // number of POSTs the fake upstream has received
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      calls += 1;
+      if (calls === 1) { // first call: send headers and one delta, then never end the response
+        res.writeHead(200, { "content-type": "text/event-stream" });
+        res.flushHeaders();
+        res.write(
+          'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n',
+        );
+        return; // the response is intentionally left open by this handler
+      }
+      res.writeHead(200, { "content-type": "application/json" }); // later calls get a complete JSON response
+      res.end(JSON.stringify(responseObject("recovered")));
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // point the runtime at the fake upstream
+    upstreamRequestTimeoutMs: 25,
+  });
+
+  try {
+    const firstStartedAt = Date.now();
+    const first = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: true,
+        input: "reply with ok",
+      }),
+    });
+    assert.equal(first.status, 200);
+    const firstBody = await first.text(); // resolves only once the proxy ends the downstream body
+    const firstElapsed = Date.now() - firstStartedAt;
+    assert.ok(firstElapsed < 180, `expected stall close promptly, got ${firstElapsed}ms`); // the upstream never ends, so the proxy must close the stream itself
+    assert.match(firstBody, /response\.output_text\.delta/); // the delta sent before the stall was still forwarded
+
+    const second = await fetch(`${runtime.baseUrl}/v1/responses`, { // follow-up request: the proxy must still be alive
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false,
+        input: "reply with ok",
+      }),
+    });
+    assert.equal(second.status, 200);
+    const secondBody = await second.json();
+    assert.equal(secondBody.output[0].content[0].text, "recovered");
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
+
+test("proxy closes streamed responses once response.completed arrives", async () => { // The upstream sends response.completed but keeps its connection open 200ms longer; the client must see the stream end sooner.
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(200, { "content-type": "text/event-stream" });
+      res.flushHeaders();
+      res.write(
+        'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n',
+      );
+      res.write(
+        `event: response.completed\ndata: ${JSON.stringify({
+          type: "response.completed",
+          response: responseObject("done"),
+        })}\n\n`,
+      );
+      setTimeout(() => {
+        if (!res.writableEnded) res.end(": upstream lingered\n\n");
+      }, 200); // the upstream lingers 200ms after response.completed, then ends with an SSE comment line
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // point the runtime at the fake upstream
+    upstreamRequestTimeoutMs: 25,
+  });
+
+  try {
+    const startedAt = Date.now();
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: true,
+        input: "reply with ok",
+      }),
+    });
+
+    assert.equal(res.status, 200);
+    const body = await res.text(); // resolves only once the proxy ends the downstream body
+    const elapsedMs = Date.now() - startedAt;
+    assert.match(body, /response\.completed/);
+    assert.ok(elapsedMs < 180, `expected proxy to close promptly, got ${elapsedMs}ms`); // must finish before the upstream's 200ms linger ends
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
+
+test("proxy preserves control frames for native streamed responses", async () => { // The upstream emits created, in_progress, delta and completed events; all four event names must appear in the client's body.
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(200, { "content-type": "text/event-stream" });
+      res.flushHeaders();
+      res.write(
+        `event: response.created\ndata: ${JSON.stringify({
+          type: "response.created",
+          response: { id: "resp_123", object: "response", status: "in_progress" },
+        })}\n\n`,
+      );
+      res.write(
+        `event: response.in_progress\ndata: ${JSON.stringify({
+          type: "response.in_progress",
+          response: { id: "resp_123", object: "response", status: "in_progress" },
+        })}\n\n`,
+      );
+      res.write(
+        'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n',
+      );
+      res.end(
+        `event: response.completed\ndata: ${JSON.stringify({
+          type: "response.completed",
+          response: responseObject("done"),
+        })}\n\n`,
+      ); // the final frame is sent with end(), so the upstream closes immediately
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // point the runtime at the fake upstream
+    upstreamRequestTimeoutMs: 25,
+  });
+
+  try {
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: true,
+        input: "reply with ok",
+      }),
+    });
+
+    assert.equal(res.status, 200);
+    const body = await res.text();
+    assert.match(body, /response\.created/); // control frames must not be dropped ...
+    assert.match(body, /response\.in_progress/);
+    assert.match(body, /response\.output_text\.delta/); // ... and content frames are forwarded as well
+    assert.match(body, /response\.completed/);
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
+
+test("proxy detects native streamed responses even when upstream omits content-type", async () => { // The upstream sends SSE-formatted data with no content-type header; the first event must reach the client before the upstream finishes.
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(200); // no headers object: this handler sets no content-type
+      res.flushHeaders();
+      res.write(
+        'event: response.created\ndata: {"type":"response.created","response":{"id":"resp_123","object":"response","status":"in_progress"}}\n\n',
+      );
+      setTimeout(() => {
+        res.end(
+          `event: response.completed\ndata: ${JSON.stringify({
+            type: "response.completed",
+            response: responseObject("done"),
+          })}\n\n`,
+        );
+      }, 400); // the completion frame is held back for 400ms
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // point the runtime at the fake upstream
+    upstreamRequestTimeoutMs: 500, // larger than the 400ms upstream delay
+  });
+
+  try {
+    const startedAt = Date.now();
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: true,
+        input: "reply with ok",
+      }),
+    });
+    assert.equal(res.status, 200);
+    const reader = res.body.getReader(); // read incrementally instead of waiting for the whole body
+    const first = await reader.read();
+    const firstChunk = new TextDecoder().decode(first.value, { stream: true });
+    assert.match(firstChunk, /response\.created/);
+    assert.ok(Date.now() - startedAt < 250); // the first chunk must arrive before the upstream completes at 400ms
+    await reader.cancel(); // stop reading; the rest of the stream is not needed
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
+
+test("proxy forwards partial native response chunks before a full SSE frame is complete", async () => { // The upstream sends the first half of a large SSE frame, waits 150ms, then sends the rest; the client must receive data before the frame is complete.
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(200, { "content-type": "text/event-stream" });
+      res.flushHeaders();
+      const createdFrame =
+        `event: response.created\ndata: ${JSON.stringify({
+          type: "response.created",
+          response: {
+            id: "resp_123",
+            object: "response",
+            status: "in_progress",
+            metadata: { pad: "x".repeat(4096) },
+          },
+        })}\n\n`;
+      const splitAt = Math.floor(createdFrame.length / 2); // cut the frame (padded with 4096 "x" characters above) in the middle
+      res.write(createdFrame.slice(0, splitAt)); // first half only: an incomplete SSE frame that starts with the event line
+      setTimeout(() => {
+        res.write(createdFrame.slice(splitAt));
+        res.end(
+          'event: response.output_text.done\ndata: {"type":"response.output_text.done","text":"ok"}\n\n',
+        );
+      }, 150); // the second half and the final frame follow 150ms later
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // point the runtime at the fake upstream
+    upstreamRequestTimeoutMs: 500, // larger than the 150ms pause between the two halves
+  });
+
+  try {
+    const startedAt = Date.now();
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: true,
+        input: "reply with ok",
+      }),
+    });
+
+    assert.equal(res.status, 200);
+    const reader = res.body.getReader(); // read incrementally to time the first chunk
+    const first = await reader.read();
+    const firstChunkMs = Date.now() - startedAt;
+    const decoder = new TextDecoder();
+    let body = first.done ? "" : decoder.decode(first.value, { stream: true });
+    assert.ok(firstChunkMs < 120, `expected first chunk promptly, got ${firstChunkMs}ms`); // i.e. before the upstream sends the second half at 150ms
+    assert.match(body, /response\.created/);
+
+    while (true) { // drain the remainder of the stream
+      const next = await reader.read();
+      if (next.done) break;
+      body += decoder.decode(next.value, { stream: true });
+    }
+    body += decoder.decode(); // flush any bytes still buffered in the decoder
+    assert.match(body, /response\.output_text\.done/);
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
+
+test("proxy preserves native streamed responses that end after response.output_text.done", async () => { // The upstream never sends response.completed; the streamed body must end promptly after output_text.done and must not contain a completed event.
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(200, { "content-type": "text/event-stream" });
+      res.flushHeaders();
+      res.write(
+        'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n',
+      );
+      res.write(
+        'event: response.output_text.done\ndata: {"type":"response.output_text.done","text":"hello"}\n\n',
+      );
+      setTimeout(() => {
+        if (!res.writableEnded) res.end(": upstream lingered\n\n");
+      }, 200); // the upstream lingers 200ms after output_text.done, then ends with an SSE comment line
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // point the runtime at the fake upstream
+    upstreamRequestTimeoutMs: 25,
+  });
+
+  try {
+    const startedAt = Date.now();
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: true,
+        input: "reply with ok",
+      }),
+    });
+
+    assert.equal(res.status, 200);
+    const body = await res.text(); // resolves only once the proxy ends the downstream body
+    const elapsedMs = Date.now() - startedAt;
+    assert.match(body, /response\.output_text\.done/);
+    assert.doesNotMatch(body, /response\.completed/); // no completed event may be added to the stream
+    assert.ok(elapsedMs < 180, `expected proxy to close promptly, got ${elapsedMs}ms`); // must finish before the upstream's 200ms linger ends
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
+
+test("proxy returns a buffered response once response.output_text.done arrives", async () => { // Non-streaming variant: the upstream SSE stops at output_text.done and lingers 200ms; the client expects a prompt JSON body whose text is "hello".
+  const upstream = await startHttpServer(async (req, res) => {
+    if (req.method === "GET" && req.url === "/backend-api/wham/usage") { // stub usage endpoint: 0% used in both windows
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(
+        JSON.stringify({
+          rate_limit: {
+            primary_window: { used_percent: 0 },
+            secondary_window: { used_percent: 0 },
+          },
+        }),
+      );
+      return;
+    }
+    if (
+      req.method === "GET" &&
+      req.url?.startsWith("/backend-api/codex/models")
+    ) { // stub model list containing only "gpt-5.4"
+      res.writeHead(200, { "content-type": "application/json" });
+      res.end(JSON.stringify({ models: [{ slug: "gpt-5.4" }] }));
+      return;
+    }
+    if (req.method === "POST" && req.url === "/backend-api/codex/responses") {
+      res.writeHead(200, { "content-type": "text/event-stream" });
+      res.flushHeaders();
+      res.write(
+        'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello"}\n\n',
+      );
+      res.write(
+        'event: response.output_text.done\ndata: {"type":"response.output_text.done","text":"hello"}\n\n',
+      );
+      setTimeout(() => {
+        if (!res.writableEnded) res.end(": upstream lingered\n\n");
+      }, 200); // the upstream lingers 200ms after output_text.done, then ends with an SSE comment line
+      return;
+    }
+    res.writeHead(404).end();
+  });
+
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), {
+    accounts: [
+      {
+        id: "acct-1",
+        provider: "openai",
+        accessToken: "acct-1-token",
+        enabled: true,
+        usage: { fetchedAt: Date.now(), primary: { usedPercent: 0 } },
+        state: {},
+      },
+    ],
+    modelAliases: [],
+  });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+
+  const runtime = await startRuntime({
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    openaiBaseUrl: upstream.url, // point the runtime at the fake upstream
+    upstreamRequestTimeoutMs: 25,
+  });
+
+  try {
+    const startedAt = Date.now();
+    const res = await fetch(`${runtime.baseUrl}/v1/responses`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-5.4",
+        stream: false, // the client wants one JSON body, not an event stream
+        input: "reply with ok",
+      }),
+    });
+
+    const elapsedMs = Date.now() - startedAt; // measured when the response headers arrive, before the body is read
+    assert.equal(res.status, 200);
+    assert.ok(elapsedMs < 180, `expected proxy to return promptly, got ${elapsedMs}ms`); // must not wait out the upstream's 200ms linger
+    const body = await res.json();
+    assert.equal(body.output[0].content[0].text, "hello"); // same text as carried by the upstream delta/done events
+  } finally {
+    await runtime.close(); // always tear down both servers, even when an assertion fails
+    await upstream.close();
+  }
+});
diff --git a/test/refresh-singleflight.test.js b/test/refresh-singleflight.test.js
new file mode 100644
index 0000000..2560887
--- /dev/null
+++ b/test/refresh-singleflight.test.js
@@ -0,0 +1,55 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { startHttpServer } from "./helpers.js";
+
+test("token refresh is single-flight per account", async () => {
+  // Number of refresh requests that actually reach the fake token endpoint.
+  let refreshCalls = 0;
+  const tokenServer = await startHttpServer(async (req, res) => {
+    const isRefreshRequest = req.method === "POST" && req.url === "/oauth/token";
+    if (!isRefreshRequest) {
+      res.writeHead(404).end();
+      return;
+    }
+    refreshCalls += 1;
+    // Delay the reply so the concurrent callers below overlap with this request.
+    await new Promise((resolve) => setTimeout(resolve, 50));
+    res.writeHead(200, { "content-type": "application/json" });
+    const tokens = { access_token: "fresh-token", refresh_token: "fresh-refresh", expires_in: 3600 };
+    res.end(JSON.stringify(tokens));
+  });
+
+  try {
+    const { ensureValidToken } = await import("../dist/account-utils.js");
+    // expiresAt lies one second in the past, so the stored access token is already stale.
+    const account = {
+      id: "acct-1",
+      provider: "openai",
+      accessToken: "expired-token",
+      refreshToken: "refresh-1",
+      expiresAt: Date.now() - 1_000,
+      enabled: true,
+      state: {},
+    };
+    const oauthConfig = {
+      authorizationUrl: `${tokenServer.url}/oauth/authorize`,
+      tokenUrl: `${tokenServer.url}/oauth/token`,
+      clientId: "client",
+      scope: "openid",
+      redirectUri: "http://localhost/callback",
+    };
+
+    // Start all five calls before awaiting any of them.
+    const pending = Array.from({ length: 5 }, () => ensureValidToken(account, oauthConfig));
+    const results = await Promise.all(pending);
+
+    // Single-flight: one upstream refresh, and every caller observes the refreshed tokens.
+    assert.equal(refreshCalls, 1);
+    for (const { accessToken, refreshToken } of results) {
+      assert.equal(accessToken, "fresh-token");
+      assert.equal(refreshToken, "fresh-refresh");
+    }
+  } finally {
+    await tokenServer.close();
+  }
+});
diff --git a/test/runtime.test.js b/test/runtime.test.js
new file mode 100644
index 0000000..f0a3771
--- /dev/null
+++ b/test/runtime.test.js
@@ -0,0 +1,90 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import path from "node:path";
+import { createTempDir, getAvailablePort, startRuntime, writeJson } from "./helpers.js";
+
+test("runtime refuses non-loopback binding without admin auth", async () => {
+  const { createRuntime } = await import("../dist/runtime.js");
+  const tmp = await createTempDir();
+  const inTmp = (name) => path.join(tmp, name);
+  const storePath = inTmp("accounts.json");
+  const oauthStatePath = inTmp("oauth-state.json");
+  await writeJson(storePath, { accounts: [], modelAliases: [] });
+  await writeJson(oauthStatePath, { states: [] });
+
+  // Wildcard host together with an empty admin token is the combination under test.
+  const options = {
+    host: "0.0.0.0",
+    port: 0,
+    adminToken: "",
+    installSignalHandlers: false,
+    storePath,
+    oauthStatePath,
+    traceFilePath: inTmp("traces.jsonl"),
+    traceStatsHistoryPath: inTmp("traces-history.jsonl"),
+  };
+
+  await assert.rejects(() => createRuntime(options), /ADMIN_TOKEN is required/);
+});
+
+test("runtime exposes readiness separately from health", async () => {
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), { accounts: [], modelAliases: [] });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+  const runtime = await startRuntime({
+    adminToken: "test-admin",
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+  });
+
+  try {
+    const health = await fetch(`${runtime.baseUrl}/health`).then((r) => r.json());
+    // Await the /ready body here so a read or parse failure fails this test, not a later unhandled rejection.
+    const readyRes = await fetch(`${runtime.baseUrl}/ready`);
+    const readyBody = await (readyRes.status === 200 ? readyRes.json() : readyRes.text());
+    const ready = { status: readyRes.status, body: readyBody };
+
+    assert.equal(health.ok, true);
+    assert.equal(health.ready, true);
+    assert.equal(ready.status, 200);
+  } finally {
+    await runtime.close();
+  }
+});
+
+test("runtime serves the loopback OAuth callback helper page", async () => {
+  const tmp = await createTempDir();
+  await writeJson(path.join(tmp, "accounts.json"), { accounts: [], modelAliases: [] });
+  await writeJson(path.join(tmp, "oauth-state.json"), { states: [] });
+  // Free port shared by redirectUri and by the URL the helper page is fetched from.
+  const callbackPort = await getAvailablePort();
+  const callbackUrl = `http://127.0.0.1:${callbackPort}/auth/callback`;
+  const runtime = await startRuntime({
+    adminToken: "test-admin",
+    storePath: path.join(tmp, "accounts.json"),
+    oauthStatePath: path.join(tmp, "oauth-state.json"),
+    traceFilePath: path.join(tmp, "traces.jsonl"),
+    traceStatsHistoryPath: path.join(tmp, "traces-history.jsonl"),
+    oauthConfig: {
+      authorizationUrl: "https://auth.openai.com/oauth/authorize",
+      tokenUrl: "https://auth.openai.com/oauth/token",
+      clientId: "test-client",
+      scope: "openid profile email offline_access",
+      redirectUri: callbackUrl,
+    },
+  });
+
+  try {
+    const res = await fetch(`${callbackUrl}?code=test-code&state=test-state`);
+    const body = await res.text();
+
+    assert.equal(res.status, 200);
+    for (const marker of [/OAuth callback received/, /multivibe-oauth-callback/, /Copy callback URL/]) {
+      assert.match(body, marker);
+    }
+  } finally {
+    await runtime.close();
+  }
+});
diff --git a/test/store-encryption.test.js b/test/store-encryption.test.js
new file mode 100644
index 0000000..e899c1c
--- /dev/null
+++ b/test/store-encryption.test.js
@@ -0,0 +1,34 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import path from "node:path";
+import { readFile } from "node:fs/promises";
+import { createTempDir } from "./helpers.js";
+
+test("account store encrypts persisted state when a key is configured", async () => {
+  const tmp = await createTempDir();
+  const filePath = path.join(tmp, "accounts.enc.json");
+  const { AccountStore } = await import("../dist/store.js");
+  const key = "super-secret-key";
+  // Kept separately so the read-back assertions can compare against the same values.
+  const secrets = { accessToken: "access-secret", refreshToken: "refresh-secret" };
+  const account = { id: "acct-1", provider: "openai", ...secrets, enabled: true, state: {} };
+
+  // Write the account through a keyed store and flush it to disk.
+  const store = new AccountStore(filePath, key);
+  await store.init();
+  await store.upsertAccount(account);
+  await store.flushIfDirty();
+
+  // The raw file must name the cipher and contain neither plaintext token.
+  const raw = await readFile(filePath, "utf8");
+  assert.doesNotMatch(raw, /access-secret|refresh-secret/);
+  assert.match(raw, /"alg"\s*:\s*"aes-256-gcm"/);
+
+  // A second store opened with the same key must return the original tokens.
+  const reloaded = new AccountStore(filePath, key);
+  await reloaded.init();
+  const accounts = await reloaded.listAccounts();
+  assert.equal(accounts.length, 1);
+  assert.equal(accounts[0].accessToken, secrets.accessToken);
+  assert.equal(accounts[0].refreshToken, secrets.refreshToken);
+});
diff --git a/test/traces.test.js b/test/traces.test.js
new file mode 100644
index 0000000..048a89e
--- /dev/null
+++ b/test/traces.test.js
@@ -0,0 +1,70 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import path from "node:path";
+import { readFile } from "node:fs/promises";
+import { createTempDir } from "./helpers.js";
+
+test("trace manager keeps a bounded in-memory window and compacts persisted traces", async () => {
+  const tmp = await createTempDir();
+  const traceFile = path.join(tmp, "traces.jsonl");
+  const { createTraceManager } = await import("../dist/traces.js");
+  const manager = createTraceManager({
+    filePath: traceFile,
+    historyFilePath: path.join(tmp, "traces-history.jsonl"),
+    retentionMax: 3,
+  });
+  const modelsOf = (entries) => entries.map((entry) => entry.model);
+  // retentionMax is 3, so only the last three of the five appended traces should survive.
+  const expectedModels = ["gpt-2", "gpt-3", "gpt-4"];
+
+  // Append sequentially so the insertion order is deterministic.
+  for (const i of [0, 1, 2, 3, 4]) {
+    await manager.appendTrace({
+      at: Date.now() + i,
+      route: "/responses",
+      status: 200,
+      stream: false,
+      latencyMs: 10 + i,
+      model: `gpt-${i}`,
+    });
+  }
+
+  const window = await manager.readTraceWindow();
+  assert.equal(window.length, 3);
+  assert.deepEqual(modelsOf(window), expectedModels);
+
+  await manager.compactTraceStorageIfNeeded();
+  const fileText = await readFile(traceFile, "utf8");
+  const persisted = fileText
+    .trim()
+    .split("\n")
+    .filter(Boolean)
+    .map((line) => JSON.parse(line));
+  assert.equal(persisted.length, 3);
+  assert.deepEqual(modelsOf(persisted), expectedModels);
+});
+
+test("trace manager preserves session ids for trace list entries", async () => {
+  const sessionId = "sess_test_123";
+  const tmp = await createTempDir();
+  const { createTraceManager } = await import("../dist/traces.js");
+  const manager = createTraceManager({
+    filePath: path.join(tmp, "traces.jsonl"),
+    historyFilePath: path.join(tmp, "traces-history.jsonl"),
+  });
+  await manager.appendTrace({
+    at: Date.now(),
+    route: "/responses",
+    sessionId,
+    status: 200,
+    stream: true,
+    latencyMs: 42,
+    model: "gpt-5.4",
+  });
+
+  // The id has to be visible through the full trace window and through the list window.
+  const fullWindow = await manager.readTraceWindow();
+  assert.equal(fullWindow[0]?.sessionId, sessionId);
+  const listWindow = await manager.readTraceListWindow();
+  assert.equal(listWindow[0]?.sessionId, sessionId);
+});
diff --git a/web/src/App.tsx b/web/src/App.tsx
index ed9f11b..8f8e712 100644
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@@ -3,6 +3,7 @@ import "./styles.css";
 import { estimateCostUsd } from "./model-pricing";
 import { api, tokenDefault } from "./lib/api";
 import {
+  EMPTY_TRACE_USAGE_STATS,
   EMPTY_TRACE_PAGINATION,
   EMPTY_TRACE_STATS,
   TRACE_PAGE_SIZE,
@@ -16,6 +17,7 @@ import type {
   TracePagination,
   TraceRangePreset,
   TraceStats,
+  TraceUsageStats,
 } from "./types";
 import { AccountsTab } from "./components/tabs/AccountsTab";
 import { DocsTab } from "./components/tabs/DocsTab";
@@ -33,6 +35,7 @@ export default function App() {
   const [accounts, setAccounts] = useState<Account[]>([]);
   const [traces, setTraces] = useState<Trace[]>([]);
   const [traceStats, setTraceStats] = useState<TraceStats>(EMPTY_TRACE_STATS);
+  const [traceUsageStats, setTraceUsageStats] = useState<TraceUsageStats>(EMPTY_TRACE_USAGE_STATS);
   const [tracePagination, setTracePagination] = useState<TracePagination>(EMPTY_TRACE_PAGINATION);
   const [models, setModels] = useState<ExposedModel[]>([]);
   const [aliases, setAliases] = useState<ModelAlias[]>([]);
@@ -161,12 +164,14 @@ export default function App() {
     if (typeof sinceMs === "number") params.set("sinceMs", String(sinceMs));
     if (typeof untilMs === "number") params.set("untilMs", String(untilMs));
 
-    const [tr, statsRes] = await Promise.all([
+    const [tr, statsRes, usageRes] = await Promise.all([
       api(`/admin/traces?${params.toString()}`),
       api(`/admin/stats/traces?${params.toString()}`),
+      api(`/admin/stats/usage?${params.toString()}`),
     ]);
     setTraces((tr.traces ?? []) as Trace[]);
     setTraceStats((statsRes.stats ?? tr.stats ?? EMPTY_TRACE_STATS) as TraceStats);
+    setTraceUsageStats((usageRes ?? EMPTY_TRACE_USAGE_STATS) as TraceUsageStats);
     setTracePagination((tr.pagination ?? { ...EMPTY_TRACE_PAGINATION, page: safePage }) as TracePagination);
     setExpandedTraceId(null);
     setExpandedTrace(null);
@@ -445,6 +450,7 @@ export default function App() {
           <TracingTab
             accounts={accounts}
             traceStats={filteredTraceStats}
+            traceUsageStats={traceUsageStats}
             tokensTimeseries={tokensTimeseries}
             modelChartData={modelChartData}
             modelCostChartData={modelCostChartData}
diff --git a/web/src/components/tabs/AccountsTab.tsx b/web/src/components/tabs/AccountsTab.tsx
index cafe767..b7ad860 100644
--- a/web/src/components/tabs/AccountsTab.tsx
+++ b/web/src/components/tabs/AccountsTab.tsx
@@ -1,4 +1,4 @@
-import React, { useState } from "react";
+import React, { useEffect, useState } from "react";
 import { Metric } from "../Metric";
 import { fmt, maskEmail, maskId, usd } from "../../lib/ui";
 import type { Account, TraceStats } from "../../types";
@@ -69,6 +69,35 @@ export function AccountsTab(props: Props) {
   const [oauthBusyId, setOauthBusyId] = useState<string | null>(null);
   const [oauthDialog, setOauthDialog] = useState<OAuthDialogState | null>(null);
 
+  // While the OAuth dialog is open, listen for the callback URL delivered via window "message" events.
+  useEffect(() => {
+    if (!oauthDialog) return;
+    const { expectedRedirectUri } = oauthDialog;
+    // True only when `candidate` parses as a URL with the expected origin and pathname.
+    const matchesRedirectUri = (candidate: string): boolean => {
+      try {
+        const received = new URL(candidate);
+        const expected = new URL(expectedRedirectUri);
+        return received.origin === expected.origin && received.pathname === expected.pathname;
+      } catch {
+        return false;
+      }
+    };
+    const onMessage = (event: MessageEvent) => {
+      const data: unknown = event.data;
+      if (!data || typeof data !== "object") return;
+      const payload = data as { type?: unknown; callbackUrl?: unknown };
+      if (payload.type !== "multivibe-oauth-callback") return;
+      const { callbackUrl } = payload;
+      if (typeof callbackUrl !== "string" || !callbackUrl.trim()) return;
+      if (!matchesRedirectUri(callbackUrl)) return;
+      setOauthDialog((current) => (current ? { ...current, callbackInput: callbackUrl.trim() } : current));
+    };
+
+    window.addEventListener("message", onMessage);
+    return () => window.removeEventListener("message", onMessage);
+  }, [oauthDialog]);
+
   const closeModal = () => {
     setShowAddAccount(false);
     setProvider("openai");
@@ -583,8 +612,8 @@ export function AccountsTab(props: Props) {
             </div>
             <div className="muted">
               Complete the OpenAI login in the opened browser tab. When the browser reaches
-              the callback page, copy the full URL and paste it here. Do not paste access or
-              refresh tokens.
+              the callback page, the full URL should autofill here. If it does not, copy the
+              full URL and paste it here. Do not paste access or refresh tokens.
             </div>
             <div className="inline wrap">
               <button
diff --git a/web/src/components/tabs/DocsTab.tsx b/web/src/components/tabs/DocsTab.tsx
index 752b938..f627644 100644
--- a/web/src/components/tabs/DocsTab.tsx
+++ b/web/src/components/tabs/DocsTab.tsx
@@ -17,7 +17,7 @@ export function DocsTab({ totalTraceCostFromRows }: { totalTraceCostFromRows: nu
           <li className="mono">POST /admin/model-aliases</li>
           <li className="mono">PATCH /admin/model-aliases/:id</li>
           <li className="mono">DELETE /admin/model-aliases/:id</li>
-          <li className="mono">GET /admin/traces?page=1&amp;pageSize=100</li>
+          <li className="mono">GET /admin/traces?page=1&amp;pageSize=50</li>
           <li className="mono">GET /admin/traces?limit=50 (legacy compatibility)</li>
           <li className="mono">GET /admin/stats/traces?sinceMs=&amp;untilMs=</li>
           <li className="mono">GET /admin/stats/usage?sinceMs=&amp;untilMs=&amp;accountId=&amp;route=</li>
diff --git a/web/src/components/tabs/TracingTab.tsx b/web/src/components/tabs/TracingTab.tsx
index fc9324b..3882b4b 100644
--- a/web/src/components/tabs/TracingTab.tsx
+++ b/web/src/components/tabs/TracingTab.tsx
@@ -15,13 +15,14 @@ import {
   YAxis,
 } from "recharts";
 import { estimateCostUsd } from "../../model-pricing";
-import { CHART_COLORS, fmt, formatTokenCount, maskEmail, maskId, pct, routeLabel, usd } from "../../lib/ui";
+import { CHART_COLORS, fmt, formatSessionTail, formatTokenCount, maskEmail, maskId, pct, routeLabel, usd } from "../../lib/ui";
 import { Metric } from "../Metric";
-import type { Account, Trace, TracePagination, TraceRangePreset, TraceStats } from "../../types";
+import type { Account, Trace, TracePagination, TraceRangePreset, TraceStats, TraceUsageStats } from "../../types";
 
 type Props = {
   accounts: Account[];
   traceStats: TraceStats;
+  traceUsageStats: TraceUsageStats;
   tokensTimeseries: Array<any>;
   modelChartData: Array<any>;
   modelCostChartData: Array<any>;
@@ -39,10 +40,120 @@ type Props = {
   exportInProgress: boolean;
 };
 
+type SessionUsageEntry = TraceUsageStats["bySession"][number]; // one element of TraceUsageStats.bySession
+type SessionSortKey = "requests" | "tokens" | "costUsd" | "avgLatencyMs" | "lastAt"; // columns the session list can be sorted by
+type SessionSortDirection = "asc" | "desc";
+type SessionSortState = {
+  key: SessionSortKey;
+  direction: SessionSortDirection;
+};
+type TraceCardId = // identifiers of the cards whose order can be rearranged
+  | "tokensOverTime"
+  | "modelUsage"
+  | "modelCost"
+  | "errorTrend"
+  | "costOverTime"
+  | "latency"
+  | "tokenSplit"
+  | "usageByAccount"
+  | "usageByRoute"
+  | "topSessions";
+
+const CARD_ORDER_STORAGE_KEY = "tracing-card-order.v1"; // localStorage key read by readCardOrder
+const TOP_SESSIONS_SORT_STORAGE_KEY = "tracing-top-sessions-sort.v1"; // localStorage key read by readTopSessionsSort
+const DEFAULT_TOP_SESSIONS_SORT: SessionSortState = { key: "requests", direction: "desc" }; // fallback when nothing valid is stored
+const DEFAULT_CARD_ORDER: TraceCardId[] = [ // fallback order; normalizeCardOrder appends any of these ids that are missing
+  "tokensOverTime",
+  "modelUsage",
+  "modelCost",
+  "errorTrend",
+  "costOverTime",
+  "latency",
+  "tokenSplit",
+  "usageByAccount",
+  "usageByRoute",
+  "topSessions",
+];
+const VALID_CARD_IDS = new Set<TraceCardId>(DEFAULT_CARD_ORDER); // membership check for stored card ids
+const VALID_SORT_KEYS = new Set<SessionSortKey>(["requests", "tokens", "costUsd", "avgLatencyMs", "lastAt"]); // membership check for stored sort keys
+const VALID_SORT_DIRECTIONS = new Set<SessionSortDirection>(["asc", "desc"]); // membership check for stored sort directions
+
+// Returns every known card id exactly once: valid ids from `input` come first (first occurrence
+// wins, unknown or non-string entries are dropped), then any missing ids in default order.
+function normalizeCardOrder(input: unknown): TraceCardId[] {
+  const candidates: unknown[] = Array.isArray(input) ? input : [];
+  const seen = new Set<TraceCardId>();
+
+  candidates.forEach((candidate) => {
+    if (typeof candidate === "string" && VALID_CARD_IDS.has(candidate as TraceCardId)) {
+      seen.add(candidate as TraceCardId);
+    }
+  });
+  // Re-adding an id that is already present keeps its earlier position in the Set.
+  DEFAULT_CARD_ORDER.forEach((cardId) => seen.add(cardId));
+
+  return Array.from(seen);
+}
+
+// Saved card order from localStorage; the default order without `window` or on any read/parse error.
+function readCardOrder(): TraceCardId[] {
+  if (typeof window === "undefined") return DEFAULT_CARD_ORDER;
+  try {
+    return normalizeCardOrder(JSON.parse(window.localStorage.getItem(CARD_ORDER_STORAGE_KEY) || "null"));
+  } catch {
+    return DEFAULT_CARD_ORDER;
+  }
+}
+
+// Restores the persisted sort for the sessions table. Anything missing, unparsable or
+// outside the known keys/directions falls back to DEFAULT_TOP_SESSIONS_SORT.
+function readTopSessionsSort(): SessionSortState {
+  if (typeof window === "undefined") return DEFAULT_TOP_SESSIONS_SORT;
+
+  let stored: Partial<SessionSortState> | null = null;
+  try {
+    const raw = window.localStorage.getItem(TOP_SESSIONS_SORT_STORAGE_KEY);
+    stored = raw ? (JSON.parse(raw) as Partial<SessionSortState>) : null;
+  } catch {
+    return DEFAULT_TOP_SESSIONS_SORT;
+  }
+  if (!stored) return DEFAULT_TOP_SESSIONS_SORT;
+
+  const { key, direction } = stored;
+  const keyOk = typeof key === "string" && VALID_SORT_KEYS.has(key as SessionSortKey);
+  const directionOk =
+    typeof direction === "string" && VALID_SORT_DIRECTIONS.has(direction as SessionSortDirection);
+  if (!keyOk || !directionOk) return DEFAULT_TOP_SESSIONS_SORT;
+  // Return a fresh object holding only the two validated fields.
+  return { key: key as SessionSortKey, direction: direction as SessionSortDirection };
+}
+
+function compareNumbers(a: number, b: number, direction: SessionSortDirection) { // numeric sort comparator
+  return direction === "asc" ? a - b : b - a; // any direction other than "asc" sorts descending
+}
+
+/**
+ * Comparator for session rows. Compares the column named by `sort.key` in the order given
+ * by `sort.direction`; `lastAt` is coerced with `Number(value ?? 0)`, so a missing timestamp
+ * compares as 0. A key outside SessionSortKey makes every pair compare as equal.
+ */
+function compareSessionEntries(a: SessionUsageEntry, b: SessionUsageEntry, sort: SessionSortState) {
+  const { key, direction } = sort;
+
+  if (key === "requests") return compareNumbers(a.requests, b.requests, direction);
+  if (key === "tokens") return compareNumbers(a.tokens.total, b.tokens.total, direction);
+  if (key === "costUsd") return compareNumbers(a.costUsd, b.costUsd, direction);
+  if (key === "avgLatencyMs") return compareNumbers(a.avgLatencyMs, b.avgLatencyMs, direction);
+  if (key === "lastAt") return compareNumbers(Number(a.lastAt ?? 0), Number(b.lastAt ?? 0), direction);
+
+  return 0;
+}
+
 export function TracingTab(props: Props) {
   const {
     accounts,
     traceStats,
+    traceUsageStats,
     tokensTimeseries,
     modelChartData,
     modelCostChartData,
@@ -59,6 +170,18 @@ export function TracingTab(props: Props) {
     exportTracesZip,
     exportInProgress,
   } = props;
+  const [cardOrder, setCardOrder] = React.useState<TraceCardId[]>(() => readCardOrder());
+  const [layoutEditMode, setLayoutEditMode] = React.useState(false);
+  const [topSessionsSort, setTopSessionsSort] = React.useState<SessionSortState>(() => readTopSessionsSort());
+  // Persist layout preferences on change. Writes are best-effort: setItem can throw (for example
+  // when the quota is exceeded or storage is blocked), and the in-memory state stays usable.
+  React.useEffect(() => {
+    try { window.localStorage.setItem(CARD_ORDER_STORAGE_KEY, JSON.stringify(normalizeCardOrder(cardOrder))); } catch { /* best-effort */ }
+  }, [cardOrder]);
+  React.useEffect(() => {
+    try { window.localStorage.setItem(TOP_SESSIONS_SORT_STORAGE_KEY, JSON.stringify(topSessionsSort)); } catch { /* best-effort */ }
+  }, [topSessionsSort]);
+
   const accountProviderById = React.useMemo(
     () => new Map(accounts.map((account) => [account.id, account.provider])),
     [accounts],
@@ -77,103 +200,160 @@ export function TracingTab(props: Props) {
   const formatTooltipValue = (value: any) => formatTokenChartValue(value?.[0] ?? value ?? 0);
 
   const formatPieTokenLabel = ({ value }: { value?: number }) => formatTokenChartValue(value);
+  const usageCoverage =
+    traceUsageStats.totals.requests > 0
+      ? (traceUsageStats.totals.requestsWithUsage / traceUsageStats.totals.requests) * 100
+      : 0; // percentage of requests with usage data; 0 avoids dividing by zero when there are no requests
+  const statusEntries = Object.entries(traceUsageStats.totals.statusCounts).sort((a, b) => b[1] - a[1]); // highest count first
+  const topAccounts = traceUsageStats.byAccount.slice(0, 6); // first six entries, in the order received
+  const topRoutes = traceUsageStats.byRoute.slice(0, 6); // first six entries, in the order received
+  const orderedCardIds = React.useMemo(() => normalizeCardOrder(cardOrder), [cardOrder]);
+  const topSessions = React.useMemo(
+    () =>
+      [...traceUsageStats.bySession] // copy first: sort() reorders the array in place
+        .sort((a, b) => {
+          const primary = compareSessionEntries(a, b, topSessionsSort);
+          if (primary !== 0) return primary;
+          const lastSeen = compareNumbers(Number(a.lastAt ?? 0), Number(b.lastAt ?? 0), "desc"); // tie-break 1: most recent first
+          if (lastSeen !== 0) return lastSeen;
+          return a.sessionId.localeCompare(b.sessionId); // tie-break 2: session id
+        })
+        .slice(0, 8), // keep the first eight after sorting
+    [topSessionsSort, traceUsageStats.bySession],
+  );
+  const layoutChanged = orderedCardIds.some((cardId, index) => cardId !== DEFAULT_CARD_ORDER[index]); // true when the order differs from the default
 
-  return (
-    <>
-      <section className="grid cards5">
-        <Metric title="Requests" value={`${traceStats.totals.requests}`} />
-        <Metric title="Error rate" value={pct(traceStats.totals.errorRate)} />
-        <Metric title="Total tokens" value={formatTokenCount(traceStats.totals.tokensTotal)} />
-        <Metric title="Total cost" value={usd(traceStats.totals.costUsd)} />
-        <Metric title="Avg latency" value={`${Math.round(traceStats.totals.latencyAvgMs)}ms`} />
-      </section>
-
-      <section className="grid cards2">
-        <section className="panel">
-          <h2>Tokens over time (hourly)</h2>
-          <div className="chart-wrap">
-            <ResponsiveContainer width="100%" height={260}>
-              <LineChart data={tokensTimeseries}>
-                <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
-                <XAxis dataKey="label" minTickGap={24} />
-                <YAxis tickFormatter={formatTokenChartValue} />
-                <Tooltip formatter={formatTooltipValue} />
-                <Legend />
-                <Line type="monotone" dataKey="tokensInput" name="input" stroke="#1f7a8c" strokeWidth={2} dot={false} />
-                <Line type="monotone" dataKey="tokensOutput" name="output" stroke="#2da4b8" strokeWidth={2} dot={false} />
-                <Line type="monotone" dataKey="tokensTotal" name="total" stroke="#4c956c" strokeWidth={2} dot={false} />
-              </LineChart>
-            </ResponsiveContainer>
-          </div>
-        </section>
-        <section className="panel">
-          <h2>Model usage</h2>
-          <div className="chart-wrap">
-            <ResponsiveContainer width="100%" height={260}>
-              <BarChart data={modelChartData}>
-                <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
-                <XAxis dataKey="label" interval={0} angle={-15} textAnchor="end" height={56} />
-                <YAxis />
-                <Tooltip />
-                <Legend />
-                <Bar dataKey="count" name="requests" fill="#1f7a8c" />
-              </BarChart>
-            </ResponsiveContainer>
-          </div>
-        </section>
-      </section>
+  // Swap `cardId` with its neighbour in `direction`; a move that is not possible keeps the order.
+  const moveCard = (cardId: TraceCardId, direction: -1 | 1) => {
+    setCardOrder((current) => {
+      const order = normalizeCardOrder(current).slice();
+      const from = order.indexOf(cardId);
+      const to = from + direction;
+      const canMove = from >= 0 && to >= 0 && to < order.length;
+      if (canMove) order.splice(to, 0, ...order.splice(from, 1)); // remove, then re-insert one slot over
+      return order;
+    });
+  };
 
-      <section className="grid cards2">
-        <section className="panel">
-          <h2>Model cost (USD)</h2>
-          <div className="chart-wrap">
-            <ResponsiveContainer width="100%" height={260}>
-              <BarChart data={modelCostChartData}>
-                <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
-                <XAxis dataKey="label" interval={0} angle={-15} textAnchor="end" height={56} />
-                <YAxis />
-                <Tooltip formatter={(v: any) => usd(Number(v) || 0)} />
-                <Legend />
-                <Bar dataKey="costUsd" name="cost usd" fill="#4c956c" />
-              </BarChart>
-            </ResponsiveContainer>
-          </div>
-        </section>
-        <section className="panel">
-          <h2>Error trend (hourly)</h2>
-          <div className="chart-wrap">
-            <ResponsiveContainer width="100%" height={260}>
-              <LineChart data={tokensTimeseries}>
-                <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
-                <XAxis dataKey="label" minTickGap={24} />
-                <YAxis />
-                <Tooltip />
-                <Legend />
-                <Line type="monotone" dataKey="errors" name="errors" stroke="#c44545" strokeWidth={2} dot={false} />
-                <Line type="monotone" dataKey="requests" name="requests" stroke="#355070" strokeWidth={2} dot={false} />
-              </LineChart>
-            </ResponsiveContainer>
-          </div>
-        </section>
-        <section className="panel">
-          <h2>Cost over time (hourly)</h2>
-          <div className="chart-wrap">
-            <ResponsiveContainer width="100%" height={260}>
-              <LineChart data={tokensTimeseries}>
-                <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
-                <XAxis dataKey="label" minTickGap={24} />
-                <YAxis />
-                <Tooltip formatter={(v: any) => usd(Number(v) || 0)} />
-                <Legend />
-                <Line type="monotone" dataKey="costUsd" name="cost usd" stroke="#4c956c" strokeWidth={2} dot={false} />
-              </LineChart>
-            </ResponsiveContainer>
-          </div>
-        </section>
-      </section>
+  const renderCardControls = (cardId: TraceCardId, index: number, extra?: React.ReactNode) => ( // toolbar for the card at position `index`; `extra` renders before the move buttons
+    <div className="inline wrap tracing-card-toolbar">
+      {extra}
+      {layoutEditMode && (
+        <>
+          <button
+            className="btn ghost small"
+            onClick={() => moveCard(cardId, -1)}
+            disabled={index === 0} // the first card cannot move earlier
+            title="Move card earlier"
+          >
+            Earlier
+          </button>
+          <button
+            className="btn ghost small"
+            onClick={() => moveCard(cardId, 1)}
+            disabled={index === orderedCardIds.length - 1} // the last card cannot move later
+            title="Move card later"
+          >
+            Later
+          </button>
+        </>
+      )}
+    </div>
+  );
 
-      <section className="panel">
-        <h2>Latency p50/p95 (hourly)</h2>
+  const cards: Record<TraceCardId, { title: string; fullSpan?: boolean; render: () => React.ReactNode; toolbar?: React.ReactNode }> = {
+    tokensOverTime: {
+      title: "Tokens over time (hourly)",
+      render: () => (
+        <div className="chart-wrap">
+          <ResponsiveContainer width="100%" height={260}>
+            <LineChart data={tokensTimeseries}>
+              <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
+              <XAxis dataKey="label" minTickGap={24} />
+              <YAxis tickFormatter={formatTokenChartValue} />
+              <Tooltip formatter={formatTooltipValue} />
+              <Legend />
+              <Line type="monotone" dataKey="tokensInput" name="input" stroke="#1f7a8c" strokeWidth={2} dot={false} />
+              <Line type="monotone" dataKey="tokensOutput" name="output" stroke="#2da4b8" strokeWidth={2} dot={false} />
+              <Line type="monotone" dataKey="tokensTotal" name="total" stroke="#4c956c" strokeWidth={2} dot={false} />
+            </LineChart>
+          </ResponsiveContainer>
+        </div>
+      ),
+    },
+    modelUsage: {
+      title: "Model usage",
+      render: () => (
+        <div className="chart-wrap">
+          <ResponsiveContainer width="100%" height={260}>
+            <BarChart data={modelChartData}>
+              <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
+              <XAxis dataKey="label" interval={0} angle={-15} textAnchor="end" height={56} />
+              <YAxis />
+              <Tooltip />
+              <Legend />
+              <Bar dataKey="count" name="requests" fill="#1f7a8c" />
+            </BarChart>
+          </ResponsiveContainer>
+        </div>
+      ),
+    },
+    modelCost: {
+      title: "Model cost (USD)",
+      render: () => (
+        <div className="chart-wrap">
+          <ResponsiveContainer width="100%" height={260}>
+            <BarChart data={modelCostChartData}>
+              <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
+              <XAxis dataKey="label" interval={0} angle={-15} textAnchor="end" height={56} />
+              <YAxis />
+              <Tooltip formatter={(v: any) => usd(Number(v) || 0)} />
+              <Legend />
+              <Bar dataKey="costUsd" name="cost usd" fill="#4c956c" />
+            </BarChart>
+          </ResponsiveContainer>
+        </div>
+      ),
+    },
+    errorTrend: {
+      title: "Error trend (hourly)",
+      render: () => (
+        <div className="chart-wrap">
+          <ResponsiveContainer width="100%" height={260}>
+            <LineChart data={tokensTimeseries}>
+              <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
+              <XAxis dataKey="label" minTickGap={24} />
+              <YAxis />
+              <Tooltip />
+              <Legend />
+              <Line type="monotone" dataKey="errors" name="errors" stroke="#c44545" strokeWidth={2} dot={false} />
+              <Line type="monotone" dataKey="requests" name="requests" stroke="#355070" strokeWidth={2} dot={false} />
+            </LineChart>
+          </ResponsiveContainer>
+        </div>
+      ),
+    },
+    costOverTime: {
+      title: "Cost over time (hourly)",
+      render: () => (
+        <div className="chart-wrap">
+          <ResponsiveContainer width="100%" height={260}>
+            <LineChart data={tokensTimeseries}>
+              <CartesianGrid strokeDasharray="3 3" stroke="#d6dde4" />
+              <XAxis dataKey="label" minTickGap={24} />
+              <YAxis />
+              <Tooltip formatter={(v: any) => usd(Number(v) || 0)} />
+              <Legend />
+              <Line type="monotone" dataKey="costUsd" name="cost usd" stroke="#4c956c" strokeWidth={2} dot={false} />
+            </LineChart>
+          </ResponsiveContainer>
+        </div>
+      ),
+    },
+    latency: {
+      title: "Latency p50/p95 (hourly)",
+      fullSpan: true,
+      render: () => (
         <div className="chart-wrap">
           <ResponsiveContainer width="100%" height={260}>
             <LineChart data={tokensTimeseries}>
@@ -187,13 +367,14 @@ export function TracingTab(props: Props) {
             </LineChart>
           </ResponsiveContainer>
         </div>
-      </section>
-
-      <section className="panel">
-        <h2>Model split by token volume</h2>
+      ),
+    },
+    tokenSplit: {
+      title: "Model split by token volume",
+      render: () => (
         <div className="chart-wrap">
           <ResponsiveContainer width="100%" height={260}>
-              <PieChart>
+            <PieChart>
               <Pie
                 data={modelChartData}
                 dataKey="tokensTotal"
@@ -210,6 +391,199 @@ export function TracingTab(props: Props) {
             </PieChart>
           </ResponsiveContainer>
         </div>
+      ),
+    },
+    usageByAccount: {
+      title: "Usage by account",
+      render: () => (
+        <div className="table-wrap">
+          <table>
+            <thead>
+              <tr>
+                <th>Account</th>
+                <th>Req</th>
+                <th>Success</th>
+                <th>Tokens</th>
+                <th>Cost</th>
+                <th>Avg latency</th>
+              </tr>
+            </thead>
+            <tbody>
+              {topAccounts.map((entry) => {
+                const accountLabel = sanitized
+                  ? maskEmail(entry.account.email) || maskId(entry.accountId)
+                  : entry.account.email ?? entry.accountId;
+                return (
+                  <tr key={entry.accountId}>
+                    <td className="mono">{accountLabel}</td>
+                    <td>{entry.requests}</td>
+                    <td>{entry.successRate.toFixed(1)}%</td>
+                    <td>{formatTokenCount(entry.tokens.total)}</td>
+                    <td className="mono">{usd(entry.costUsd)}</td>
+                    <td>{Math.round(entry.avgLatencyMs)}ms</td>
+                  </tr>
+                );
+              })}
+              {!topAccounts.length && (
+                <tr>
+                  <td colSpan={6} className="muted">No account usage in this range.</td>
+                </tr>
+              )}
+            </tbody>
+          </table>
+        </div>
+      ),
+    },
+    usageByRoute: {
+      title: "Usage by route",
+      render: () => (
+        <div className="table-wrap">
+          <table>
+            <thead>
+              <tr>
+                <th>Route</th>
+                <th>Req</th>
+                <th>Errors</th>
+                <th>Stream</th>
+                <th>Tokens</th>
+                <th>Avg latency</th>
+              </tr>
+            </thead>
+            <tbody>
+              {topRoutes.map((entry) => (
+                <tr key={entry.route}>
+                  <td className="mono">{routeLabel(entry.route)}</td>
+                  <td>{entry.requests}</td>
+                  <td>{entry.errors}</td>
+                  <td>{entry.streamingRate.toFixed(1)}%</td>
+                  <td>{formatTokenCount(entry.tokens.total)}</td>
+                  <td>{Math.round(entry.avgLatencyMs)}ms</td>
+                </tr>
+              ))}
+              {!topRoutes.length && (
+                <tr>
+                  <td colSpan={6} className="muted">No route usage in this range.</td>
+                </tr>
+              )}
+            </tbody>
+          </table>
+        </div>
+      ),
+    },
+    topSessions: {
+      title: "Top sessions",
+      toolbar: (
+        <>
+          <select
+            value={topSessionsSort.key}
+            onChange={(e) =>
+              setTopSessionsSort((current) => ({
+                ...current,
+                key: e.target.value as SessionSortKey,
+              }))
+            }
+          >
+            <option value="requests">Sort: requests</option>
+            <option value="tokens">Sort: tokens</option>
+            <option value="costUsd">Sort: cost</option>
+            <option value="avgLatencyMs">Sort: latency</option>
+            <option value="lastAt">Sort: last seen</option>
+          </select>
+          <button
+            className="btn ghost small"
+            onClick={() =>
+              setTopSessionsSort((current) => ({
+                ...current,
+                direction: current.direction === "desc" ? "asc" : "desc",
+              }))
+            }
+          >
+            {topSessionsSort.direction === "desc" ? "Desc" : "Asc"}
+          </button>
+        </>
+      ),
+      render: () => (
+        <>
+          <p className="muted">Session IDs are shown by tail only.</p>
+          <div className="table-wrap">
+            <table>
+              <thead>
+                <tr>
+                  <th>Session</th>
+                  <th>Req</th>
+                  <th>Tokens</th>
+                  <th>Cost</th>
+                  <th>Avg latency</th>
+                  <th>Last seen</th>
+                </tr>
+              </thead>
+              <tbody>
+                {topSessions.map((entry) => (
+                  <tr key={entry.sessionId}>
+                    <td className="mono">{formatSessionTail(entry.sessionId)}</td>
+                    <td>{entry.requests}</td>
+                    <td>{formatTokenCount(entry.tokens.total)}</td>
+                    <td className="mono">{usd(entry.costUsd)}</td>
+                    <td>{Math.round(entry.avgLatencyMs)}ms</td>
+                    <td>{fmt(entry.lastAt)}</td>
+                  </tr>
+                ))}
+                {!topSessions.length && (
+                  <tr>
+                    <td colSpan={6} className="muted">No session-tagged traces in this range.</td>
+                  </tr>
+                )}
+              </tbody>
+            </table>
+          </div>
+        </>
+      ),
+    },
+  };
+
+  return (
+    <>
+      <section className="grid cards5">
+        <Metric title="Requests" value={`${traceStats.totals.requests}`} />
+        <Metric title="Error rate" value={pct(traceStats.totals.errorRate)} />
+        <Metric title="Total tokens" value={formatTokenCount(traceStats.totals.tokensTotal)} />
+        <Metric title="Total cost" value={usd(traceStats.totals.costUsd)} />
+        <Metric title="Avg latency" value={`${Math.round(traceStats.totals.latencyAvgMs)}ms`} />
+      </section>
+
+      <section className="grid cards5">
+        <Metric title="Success rate" value={`${traceUsageStats.totals.successRate.toFixed(1)}%`} />
+        <Metric title="Stream share" value={`${traceUsageStats.totals.streamingRate.toFixed(1)}%`} />
+        <Metric title="Usage captured" value={`${usageCoverage.toFixed(1)}%`} />
+        <Metric title="Active sessions" value={`${traceUsageStats.bySession.length}`} />
+        <Metric title="Active accounts" value={`${traceUsageStats.byAccount.length}`} />
+      </section>
+
+      <section className="tracing-layout-actions">
+        <p className="muted">Analytics card order is saved in this browser.</p>
+        <div className="inline wrap">
+          <button className="btn ghost" onClick={() => setLayoutEditMode((current) => !current)}>
+            {layoutEditMode ? "Done editing" : "Edit layout"}
+          </button>
+          <button className="btn secondary" onClick={() => setCardOrder(DEFAULT_CARD_ORDER)} disabled={!layoutChanged}>
+            Reset layout
+          </button>
+        </div>
+      </section>
+
+      <section className="grid tracing-layout">
+        {orderedCardIds.map((cardId, index) => {
+          const card = cards[cardId];
+          return (
+            <section key={cardId} className={`panel tracing-card${card.fullSpan ? " full-span" : ""}`}>
+              <div className="tracing-card-head">
+                <h2>{card.title}</h2>
+                {renderCardControls(cardId, index, card.toolbar)}
+              </div>
+              {card.render()}
+            </section>
+          );
+        })}
       </section>
 
       <section className="panel">
@@ -229,18 +603,40 @@ export function TracingTab(props: Props) {
               <option value="all">All time</option>
             </select>
             <button className="btn ghost" onClick={() => void gotoTracePage(tracePagination.page - 1)} disabled={!tracePagination.hasPrev}>Previous</button>
-            <span className="mono">Page {tracePagination.page} / {tracePagination.totalPages} ({tracePagination.total} traces)</span>
+            <span className="mono">
+              Page {tracePagination.page} / {tracePagination.totalPages} ({tracePagination.total} traces, {tracePagination.pageSize} per page)
+            </span>
             <button className="btn ghost" onClick={() => void gotoTracePage(tracePagination.page + 1)} disabled={!tracePagination.hasNext}>Next</button>
             <button className="btn secondary" onClick={() => void exportTracesZip()} disabled={exportInProgress}>
               {exportInProgress ? "Exporting..." : "Export all (.zip)"}
             </button>
           </div>
         </div>
+        <div className="trace-summary">
+          <div className="chips">
+            {statusEntries.map(([status, count]) => {
+              const share =
+                traceUsageStats.totals.requests > 0
+                  ? (count / traceUsageStats.totals.requests) * 100
+                  : 0;
+              return (
+                <span key={status} className="chip mono">
+                  {status}: {count} ({share.toFixed(1)}%)
+                </span>
+              );
+            })}
+            {!statusEntries.length && <span className="chip mono">No traces</span>}
+          </div>
+          <p className="muted">
+            Matched {traceUsageStats.tracesMatched} of {traceUsageStats.tracesEvaluated} retained traces in the selected range.
+          </p>
+        </div>
         <div className="table-wrap">
           <table>
             <thead>
               <tr>
                 <th>Time</th>
+                <th>Session</th>
                 <th>Route</th>
                 <th>Model</th>
                 <th>Account</th>
@@ -259,10 +655,12 @@ export function TracingTab(props: Props) {
                 const accountLabel = sanitized
                   ? maskEmail(t.accountEmail) || maskId(t.accountId)
                   : t.accountEmail ?? t.accountId ?? "-";
+                const sessionLabel = formatSessionTail(t.sessionId);
                 return (
                   <React.Fragment key={t.id}>
                     <tr onClick={() => void toggleExpandedTrace(t.id)} className="trace-row">
                       <td>{fmt(t.at)}</td>
+                      <td className="mono">{sessionLabel || "-"}</td>
                       <td className="mono">{routeLabel(t.route)}</td>
                       <td className="mono">{t.model ?? "-"}</td>
                       <td>
@@ -289,7 +687,7 @@ export function TracingTab(props: Props) {
                     </tr>
                     {isExpanded && (
                       <tr>
-                        <td colSpan={9}>
+                        <td colSpan={10}>
                           <div className="expanded-trace">
                             {expandedTraceLoading && <div className="muted">Loading trace details...</div>}
                             {!expandedTraceLoading && expandedTrace && expandedTrace.id === t.id && (
diff --git a/web/src/lib/ui.ts b/web/src/lib/ui.ts
index 8c156b0..844699f 100644
--- a/web/src/lib/ui.ts
+++ b/web/src/lib/ui.ts
@@ -1,6 +1,6 @@
-import type { TracePagination, TraceStats } from "../types";
+import type { TracePagination, TraceStats, TraceUsageStats, UsageSummary } from "../types";
 
-export const TRACE_PAGE_SIZE = 100;
+export const TRACE_PAGE_SIZE = 50;
 export const CHART_COLORS = ["#1f7a8c", "#2da4b8", "#4c956c", "#f4a259", "#e76f51", "#8a5a44", "#355070", "#43aa8b"];
 
 export const EMPTY_TRACE_STATS: TraceStats = {
@@ -27,6 +27,35 @@ export const EMPTY_TRACE_PAGINATION: TracePagination = {
   hasNext: false,
 };
 
+const EMPTY_USAGE_SUMMARY: UsageSummary = { // All-zero UsageSummary; module-private, referenced as `totals` by EMPTY_TRACE_USAGE_STATS.
+  requests: 0,
+  ok: 0,
+  errors: 0,
+  successRate: 0,
+  stream: 0,
+  streamingRate: 0,
+  latencyMsTotal: 0,
+  avgLatencyMs: 0,
+  requestsWithUsage: 0,
+  tokens: {
+    prompt: 0,
+    completion: 0,
+    total: 0,
+  },
+  costUsd: 0,
+  statusCounts: {}, // no statuses recorded; the optional firstAt/lastAt fields are intentionally left unset
+};
+
+export const EMPTY_TRACE_USAGE_STATS: TraceUsageStats = { // Empty TraceUsageStats: no filters, zero totals, no breakdown rows. NOTE(review): presumably the initial state before stats are loaded — confirm at the call site.
+  filters: {},
+  totals: EMPTY_USAGE_SUMMARY, // same object reference, not a copy — treat as read-only
+  byAccount: [],
+  byRoute: [],
+  bySession: [],
+  tracesEvaluated: 0,
+  tracesMatched: 0,
+};
+
 export const fmt = (ts?: number) => (!ts ? "-" : new Date(ts).toLocaleString());
 export const clampPct = (v: number) => Math.max(0, Math.min(100, v));
 export const compactNumber = (v: number) =>
@@ -66,3 +95,9 @@ export function maskId(v?: string) {
   if (!v) return "acc-xxxx";
   return "*";
 }
+
+/** Shortens a session id to "..." plus its last 8 characters; trimmed ids of 8 or fewer characters are returned whole, blank or missing ids become "-". */
+export function formatSessionTail(v?: string) {
+  const trimmed = String(v ?? "").trim();
+  return trimmed.length > 8 ? `...${trimmed.slice(-8)}` : (trimmed || "-");
+}
diff --git a/web/src/styles.css b/web/src/styles.css
index 426514e..a3991b0 100644
--- a/web/src/styles.css
+++ b/web/src/styles.css
@@ -199,6 +199,12 @@ button.danger {
   color: #fff;
 }
 
+.btn.small, /* compact button variant; used by the "Top sessions" sort-direction toggle (className "btn ghost small") */
+button.small {
+  padding: 6px 9px;
+  font-size: 12px;
+}
+
 .tabs {
   display: flex;
   gap: 8px;
@@ -391,6 +397,44 @@ small { display: block; color: var(--muted); }
   margin-bottom: 12px;
 }
 
+.tracing-layout-actions { /* row above the analytics cards: hint text at one end, Edit layout / Reset layout buttons at the other */
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  gap: 12px;
+}
+
+.tracing-layout { /* analytics card grid; applied together with the `grid` class */
+  grid-template-columns: repeat(2, minmax(0, 1fr)); /* two equal columns; minmax(0, ...) lets a column shrink below its content's minimum width */
+  align-items: start; /* cards keep their own height instead of stretching to the row height */
+}
+
+.tracing-card.full-span { /* added when a card definition sets fullSpan */
+  grid-column: 1 / -1; /* from the first to the last column line, i.e. the whole row */
+}
+
+.tracing-card-head { /* card header: title on one side, per-card controls on the other */
+  display: flex;
+  justify-content: space-between;
+  align-items: flex-start;
+  gap: 12px;
+  margin-bottom: 12px;
+}
+
+.tracing-card-head h2 {
+  margin-bottom: 0; /* spacing below the header comes from .tracing-card-head's own margin-bottom */
+}
+
+.tracing-card-toolbar { /* NOTE(review): no element with this class is visible in this part of the diff; presumably emitted by renderCardControls — confirm */
+  justify-content: flex-end;
+}
+
+.trace-summary { /* status chips plus the "Matched X of Y retained traces" note above the trace table */
+  display: grid;
+  gap: 10px;
+  margin-bottom: 12px;
+}
+
 .chart-wrap {
   width: 100%;
   min-height: 260px;
@@ -459,6 +503,14 @@ details summary {
     grid-template-columns: 1fr;
   }
 
+  .tracing-layout { /* NOTE(review): indented inside an enclosing at-rule whose header is not visible here (presumably a narrow-viewport @media block) — confirm */
+    grid-template-columns: 1fr; /* collapse the two-column card grid to a single column */
+  }
+
+  .tracing-card.full-span {
+    grid-column: auto; /* drop the explicit 1 / -1 span; with one column it is no longer needed */
+  }
+
   .topbar {
     align-items: flex-start;
     flex-direction: column;
@@ -467,4 +519,10 @@ details summary {
   .modal-grid {
     grid-template-columns: 1fr;
   }
+
+  .tracing-layout-actions, /* NOTE(review): inside the same enclosing at-rule as the other indented overrides (header not visible here) — confirm its condition */
+  .tracing-card-head {
+    align-items: flex-start;
+    flex-direction: column; /* stack the text/title above the buttons/controls instead of placing them side by side */
+  }
 }
diff --git a/web/src/types.ts b/web/src/types.ts
index fe9b810..2606ce3 100644
--- a/web/src/types.ts
+++ b/web/src/types.ts
@@ -15,6 +15,7 @@ export type Trace = {
   id: string;
   at: number;
   route: string;
+  sessionId?: string;
   accountId?: string;
   accountEmail?: string;
   model?: string;
@@ -32,6 +33,52 @@ export type Trace = {
   hasRequestBody?: boolean;
 };
 
+export type UsageSummary = { // Aggregated usage counters for one group of traces; used for TraceUsageStats.totals and for each byAccount/byRoute/bySession row.
+  requests: number;
+  ok: number;
+  errors: number;
+  successRate: number; // rendered as `successRate.toFixed(1)` followed by "%", so presumably a 0-100 percentage — confirm against the API
+  stream: number;
+  streamingRate: number; // rendered the same way as successRate ("Stream share"); presumably 0-100
+  latencyMsTotal: number;
+  avgLatencyMs: number; // rounded and shown with an "ms" suffix in the tables
+  requestsWithUsage: number; // NOTE(review): presumably the number of requests that reported token usage — confirm
+  tokens: {
+    prompt: number;
+    completion: number;
+    total: number; // the value the tables display via formatTokenCount
+  };
+  costUsd: number; // formatted with usd() in the UI
+  statusCounts: Record<string, number>; // NOTE(review): presumably status -> request count, feeding the status chips — confirm
+  firstAt?: number;
+  lastAt?: number; // passed to fmt(), which feeds it to `new Date(ts)`; i.e. an epoch-milliseconds timestamp
+};
+
+export type TraceUsageStats = { // Usage statistics payload: overall totals plus per-account, per-route and per-session breakdowns.
+  filters: { // NOTE(review): presumably echoes the filters the stats were computed with — confirm against the server
+    accountId?: string;
+    route?: string;
+    sinceMs?: number;
+    untilMs?: number;
+  };
+  totals: UsageSummary;
+  byAccount: Array< // one UsageSummary per account, extended with the account's identity
+    UsageSummary & {
+      accountId: string;
+      account: {
+        id: string;
+        provider?: "openai" | "mistral";
+        email?: string; // masked with maskEmail() in the UI when the sanitized flag is set
+        enabled?: boolean;
+      };
+    }
+  >;
+  byRoute: Array<UsageSummary & { route: string }>;
+  bySession: Array<UsageSummary & { sessionId: string }>; // its length is shown as "Active sessions"
+  tracesEvaluated: number; // the "of Y retained traces" figure in the trace summary line
+  tracesMatched: number; // the "Matched X" figure in the trace summary line
+};
+
 export type TraceStats = {
   totals: {
     requests: number;

Account	Req	Success	Tokens	Cost	Avg latency
{accountLabel}	{entry.requests}	{entry.successRate.toFixed(1)}%	{formatTokenCount(entry.tokens.total)}	{usd(entry.costUsd)}	{Math.round(entry.avgLatencyMs)}ms
No account usage in this range.
Route	Req	Errors	Stream	Tokens	Avg latency
{routeLabel(entry.route)}	{entry.requests}	{entry.errors}	{entry.streamingRate.toFixed(1)}%	{formatTokenCount(entry.tokens.total)}	{Math.round(entry.avgLatencyMs)}ms
No route usage in this range.
Session	Req	Tokens	Cost	Avg latency	Last seen
{formatSessionTail(entry.sessionId)}	{entry.requests}	{formatTokenCount(entry.tokens.total)}	{usd(entry.costUsd)}	{Math.round(entry.avgLatencyMs)}ms	{fmt(entry.lastAt)}
No session-tagged traces in this range.
Time	Session	Route	Model	Account
{fmt(t.at)}	{sessionLabel \|\| "-"}	{routeLabel(t.route)}	{t.model ?? "-"}	@@ -289,7 +687,7 @@ export function TracingTab(props: Props) {
+	{expandedTraceLoading && Loading trace details... } {!expandedTraceLoading && expandedTrace && expandedTrace.id === t.id && ( diff --git a/web/src/lib/ui.ts b/web/src/lib/ui.ts index 8c156b0..844699f 100644 --- a/web/src/lib/ui.ts +++ b/web/src/lib/ui.ts @@ -1,6 +1,6 @@ -import type { TracePagination, TraceStats } from "../types"; +import type { TracePagination, TraceStats, TraceUsageStats, UsageSummary } from "../types"; -export const TRACE_PAGE_SIZE = 100; +export const TRACE_PAGE_SIZE = 50; export const CHART_COLORS = ["#1f7a8c", "#2da4b8", "#4c956c", "#f4a259", "#e76f51", "#8a5a44", "#355070", "#43aa8b"]; export const EMPTY_TRACE_STATS: TraceStats = { @@ -27,6 +27,35 @@ export const EMPTY_TRACE_PAGINATION: TracePagination = { hasNext: false, }; +const EMPTY_USAGE_SUMMARY: UsageSummary = { + requests: 0, + ok: 0, + errors: 0, + successRate: 0, + stream: 0, + streamingRate: 0, + latencyMsTotal: 0, + avgLatencyMs: 0, + requestsWithUsage: 0, + tokens: { + prompt: 0, + completion: 0, + total: 0, + }, + costUsd: 0, + statusCounts: {}, +}; + +export const EMPTY_TRACE_USAGE_STATS: TraceUsageStats = { + filters: {}, + totals: EMPTY_USAGE_SUMMARY, + byAccount: [], + byRoute: [], + bySession: [], + tracesEvaluated: 0, + tracesMatched: 0, +}; + export const fmt = (ts?: number) => (!ts ? "-" : new Date(ts).toLocaleString()); export const clampPct = (v: number) => Math.max(0, Math.min(100, v)); export const compactNumber = (v: number) => @@ -66,3 +95,9 @@ export function maskId(v?: string) { if (!v) return "acc-xxxx"; return "*"; } + +export function formatSessionTail(v?: string) { + const value = String(v ?? "").trim(); + if (!value) return "-"; + return value.length <= 8 ? 
value : `...${value.slice(-8)}`; +} diff --git a/web/src/styles.css b/web/src/styles.css index 426514e..a3991b0 100644 --- a/web/src/styles.css +++ b/web/src/styles.css @@ -199,6 +199,12 @@ button.danger { color: #fff; } +.btn.small, +button.small { + padding: 6px 9px; + font-size: 12px; +} + .tabs { display: flex; gap: 8px; @@ -391,6 +397,44 @@ small { display: block; color: var(--muted); } margin-bottom: 12px; } +.tracing-layout-actions { + display: flex; + justify-content: space-between; + align-items: center; + gap: 12px; +} + +.tracing-layout { + grid-template-columns: repeat(2, minmax(0, 1fr)); + align-items: start; +} + +.tracing-card.full-span { + grid-column: 1 / -1; +} + +.tracing-card-head { + display: flex; + justify-content: space-between; + align-items: flex-start; + gap: 12px; + margin-bottom: 12px; +} + +.tracing-card-head h2 { + margin-bottom: 0; +} + +.tracing-card-toolbar { + justify-content: flex-end; +} + +.trace-summary { + display: grid; + gap: 10px; + margin-bottom: 12px; +} + .chart-wrap { width: 100%; min-height: 260px; @@ -459,6 +503,14 @@ details summary { grid-template-columns: 1fr; } + .tracing-layout { + grid-template-columns: 1fr; + } + + .tracing-card.full-span { + grid-column: auto; + } + .topbar { align-items: flex-start; flex-direction: column; @@ -467,4 +519,10 @@ details summary { .modal-grid { grid-template-columns: 1fr; } + + .tracing-layout-actions, + .tracing-card-head { + align-items: flex-start; + flex-direction: column; + } } diff --git a/web/src/types.ts b/web/src/types.ts index fe9b810..2606ce3 100644 --- a/web/src/types.ts +++ b/web/src/types.ts @@ -15,6 +15,7 @@ export type Trace = { id: string; at: number; route: string; + sessionId?: string; accountId?: string; accountEmail?: string; model?: string; @@ -32,6 +33,52 @@ export type Trace = { hasRequestBody?: boolean; }; +export type UsageSummary = { + requests: number; + ok: number; + errors: number; + successRate: number; + stream: number; + streamingRate: number; + 
latencyMsTotal: number; + avgLatencyMs: number; + requestsWithUsage: number; + tokens: { + prompt: number; + completion: number; + total: number; + }; + costUsd: number; + statusCounts: Record; + firstAt?: number; + lastAt?: number; +}; + +export type TraceUsageStats = { + filters: { + accountId?: string; + route?: string; + sinceMs?: number; + untilMs?: number; + }; + totals: UsageSummary; + byAccount: Array< + UsageSummary & { + accountId: string; + account: { + id: string; + provider?: "openai" \| "mistral"; + email?: string; + enabled?: boolean; + }; + } + >; + byRoute: Array; + bySession: Array; + tracesEvaluated: number; + tracesMatched: number; +}; + export type TraceStats = { totals: { requests: number;