raphaeltm · simple-agent-manager · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026
diff --git a/.github/workflows/deploy-reusable.yml b/.github/workflows/deploy-reusable.yml
@@ -448,6 +448,7 @@ jobs:
           R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
           R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
           SMOKE_TEST_AUTH_ENABLED: ${{ secrets.SMOKE_TEST_AUTH_ENABLED }}
+          ANTHROPIC_API_KEY_TRIAL: ${{ secrets.ANTHROPIC_API_KEY_TRIAL }}
           SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }}
           GA4_API_SECRET: ${{ secrets.GA4_API_SECRET }}
           GA4_MEASUREMENT_ID: ${{ secrets.GA4_MEASUREMENT_ID }}

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -221,6 +221,7 @@ Domains chain together: competitive research feeds marketing and business strate
 - Cloudflare D1 (credentials table with AES-GCM encrypted tokens) (028-provider-infrastructure)
 
 ## Recent Changes
+- ai-proxy-gateway: AI inference proxy routes LLM requests through Cloudflare AI Gateway — `POST /ai/v1/chat/completions` accepts OpenAI-format requests, transparently routes to Workers AI (@cf/* models) or Anthropic (claude-* models) with format translation (`ai-anthropic-translate.ts`); per-user RPM rate limiting + daily token budget via KV; admin model picker at `/admin/ai-proxy`; AI usage analytics dashboard at `/admin/analytics/ai-usage` aggregates AI Gateway logs by model, day, cost; configurable via AI_PROXY_ENABLED, AI_PROXY_DEFAULT_MODEL, AI_GATEWAY_ID, AI_PROXY_ALLOWED_MODELS, AI_PROXY_RATE_LIMIT_RPM, AI_PROXY_RATE_LIMIT_WINDOW_SECONDS, AI_PROXY_MAX_INPUT_TOKENS_PER_REQUEST, AI_USAGE_PAGE_SIZE, AI_USAGE_MAX_PAGES
 - trial-agent-boot: TrialOrchestrator `discovery_agent_start` step now runs the full 5-step idempotent VM boot (registers agent session via `createAgentSessionOnNode`, mints MCP token with trialId as synthetic taskId, `startAgentSessionOnNode` with discovery prompt + MCP server URL, drives ACP session `pending → assigned → running`; idempotency flags `mcpToken`, `agentSessionCreatedOnVm`, `agentStartedOnVm`, `acpAssignedOnVm`, `acpRunningOnVm` on DO state let crash/retry resume without double-booking); new `fetchDefaultBranch()` probes GitHub `/repos/:owner/:repo` with AbortController-bounded fetch and threads the real default branch through `projects.defaultBranch` + workspace `git clone --branch` (master-default repos like `octocat/Hello-World` now work); configurable via TRIAL_GITHUB_TIMEOUT_MS (default: 5000); new capability test `apps/api/tests/unit/durable-objects/trial-orchestrator-agent-boot.test.ts` asserts every cross-boundary call fires with correct payload; rule 10 updated with port-of-pattern coverage requirement. See `docs/notes/2026-04-19-trial-orchestrator-agent-boot-postmortem.md`.
 - trial-sse-events-fix: Fixed "zero trial.* events on staging" — `formatSse()` in `apps/api/src/routes/trial/events.ts` previously emitted named SSE frames (`event: trial.knowledge\ndata: {...}`), but the frontend subscribes via `source.onmessage` which only fires for the default (unnamed) event; frames arrived on the wire (curl saw them) but browser EventSource silently dropped them. Now emits unnamed `data: {JSON}\n\n` frames; the `TrialEvent` payload's own `type` discriminator preserves dispatch info. Also fixed `eventsUrl` in `apps/api/src/routes/trial/create.ts` response shape mismatch (`/api/trial/events?trialId=X` → `/api/trial/:trialId/events`). New capability test `apps/api/tests/workers/trial-event-bus-sse.test.ts` asserts no `event:` line + JSON round-trip across the TrialEventBus DO → SSE endpoint boundary; unit tests updated to assert new unnamed-frame contract and exact `eventsUrl` shape (no substring matches on URL contracts). Rule 13 updated to ban curl-only verification of browser-consumed SSE/WebSocket streams — curl confirms bytes, browsers confirm dispatch. See `docs/notes/2026-04-19-trial-sse-named-events-postmortem.md`.
 - trial-orchestrator-wire-up: TrialOrchestrator Durable Object + GitHub-API knowledge fast-path — `POST /api/trial/create` now fire-and-forget dispatches two concurrent `c.executionCtx.waitUntil` tasks: (1) `env.TRIAL_ORCHESTRATOR.idFromName(trialId)` DO state machine (alarm-driven, steps: project_creation → node_provisioning → workspace_creation → workspace_ready → agent_session → completed; idempotent `start()`; terminal guard on completed/failed; overall-timeout emits `trial.error`); (2) `emitGithubKnowledgeEvents()` probe hits unauthenticated `/repos/:o/:n`, `/repos/:o/:n/languages`, `/repos/:o/:n/readme` in parallel with AbortController-bounded fetches, emits up to `TRIAL_KNOWLEDGE_MAX_EVENTS` `trial.knowledge` events (description, primary language, stars, topics, license, language breakdown by bytes, README first paragraph), swallows all errors; `apps/api/src/services/trial/bridge.ts` bridges ACP session transitions (`running` → `trial.ready`, `failed` → `trial.error`) and MCP tool calls (`add_knowledge` → `trial.knowledge`, `create_idea` → `trial.idea`) into the SSE stream via `readTrialByProject()` KV lookup (no-op on non-trial projects); new sentinel `TRIAL_ANONYMOUS_INSTALLATION_ID` row in `github_installations` so trial projects satisfy the FK; configurable via TRIAL_ORCHESTRATOR_OVERALL_TIMEOUT_MS (default: 300000), TRIAL_ORCHESTRATOR_STEP_MAX_RETRIES (default: 5), TRIAL_ORCHESTRATOR_RETRY_BASE_DELAY_MS (default: 1000), TRIAL_ORCHESTRATOR_RETRY_MAX_DELAY_MS (default: 60000), TRIAL_ORCHESTRATOR_NODE_READY_TIMEOUT_MS (default: 180000), TRIAL_ORCHESTRATOR_AGENT_READY_TIMEOUT_MS (default: 60000), TRIAL_ORCHESTRATOR_WORKSPACE_READY_TIMEOUT_MS (default: 180000), TRIAL_ORCHESTRATOR_WORKSPACE_READY_POLL_INTERVAL_MS (default: 5000), TRIAL_VM_SIZE (default: DEFAULT_VM_SIZE), TRIAL_VM_LOCATION (default: DEFAULT_VM_LOCATION), TRIAL_KNOWLEDGE_GITHUB_TIMEOUT_MS (default: 5000), TRIAL_KNOWLEDGE_MAX_EVENTS (default: 10)

diff --git a/apps/api/.env.example b/apps/api/.env.example
@@ -46,6 +46,58 @@ BASE_DOMAIN=workspaces.example.com
 # MAX_SMOKE_TOKENS_PER_USER=10
 # MAX_SMOKE_TOKEN_NAME_LENGTH=100
 
+# ========================================
+# Trial Onboarding
+# ========================================
+# See docs/guides/trial-configuration.md for the full tunable list.
+# TRIAL_MONTHLY_CAP=1500
+# TRIAL_WORKSPACE_TTL_MS=1200000                # 20 min
+# TRIAL_DATA_RETENTION_HOURS=168                # 7 days
+# TRIAL_ANONYMOUS_USER_ID=system_anonymous_trials
+# TRIAL_ANONYMOUS_INSTALLATION_ID=1
+# TRIAL_AGENT_TYPE_STAGING=opencode
+# TRIAL_AGENT_TYPE_PRODUCTION=claude-code
+# TRIAL_DEFAULT_WORKSPACE_PROFILE=lightweight
+# TRIAL_VM_SIZE=cax11
+# TRIAL_VM_LOCATION=fsn1
+# TRIALS_ENABLED_KV_KEY=trials:enabled
+# TRIAL_REPO_MAX_KB=102400
+# TRIAL_COUNTER_KEEP_MONTHS=6
+# TRIAL_KILL_SWITCH_CACHE_MS=30000
+# TRIAL_GITHUB_TIMEOUT_MS=10000
+# TRIAL_WAITLIST_PURGE_DAYS=90
+# TRIAL_MODEL=@cf/qwen/qwen3-30b-a3b-fp8        # Model for trial conversations
+# TRIAL_LLM_PROVIDER=workers-ai                 # "workers-ai" or "anthropic"
+# ANTHROPIC_API_KEY_TRIAL=                       # Required only when TRIAL_LLM_PROVIDER=anthropic
+# ENVIRONMENT=staging                            # "staging" or "production" — set by deploy pipeline
+# TRIAL_SSE_HEARTBEAT_MS=15000
+# TRIAL_SSE_POLL_TIMEOUT_MS=15000
+# TRIAL_SSE_MAX_DURATION_MS=1800000
+# TRIAL_CRON_ROLLOVER_CRON="0 5 1 * *"
+# TRIAL_CRON_WAITLIST_CLEANUP="0 4 * * *"
+# TRIAL_KNOWLEDGE_GITHUB_TIMEOUT_MS=10000
+# TRIAL_KNOWLEDGE_MAX_EVENTS=50
+# TRIAL_ORCHESTRATOR_NODE_WAIT_MS=300000
+# TRIAL_ORCHESTRATOR_WORKSPACE_WAIT_MS=300000
+# TRIAL_ORCHESTRATOR_AGENT_BOOT_TIMEOUT_MS=120000
+# TRIAL_ORCHESTRATOR_STEP_RETRY_MAX=3
+# TRIAL_ORCHESTRATOR_STEP_RETRY_DELAY_MS=5000
+# RATE_LIMIT_TRIAL_CREATE=10
+# RATE_LIMIT_TRIAL_SSE=30
+
+# ========================================
+# AI Inference Proxy (Cloudflare AI Gateway)
+# ========================================
+# AI_PROXY_ENABLED=true
+# AI_PROXY_DEFAULT_MODEL=@cf/qwen/qwen3-30b-a3b-fp8
+# AI_GATEWAY_ID=sam
+# AI_PROXY_ALLOWED_MODELS=                       # Comma-separated; defaults in shared/constants
+# AI_PROXY_RATE_LIMIT_RPM=60
+# AI_PROXY_RATE_LIMIT_WINDOW_SECONDS=60
+# AI_PROXY_MAX_INPUT_TOKENS_PER_REQUEST=4096
+# AI_USAGE_PAGE_SIZE=50
+# AI_USAGE_MAX_PAGES=20
+
 # Pages project names (for Worker proxy routing)
 # PAGES_PROJECT_NAME=sam-web-prod
 # WWW_PAGES_PROJECT_NAME=sam-www

diff --git a/apps/api/src/durable-objects/trial-event-bus.ts b/apps/api/src/durable-objects/trial-event-bus.ts
@@ -46,12 +46,26 @@ export class TrialEventBus extends DurableObject<Env> {
   private buffer: BufferedEvent[] = [];
   private nextCursor = 1;
   private closed = false;
+  private closedLoaded = false;
   private waiters: Set<Waiter> = new Set();
 
   constructor(ctx: DurableObjectState, env: Env) {
     super(ctx, env);
   }
 
+  /**
+   * Lazily load the `closed` flag from DO storage. In-memory state is lost on
+   * eviction; persisting this single flag ensures SSE consumers see `closed: true`
+   * even after the DO is re-instantiated (the event buffer is intentionally NOT
+   * persisted — short-lived trial streams don't need full replay).
+   */
+  private async ensureClosedLoaded(): Promise<void> {
+    if (this.closedLoaded) return;
+    const stored = await this.ctx.storage.get<boolean>('closed');
+    if (stored === true) this.closed = true;
+    this.closedLoaded = true;
+  }
+
   async fetch(req: Request): Promise<Response> {
     const url = new URL(req.url);
     const path = url.pathname;
@@ -73,6 +87,7 @@ export class TrialEventBus extends DurableObject<Env> {
   // -------------------------------------------------------------------------
 
   private async handleAppend(req: Request): Promise<Response> {
+    await this.ensureClosedLoaded();
     log.info('trial_event_bus.handleAppend.enter', {
       closed: this.closed,
       bufferLen: this.buffer.length,
@@ -108,6 +123,7 @@ export class TrialEventBus extends DurableObject<Env> {
     // would reject those late-arriving events with 409.
     if (event.type === 'trial.error') {
       this.closed = true;
+      await this.ctx.storage.put('closed', true);
     }
 
     this.wakeWaiters();
@@ -119,6 +135,7 @@ export class TrialEventBus extends DurableObject<Env> {
   // -------------------------------------------------------------------------
 
   private async handlePoll(url: URL): Promise<Response> {
+    await this.ensureClosedLoaded();
     const cursor = Number.parseInt(url.searchParams.get('cursor') ?? '0', 10) || 0;
     const requestedTimeout = Number.parseInt(
       url.searchParams.get('timeoutMs') ?? String(DEFAULT_POLL_TIMEOUT_MS),
@@ -164,6 +181,7 @@ export class TrialEventBus extends DurableObject<Env> {
 
   private async handleClose(): Promise<Response> {
     this.closed = true;
+    await this.ctx.storage.put('closed', true);
     this.wakeWaiters();
     return Response.json({ closed: true });
   }

diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts
@@ -463,7 +463,7 @@ export interface Env {
   TRIAL_GITHUB_TIMEOUT_MS?: string;                  // Timeout for GitHub repo metadata probe (default: 5000)
   TRIAL_COUNTER_KEEP_MONTHS?: string;                // Months of counter rows to retain in DO (default: 3)
   TRIAL_WAITLIST_PURGE_DAYS?: string;                // Days after reset_date before notified waitlist rows are purged (default: 30)
-  TRIAL_CRON_ROLLOVER_CRON?: string;                 // Cron expression used by the monthly rollover audit (default: 0 3 1 * *)
+  TRIAL_CRON_ROLLOVER_CRON?: string;                 // Cron expression used by the monthly rollover audit (default: 0 5 1 * *)
   TRIAL_CRON_WAITLIST_CLEANUP?: string;              // Cron expression used by the daily waitlist cleanup (default: 0 4 * * *)
   TRIAL_SSE_HEARTBEAT_MS?: string;                   // SSE comment heartbeat cadence (default: 15000)
   TRIAL_SSE_POLL_TIMEOUT_MS?: string;                // Long-poll timeout per DO fetch (default: 15000)

diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts
@@ -452,7 +452,7 @@ export default {
     env: Env,
     ctx: ExecutionContext
   ): Promise<void> {
-    const rolloverCron = env.TRIAL_CRON_ROLLOVER_CRON ?? '0 3 1 * *';
+    const rolloverCron = env.TRIAL_CRON_ROLLOVER_CRON ?? '0 5 1 * *';
     const waitlistCleanupCron = env.TRIAL_CRON_WAITLIST_CLEANUP ?? '0 4 * * *';
 
     const isDailyForward = controller.cron === '0 3 * * *';

diff --git a/apps/api/src/middleware/rate-limit.ts b/apps/api/src/middleware/rate-limit.ts
@@ -40,6 +40,8 @@ export const DEFAULT_RATE_LIMITS = {
   // Tighter limit for anonymous trial creation: each call spawns a DO,
   // fires ~4 GitHub API calls, and consumes a monthly trial slot.
   TRIAL_CREATE: 10,
+  // SSE events endpoint — short window to prevent connection storms.
+  TRIAL_SSE: 30,
 } as const;
 
 /** Default time window (1 hour in seconds) */

diff --git a/apps/api/src/routes/admin-ai-usage.ts b/apps/api/src/routes/admin-ai-usage.ts
@@ -11,8 +11,6 @@
 import { requireApproved, requireAuth, requireSuperadmin } from '../middleware/auth';
 import { errors } from '../middleware/error';
 
-/** Default AI Gateway ID. Override via AI_GATEWAY_ID env var. */
-const DEFAULT_GATEWAY_ID = 'sam';
 /** Default number of log entries to fetch per page. Override via AI_USAGE_PAGE_SIZE env var. CF max is 50. */
 const DEFAULT_PAGE_SIZE = 50;
 /** Maximum pages to iterate when aggregating. Override via AI_USAGE_MAX_PAGES env var. */
@@ -97,7 +95,7 @@
       body: body.slice(0, 500),
       url: url.replace(/Bearer\s+\S+/, 'Bearer [REDACTED]'),
     });
-    throw errors.internal(`AI Gateway API error: ${resp.status} — ${body.slice(0, 200)}`);
+    throw errors.internal(`AI Gateway API error (${resp.status}). Check admin logs for details.`);
   }
 
   return resp.json() as Promise<AIGatewayLogsResponse>;
@@ -145,9 +143,26 @@
 * GET /ai-usage — Aggregated AI usage from AI Gateway logs.
 * Query params: ?period=7d (24h|7d|30d|90d)
  */
 adminAiUsageRoutes.get('/', async (c) => {
   const period = parsePeriod(c.req.query('period'));
-  const gatewayId = c.env.AI_GATEWAY_ID || DEFAULT_GATEWAY_ID;
+  const gatewayId = c.env.AI_GATEWAY_ID;
+  if (!gatewayId) {
+    // AI Gateway is optional — self-hosters who don't use it get an empty summary
+    // instead of a 500. The admin dashboard renders "no data" gracefully.
+    return c.json({
+      totalRequests: 0,
+      totalInputTokens: 0,
+      totalOutputTokens: 0,
+      totalCostUsd: 0,
+      trialRequests: 0,
+      trialCostUsd: 0,
+      cachedRequests: 0,
+      errorRequests: 0,
+      byModel: [],
+      byDay: [],
+      period,
+    } satisfies AiUsageSummary);
+  }
   const pageSize = parseInt(c.env.AI_USAGE_PAGE_SIZE || '', 10) || DEFAULT_PAGE_SIZE;
   const maxPages = parseInt(c.env.AI_USAGE_MAX_PAGES || '', 10) || DEFAULT_MAX_PAGES;
   const startDate = periodToStartDate(period);

diff --git a/apps/api/src/routes/ai-proxy.ts b/apps/api/src/routes/ai-proxy.ts
@@ -159,9 +159,13 @@ async function forwardToWorkersAI(
 
   if (!response.ok) {
     const errorText = await response.text();
+    log.error('ai_proxy.workers_ai_error', {
+      status: response.status,
+      body: errorText.slice(0, 500),
+    });
     return new Response(JSON.stringify({
       error: {
-        message: `AI Gateway error (${response.status}): ${errorText.slice(0, 200)}`,
+        message: `AI inference failed (${response.status}). Please try again.`,
         type: 'server_error',
       },
     }), { status: response.status, headers: { 'Content-Type': 'application/json' } });
@@ -206,9 +210,13 @@ async function forwardToAnthropic(
 
   if (!response.ok) {
     const errorText = await response.text();
+    log.error('ai_proxy.anthropic_error', {
+      status: response.status,
+      body: errorText.slice(0, 500),
+    });
     return new Response(JSON.stringify({
       error: {
-        message: `Anthropic API error (${response.status}): ${errorText.slice(0, 200)}`,
+        message: `AI inference failed (${response.status}). Please try again.`,
         type: 'server_error',
       },
     }), { status: response.status, headers: { 'Content-Type': 'application/json' } });

diff --git a/apps/api/src/routes/trial/claim.ts b/apps/api/src/routes/trial/claim.ts
@@ -112,11 +112,12 @@ claimRoutes.post('/claim', requireAuth(), async (c) => {
   const claimedAt = Date.now();
   log.info('trial_claim.success', { trialId, projectId, userId, claimedAt });
 
-  // Clear the claim cookie. Domain attribute mirrors what was set by the
-  // OAuth-callback issuer; we use host-only (no Domain) by default — safe for
-  // app.${BASE_DOMAIN}.
+  // Clear the claim cookie. Domain attribute MUST match what was set when the
+  // cookie was issued (`.BASE_DOMAIN`), otherwise the browser treats them as
+  // different cookies and the original is never deleted — enabling replay.
+  const cookieDomain = c.env.BASE_DOMAIN ? `.${c.env.BASE_DOMAIN}` : undefined;
   const response: TrialClaimResponse = { projectId, claimedAt };
-  c.header('Set-Cookie', clearClaimCookie());
+  c.header('Set-Cookie', clearClaimCookie({ domain: cookieDomain }));
   return c.json(response, 200);
 });
 

diff --git a/apps/api/src/routes/trial/create.ts b/apps/api/src/routes/trial/create.ts
@@ -38,6 +38,7 @@ import { rateLimitTrialCreate } from '../../middleware/rate-limit';
 import {
   buildClaimCookie,
   buildFingerprintCookie,
+  DEFAULT_TRIAL_CLAIM_TTL_MS,
   signClaimToken,
   signFingerprint,
   type TrialClaimPayload,
@@ -381,7 +382,7 @@ createRoutes.post('/create', async (c) => {
     // bound to the trialId alone, which is sufficient for claim validation.
     projectId: '',
     issuedAt: now,
-    expiresAt: now + 1000 * 60 * 60 * 48, // 48h
+    expiresAt: now + DEFAULT_TRIAL_CLAIM_TTL_MS,
   };
   const claimSigned = await signClaimToken(claimPayload, secret);