diff --git a/agents/__tests__/context-pruner.test.ts b/agents/__tests__/context-pruner.test.ts index 45c61b4b9f..b691f33a9f 100644 --- a/agents/__tests__/context-pruner.test.ts +++ b/agents/__tests__/context-pruner.test.ts @@ -219,6 +219,7 @@ describe('context-pruner handleSteps', () => { messages: Message[], contextTokenCount?: number, maxContextLength?: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, ) => { mockAgentState.messageHistory = messages // If contextTokenCount not provided, estimate from messages @@ -233,7 +234,10 @@ describe('context-pruner handleSteps', () => { const generator = contextPruner.handleSteps!({ agentState: mockAgentState, logger: mockLogger, - params: maxContextLength ? { maxContextLength } : {}, + params: { + ...(maxContextLength ? { maxContextLength } : {}), + ...budgets, + }, }) const results: any[] = [] let result = generator.next() @@ -381,36 +385,6 @@ describe('context-pruner handleSteps', () => { expect(content).toContain('[USER] [with image(s)]') }) - test('truncates summary when it exceeds target size', () => { - // Create many messages to generate a large summary - const messages: Message[] = [] - for (let i = 0; i < 100; i++) { - messages.push( - createMessage( - 'user', - `User message number ${i} with some additional content to make it longer`, - ), - ) - messages.push( - createMessage( - 'assistant', - `Assistant response number ${i} with detailed explanation`, - ), - ) - } - - // Use a very small max context to force truncation - const results = runHandleSteps(messages, 500000, 5000) - const content = results[0].input.messages[0].content[0].text - - // Should contain truncation notice - expect(content).toContain('[CONVERSATION TRUNCATED') - - // Should still have the wrapper tags - expect(content).toContain('') - expect(content).toContain('') - }) - test('removes only INSTRUCTIONS_PROMPT and SUBAGENT_SPAWN when under context limit', () => { const messages: Message[] = [ createMessage('user', 
'Hello'), @@ -700,6 +674,7 @@ describe('context-pruner long message truncation', () => { messages: Message[], contextTokenCount: number, maxContextLength: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, ) => { mockAgentState.messageHistory = messages mockAgentState.contextTokenCount = contextTokenCount @@ -712,7 +687,7 @@ describe('context-pruner long message truncation', () => { const generator = contextPruner.handleSteps!({ agentState: mockAgentState, logger: mockLogger, - params: { maxContextLength }, + params: { maxContextLength, ...budgets }, }) const results: any[] = [] let result = generator.next() @@ -726,8 +701,8 @@ describe('context-pruner long message truncation', () => { } test('truncates very long user messages with 80-20 ratio', () => { - // Create a message that exceeds 20k chars - const longText = 'A'.repeat(25000) + // Create a message that exceeds the user message token limit (~13k tokens = ~39k chars) + const longText = 'A'.repeat(45000) const messages = [ createMessage('user', longText), createMessage('assistant', 'Got it'), @@ -1118,6 +1093,7 @@ describe('context-pruner repeated compaction', () => { messages: Message[], contextTokenCount: number, maxContextLength: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, ) => { mockAgentState.messageHistory = messages mockAgentState.contextTokenCount = contextTokenCount @@ -1130,7 +1106,7 @@ describe('context-pruner repeated compaction', () => { const generator = contextPruner.handleSteps!({ agentState: mockAgentState, logger: mockLogger, - params: { maxContextLength }, + params: { maxContextLength, ...budgets }, }) const results: any[] = [] let result = generator.next() @@ -1208,6 +1184,135 @@ First assistant response expect(summaryTagCount).toBe(1) }) + test('drops old entries each cycle when budgets are tight', () => { + const simulateCompaction = ( + inputMessages: Message[], + budgets: { assistantToolBudget: number; userBudget: number }, + ): 
Message => { + const result = runHandleSteps(inputMessages, 250000, 200000, budgets) + return result[0].input.messages[0] + } + + const tightBudgets = { assistantToolBudget: 25, userBudget: 25 } + + // === CYCLE 1: 3 pairs of messages, tight budgets drop the oldest === + const cycle1Messages = [ + createMessage('user', 'Cycle1-Request-A'), + createMessage('assistant', 'Cycle1-Response-A'), + createMessage('user', 'Cycle1-Request-B'), + createMessage('assistant', 'Cycle1-Response-B'), + createMessage('user', 'Cycle1-Request-C'), + createMessage('assistant', 'Cycle1-Response-C'), + ] + const summary1 = simulateCompaction(cycle1Messages, tightBudgets) + const summary1Text = (summary1.content[0] as { type: 'text'; text: string }) + .text + + // Most recent entries should survive + expect(summary1Text).toContain('Cycle1-Request-C') + expect(summary1Text).toContain('Cycle1-Response-C') + // Oldest entries should be dropped + expect(summary1Text).not.toContain('Cycle1-Request-A') + expect(summary1Text).not.toContain('Cycle1-Response-A') + + // === CYCLE 2: Add new messages, compact again === + const cycle2Messages = [ + summary1, + createMessage('user', 'Cycle2-Request-D'), + createMessage('assistant', 'Cycle2-Response-D'), + ] + const summary2 = simulateCompaction(cycle2Messages, tightBudgets) + const summary2Text = (summary2.content[0] as { type: 'text'; text: string }) + .text + + // Newest entries from cycle 2 should survive + expect(summary2Text).toContain('Cycle2-Request-D') + expect(summary2Text).toContain('Cycle2-Response-D') + // Cycle 1's oldest survivors should now be dropped + expect(summary2Text).not.toContain('Cycle1-Request-A') + expect(summary2Text).not.toContain('Cycle1-Response-A') + + // === CYCLE 3: Add more, compact again === + const cycle3Messages = [ + summary2, + createMessage('user', 'Cycle3-Request-E'), + createMessage('assistant', 'Cycle3-Response-E'), + ] + const summary3 = simulateCompaction(cycle3Messages, tightBudgets) + const summary3Text = 
(summary3.content[0] as { type: 'text'; text: string }) + .text + + // Newest entries from cycle 3 should survive + expect(summary3Text).toContain('Cycle3-Request-E') + expect(summary3Text).toContain('Cycle3-Response-E') + // Very old entries should definitely be gone + expect(summary3Text).not.toContain('Cycle1-Request-A') + expect(summary3Text).not.toContain('Cycle1-Response-A') + + // Verify only one conversation_summary tag (no nesting) + const summaryTagCount = ( + summary3Text.match(/<conversation_summary>/g) || [] + ).length + expect(summaryTagCount).toBe(1) + }) + + test('keeps multi-part tool entries grouped across compaction cycles', () => { + const simulateCompaction = ( + inputMessages: Message[], + ): Message => { + const result = runHandleSteps(inputMessages, 250000, 200000) + return result[0].input.messages[0] + } + + // Create a tool result that produces multiple entryParts: + // both an error AND a non-zero exit code + const cycle1Messages: Message[] = [ + createMessage('user', 'Run tests'), + createToolCallMessage('call-1', 'run_terminal_command', { + command: 'npm test', + }), + createToolResultMessage('call-1', 'run_terminal_command', { + errorMessage: 'Test suite failed', + exitCode: 1, + }), + createMessage('user', 'Fix the tests'), + createMessage('assistant', 'I will fix them'), + ] + + // Cycle 1: compact + const summary1 = simulateCompaction(cycle1Messages) + const summary1Text = (summary1.content[0] as { type: 'text'; text: string }) + .text + + // Both parts should be present in cycle 1 + expect(summary1Text).toContain('[TOOL ERROR: run_terminal_command] Test suite failed') + expect(summary1Text).toContain('[COMMAND FAILED] Exit code: 1') + + // Cycle 2: re-compact — the multi-part entry should stay as one entry + const cycle2Messages: Message[] = [ + summary1, + createMessage('user', 'Try again'), + createMessage('assistant', 'Running tests again'), + ] + const summary2 = simulateCompaction(cycle2Messages) + const summary2Text = (summary2.content[0] as { 
type: 'text'; text: string }) + .text + + // Both parts should still be present together after re-compaction + expect(summary2Text).toContain('[TOOL ERROR: run_terminal_command] Test suite failed') + expect(summary2Text).toContain('[COMMAND FAILED] Exit code: 1') + + // They should be within the same --- delimited chunk (not split apart) + const separator = '\n\n---\n\n' + const chunks = summary2Text + .replace(/[\s\S]*?\n\n/, '') + .replace(/<\/conversation_summary>[\s\S]*/, '') + .split(separator) + const errorChunk = chunks.find((c) => c.includes('[TOOL ERROR:')) + expect(errorChunk).toBeDefined() + expect(errorChunk).toContain('[COMMAND FAILED] Exit code: 1') + }) + test('handles 3+ compaction cycles without nested PREVIOUS SUMMARY markers', () => { // Helper to simulate running the context pruner and getting the output const simulateCompaction = (inputMessages: Message[]): Message => { @@ -1355,6 +1460,7 @@ describe('context-pruner threshold behavior', () => { messages: Message[], contextTokenCount: number, maxContextLength: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, ) => { mockAgentState.messageHistory = messages mockAgentState.contextTokenCount = contextTokenCount @@ -1367,7 +1473,7 @@ describe('context-pruner threshold behavior', () => { const generator = contextPruner.handleSteps!({ agentState: mockAgentState, logger: mockLogger, - params: { maxContextLength }, + params: { maxContextLength, ...budgets }, }) const results: any[] = [] let result = generator.next() @@ -1446,7 +1552,7 @@ describe('context-pruner str_replace and write_file tool results', () => { return results } - test('includes str_replace diff in summary', () => { + test('includes str_replace result in summary', () => { const messages = [ createMessage('user', 'Edit this file'), createToolCallMessage('call-1', 'str_replace', { @@ -1454,19 +1560,22 @@ describe('context-pruner str_replace and write_file tool results', () => { replacements: [{ old: 'foo', new: 
'bar' }], }), createToolResultMessage('call-1', 'str_replace', { - diff: '--- a/src/utils.ts\n+++ b/src/utils.ts\n@@ -1,1 +1,1 @@\n-foo\n+bar', + file: 'src/utils.ts', + message: 'Updated file', + unifiedDiff: '--- a/src/utils.ts\n+++ b/src/utils.ts\n@@ -1,1 +1,1 @@\n-foo\n+bar', }), ] const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('[EDIT RESULT]') + expect(content).toContain('[EDIT RESULT: str_replace]') + expect(content).toContain('unifiedDiff') expect(content).toContain('-foo') expect(content).toContain('+bar') }) - test('includes write_file diff in summary', () => { + test('includes write_file result in summary', () => { const messages = [ createMessage('user', 'Create a new file'), createToolCallMessage('call-1', 'write_file', { @@ -1474,18 +1583,20 @@ describe('context-pruner str_replace and write_file tool results', () => { content: 'export const hello = "world"', }), createToolResultMessage('call-1', 'write_file', { - diff: '--- /dev/null\n+++ b/src/new-file.ts\n@@ -0,0 +1 @@\n+export const hello = "world"', + file: 'src/new-file.ts', + message: 'Created file', + unifiedDiff: '--- /dev/null\n+++ b/src/new-file.ts\n@@ -0,0 +1 @@\n+export const hello = "world"', }), ] const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('[WRITE RESULT]') - expect(content).toContain('+export const hello = "world"') + expect(content).toContain('[EDIT RESULT: write_file]') + expect(content).toContain('export const hello') }) - test('truncates very long str_replace diffs', () => { + test('truncates very long str_replace results', () => { const longDiff = 'X'.repeat(3000) const messages = [ createMessage('user', 'Make big changes'), @@ -1494,20 +1605,65 @@ describe('context-pruner str_replace and write_file tool results', () => { replacements: [], }), createToolResultMessage('call-1', 'str_replace', { - diff: longDiff, + 
file: 'src/big-file.ts', + message: 'Updated file', + unifiedDiff: longDiff, }), ] const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('[EDIT RESULT]') + expect(content).toContain('[EDIT RESULT: str_replace]') expect(content).toContain('...') // Should not contain the full diff expect(content).not.toContain(longDiff) }) - test('does not include edit result when no diff is present', () => { + test('truncates very large tool entries to 5k token limit', () => { + // spawn_agents with multiple non-blacklisted agents producing large outputs + // Each agent output is capped at ~3,900 chars, but 5 agents × 3,900 = ~19,500 chars + // which exceeds the 5k token (15k char) TOOL_ENTRY_LIMIT + const largeAgentResults = Array.from({ length: 5 }, (_, i) => ({ + agentType: `editor`, + value: { + type: 'string', + value: `AGENT_${i}_START_` + 'X'.repeat(4000) + `_AGENT_${i}_END`, + }, + })) + + const messages: Message[] = [ + createMessage('user', 'Spawn many agents'), + createToolCallMessage('call-1', 'spawn_agents', { + agents: [ + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + ], + }), + { + role: 'tool', + toolCallId: 'call-1', + toolName: 'spawn_agents', + content: [{ type: 'json', value: largeAgentResults }], + } as ToolMessage, + ] + + const results = runHandleSteps(messages) + const content = results[0].input.messages[0].content[0].text + + // Should contain truncation notice from the TOOL_ENTRY_LIMIT cap + expect(content).toContain('[...truncated') + // The last agent's start marker should be cut by the overall entry cap + // (per-agent truncation only cuts within each agent's output, not across agents) + expect(content).not.toContain('AGENT_4_START_') + // The first agent's start should survive (80% prefix) + expect(content).toContain('AGENT_0_START_') + }) + + test('includes all result properties even 
without unifiedDiff', () => { const messages = [ createMessage('user', 'Edit file'), createToolCallMessage('call-1', 'str_replace', { @@ -1515,16 +1671,19 @@ describe('context-pruner str_replace and write_file tool results', () => { replacements: [], }), createToolResultMessage('call-1', 'str_replace', { - success: true, + file: 'src/file.ts', + errorMessage: 'No match found for old string', }), ] const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - // Should have the tool call summary but not the result + // Should have both the tool call summary and the full result expect(content).toContain('Edited file: src/file.ts') - expect(content).not.toContain('[EDIT RESULT]') + expect(content).toContain('[EDIT RESULT: str_replace]') + expect(content).toContain('errorMessage') + expect(content).toContain('No match found for old string') }) }) @@ -1560,11 +1719,11 @@ describe('context-pruner glob and list_directory tools', () => { return results } - test('summarizes glob tool with patterns', () => { + test('summarizes glob tool with pattern', () => { const messages = [ createMessage('user', 'Find files'), createToolCallMessage('call-1', 'glob', { - patterns: [{ pattern: '*.ts' }, { pattern: '*.js' }], + pattern: '**/*.ts', }), createToolResultMessage('call-1', 'glob', { files: [] }), ] @@ -1572,14 +1731,14 @@ describe('context-pruner glob and list_directory tools', () => { const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('Glob: *.ts, *.js') + expect(content).toContain('Glob: **/*.ts') }) - test('summarizes list_directory tool with paths', () => { + test('summarizes list_directory tool with path', () => { const messages = [ createMessage('user', 'List directories'), createToolCallMessage('call-1', 'list_directory', { - directories: [{ path: 'src' }, { path: 'lib' }], + path: 'src', }), createToolResultMessage('call-1', 'list_directory', { 
entries: [] }), ] @@ -1587,7 +1746,7 @@ describe('context-pruner glob and list_directory tools', () => { const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('Listed dirs: src, lib') + expect(content).toContain('Listed dir: src') }) test('summarizes read_subtree tool with paths', () => { @@ -1605,3 +1764,597 @@ describe('context-pruner glob and list_directory tools', () => { expect(content).toContain('Read subtree: src/components, src/utils') }) }) + +describe('context-pruner dual-budget behavior', () => { + let mockAgentState: AgentState + + beforeEach(() => { + mockAgentState = createMockAgentState([], 0) + }) + + const runHandleSteps = ( + messages: Message[], + contextTokenCount: number, + maxContextLength: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, + ) => { + mockAgentState.messageHistory = messages + mockAgentState.contextTokenCount = contextTokenCount + const mockLogger = { + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + } + const generator = contextPruner.handleSteps!({ + agentState: mockAgentState, + logger: mockLogger, + params: { maxContextLength, ...budgets }, + }) + const results: any[] = [] + let result = generator.next() + while (!result.done) { + if (typeof result.value === 'object') { + results.push(result.value) + } + result = generator.next() + } + return results + } + + test('includes recent messages in summary and drops older ones', () => { + const messages = [ + createMessage('user', 'Old user message 1'), + createMessage('assistant', 'Old assistant response 1'), + createMessage('user', 'Old user message 2'), + createMessage('assistant', 'Old assistant response 2'), + createMessage('user', 'Recent user message'), + createMessage('assistant', 'Recent assistant response'), + ] + + // Small budgets on summarized sizes: only the most recent entries fit + const results = runHandleSteps(messages, 250000, 200000, { 
+ assistantToolBudget: 15, + userBudget: 15, + }) + + const resultMessages = results[0].input.messages + + // Should be a single summary message (no verbatim messages) + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('<conversation_summary>') + + // Recent messages should be in the summary + expect(content).toContain('Recent user message') + expect(content).toContain('Recent assistant response') + + // Older messages should be dropped entirely (not in summary) + expect(content).not.toContain('Old user message 1') + expect(content).not.toContain('Old assistant response 1') + expect(content).not.toContain('Old user message 2') + expect(content).not.toContain('Old assistant response 2') + }) + + test('summarizes all messages when they fit within budgets', () => { + const messages = [ + createMessage('user', 'Hello'), + createMessage('assistant', 'Hi there!'), + createMessage('user', 'How are you?'), + createMessage('assistant', 'I am fine!'), + ] + + // Large budgets: all messages fit in summary + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 20000, + userBudget: 50000, + }) + + const resultMessages = results[0].input.messages + + // All messages summarized into one + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('Hello') + expect(content).toContain('Hi there!') + expect(content).toContain('How are you?') + expect(content).toContain('I am fine!') + }) + + test('respects user budget separately from assistant+tool budget', () => { + const largeUserText = 'U'.repeat(600) // ~200 tokens + const messages = [ + createMessage('user', largeUserText), + createMessage('assistant', 'Short response'), + createMessage('user', 'Recent short question'), + createMessage('assistant', 'Recent short answer'), + ] + + // User budget small enough to exclude the large user message + // 
Assistant budget large enough to include all assistant messages + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 5000, + userBudget: 100, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('<conversation_summary>') + // The large user message should be dropped (not in summary) + expect(content).not.toContain(largeUserText) + // Recent messages should be in the summary + expect(content).toContain('Recent short question') + expect(content).toContain('Recent short answer') + }) + + test('drops tool entries beyond budget at the cutoff boundary', () => { + const messages = [ + createMessage('user', 'Old message'), + createToolCallMessage('call-1', 'read_files', { paths: ['old.ts'] }), + createToolResultMessage('call-1', 'read_files', { content: 'old file' }), + createMessage('user', 'Recent message'), + createMessage('assistant', 'Recent response'), + ] + + // Budget that excludes the older tool call entry + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 15, + userBudget: 15, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + + // Recent messages should be in the summary + expect(content).toContain('Recent message') + expect(content).toContain('Recent response') + + // Tool call summary should be dropped (beyond budget) + expect(content).not.toContain('old.ts') + }) + + test('counts tool result summaries against assistant+tool budget', () => { + // Use str_replace with a large result — this produces a summarized [EDIT RESULT] entry + const largeDiff = 'LARGE_DIFF_CONTENT_' + 'X'.repeat(900) + const messages = [ + createMessage('user', 'Do something'), + createToolCallMessage('call-1', 'str_replace', { path: 'big.ts', replacements: [] }), + 
createToolResultMessage('call-1', 'str_replace', { file: 'big.ts', message: 'Updated', unifiedDiff: largeDiff }), + createMessage('user', 'Recent question'), + createMessage('assistant', 'Recent answer'), + ] + + // Assistant budget too small for the large [EDIT RESULT] summary entry + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 100, + userBudget: 5000, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('<conversation_summary>') + // Recent messages should be in the summary + expect(content).toContain('Recent question') + expect(content).toContain('Recent answer') + // Large edit result entry should be dropped (exceeds assistant+tool budget) + expect(content).not.toContain('LARGE_DIFF_CONTENT_') + }) + + test('drops older messages and includes recent ones in summary', () => { + const messages = [ + createMessage('user', 'First request about feature A'), + createMessage('assistant', 'Working on feature A'), + createMessage('user', 'Second request about feature B'), + createMessage('assistant', 'Working on feature B'), + ] + + // Budget only fits the last pair of summarized entries + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 15, + userBudget: 15, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('<conversation_summary>') + + // Recent messages should be in the summary + expect(content).toContain('Second request about feature B') + expect(content).toContain('Working on feature B') + + // Older messages should be dropped + expect(content).not.toContain('First request about feature A') + expect(content).not.toContain('Working on feature A') + }) + + test('excludes STEP_PROMPT tagged messages from budget calculation', () => { + const 
largeStepPrompt = 'S'.repeat(900) // ~300 tokens + const messages: Message[] = [ + createMessage('user', 'User request'), + createMessage('assistant', 'Assistant response'), + { + role: 'user', + content: [{ type: 'text', text: largeStepPrompt }], + tags: ['STEP_PROMPT'], + }, + createMessage('user', 'Recent question'), + createMessage('assistant', 'Recent answer'), + ] + + // Budget is small but the STEP_PROMPT should NOT count against it, + // so both real user messages and both assistant messages should fit + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 200, + userBudget: 200, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + // Both real messages should be in the summary + expect(content).toContain('User request') + expect(content).toContain('Assistant response') + expect(content).toContain('Recent question') + expect(content).toContain('Recent answer') + // STEP_PROMPT content should NOT be in the summary + expect(content).not.toContain(largeStepPrompt) + }) + + test('excludes SUBAGENT_SPAWN tagged messages from budget calculation', () => { + const messages: Message[] = [ + createMessage('user', 'User request'), + createMessage('assistant', 'First response'), + { + role: 'assistant', + content: [{ type: 'text', text: 'A'.repeat(900) }], + tags: ['SUBAGENT_SPAWN'], + }, + createMessage('user', 'Follow up'), + createMessage('assistant', 'Second response'), + ] + + // Budget is small but SUBAGENT_SPAWN should NOT count against it + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 200, + userBudget: 200, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('User request') + expect(content).toContain('First response') + 
expect(content).toContain('Follow up') + expect(content).toContain('Second response') + }) + + test('charges old summary entries against their correct budgets', () => { + // Previous summary with a large [USER] entry that exceeds user budget + const largeUserContent = 'X'.repeat(900) + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `<conversation_summary>\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\n${largeUserContent}\n\n---\n\n[ASSISTANT]\nOld assistant response\n</conversation_summary>`, + }, + ], + } + + const messages: Message[] = [ + previousSummary, + createMessage('user', 'After summary request'), + createMessage('assistant', 'After summary response'), + ] + + // User budget is small — the large [USER] entry from the old summary + // should be dropped because it exceeds the user budget. + // The [ASSISTANT] entry from the old summary charges against assistant budget. + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 5000, + userBudget: 50, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + // Recent messages should be in the summary + expect(content).toContain('After summary request') + expect(content).toContain('After summary response') + // The old [ASSISTANT] entry fits the assistant budget and is after the cutoff + expect(content).toContain('Old assistant response') + // The large old [USER] entry should be dropped (exceeded user budget) + expect(content).not.toContain(largeUserContent) + }) + + test('drops old summary entries individually based on budget walk', () => { + // Previous summary with identifiable oldest and middle entries + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `\nThis is a summary of the conversation so far. 
The original messages have been condensed to save context space.\n\n[USER]\nOLDEST_USER_ENTRY\n\n---\n\n[ASSISTANT]\nOLDEST_ASSISTANT_ENTRY\n\n---\n\n[USER]\nMIDDLE_USER_ENTRY\n\n---\n\n[ASSISTANT]\nMIDDLE_ASSISTANT_ENTRY\n`, + }, + ], + } + + const messages: Message[] = [ + previousSummary, + createMessage('user', 'Recent request'), + createMessage('assistant', 'Recent response'), + ] + + // Budget large enough for middle + recent entries but not oldest + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 25, + userBudget: 25, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + // Middle and recent entries should survive + expect(content).toContain('MIDDLE_USER_ENTRY') + expect(content).toContain('MIDDLE_ASSISTANT_ENTRY') + expect(content).toContain('Recent request') + expect(content).toContain('Recent response') + // Oldest entries should be dropped + expect(content).not.toContain('OLDEST_USER_ENTRY') + expect(content).not.toContain('OLDEST_ASSISTANT_ENTRY') + }) + + test('handles complex scenario with long messages of all types and previous summary', () => { + // Previous summary with 4 identifiable entries + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `\nThis is a summary of the conversation so far. 
The original messages have been condensed to save context space.\n\n[USER]\nOLD_USER_REQUEST_1: The user asked about setting up authentication with OAuth2 and JWT tokens for the API.\n\n---\n\n[ASSISTANT]\nOLD_ASSISTANT_RESPONSE_1: Explained OAuth2 flow and implemented JWT token generation.\nTools: Read files: src/auth.ts, src/middleware.ts; Edited file: src/auth.ts\n\n---\n\n[USER]\nOLD_USER_REQUEST_2: Asked for unit tests for the auth module.\n\n---\n\n[ASSISTANT]\nOLD_ASSISTANT_RESPONSE_2: Created comprehensive test suite for authentication.\nTools: Wrote file: src/__tests__/auth.test.ts\n`, + }, + ], + } + + // Long user message (~45k chars, exceeds USER_MESSAGE_LIMIT of 13k tokens = 39k chars) + // Middle marker placed ~85% through so it falls in the truncated gap + // (past the 80% prefix but before the 20% suffix) + const longUserMessage = 'LONG_USER_START_' + 'Here is a detailed specification for the new feature. '.repeat(650) + '_LONG_USER_MIDDLE_MARKER_' + 'Here is a detailed specification for the new feature. '.repeat(150) + + // Long assistant message with text (~8k chars, exceeds ASSISTANT_MESSAGE_LIMIT of 1.3k tokens = 3.9k chars) + // plus multiple tool calls. Middle marker placed ~60% through so it falls in the truncated gap. + const longAssistantText = 'LONG_ASSISTANT_START_' + 'I will implement this step by step, starting with the data model changes. '.repeat(60) + '_LONG_ASST_MIDDLE_MARKER_' + 'I will implement this step by step, starting with the data model changes. 
'.repeat(40) + const assistantWithToolCalls: Message = { + role: 'assistant', + content: [ + { type: 'text', text: longAssistantText }, + { + type: 'tool-call', + toolCallId: 'call-1', + toolName: 'read_files', + input: { paths: ['src/model.ts', 'src/service.ts'] }, + }, + { + type: 'tool-call', + toolCallId: 'call-2', + toolName: 'str_replace', + input: { path: 'src/model.ts', replacements: [] }, + }, + { + type: 'tool-call', + toolCallId: 'call-3', + toolName: 'spawn_agents', + input: { + agents: [ + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + ], + }, + }, + ], + } + + // str_replace result with a large diff (~3k chars, exceeds 2k truncation limit) + const largeDiff = 'DIFF_START_MARKER_' + '+added line\n'.repeat(250) + '_DIFF_END_MARKER' + + // spawn_agents result with 5 non-blacklisted agents producing large outputs + // Each ~4k chars, total ~20k, exceeds TOOL_ENTRY_LIMIT of 5k tokens = 15k chars + const largeAgentResults = Array.from({ length: 5 }, (_, i) => ({ + agentType: 'editor', + value: { + type: 'string', + value: `AGENT_${i}_OUTPUT_START_` + 'Implementation details. 
'.repeat(160) + `_AGENT_${i}_OUTPUT_END`, + }, + })) + + const messages: Message[] = [ + previousSummary, + createMessage('user', longUserMessage), + assistantWithToolCalls, + createToolResultMessage('call-1', 'read_files', { content: 'file data' } as JSONValue), + createToolResultMessage('call-2', 'str_replace', { file: 'src/model.ts', message: 'Updated', unifiedDiff: largeDiff }), + { + role: 'tool', + toolCallId: 'call-3', + toolName: 'spawn_agents', + content: [{ type: 'json', value: largeAgentResults }], + } as ToolMessage, + createMessage('user', 'FINAL_USER_REQUEST: Now run the tests'), + createMessage('assistant', 'FINAL_ASSISTANT_RESPONSE: Running tests now'), + ] + + // Use default budgets — everything should fit + const results = runHandleSteps(messages, 250000, 200000) + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + + // === Structure checks === + expect(content).toContain('') + expect(content).toContain('') + const summaryTagCount = (content.match(//g) || []).length + expect(summaryTagCount).toBe(1) + + // === Previous summary entries preserved === + expect(content).toContain('OLD_USER_REQUEST_1') + expect(content).toContain('OLD_ASSISTANT_RESPONSE_1') + expect(content).toContain('OLD_USER_REQUEST_2') + expect(content).toContain('OLD_ASSISTANT_RESPONSE_2') + + // === Long user message: truncated with 80/20 split === + expect(content).toContain('LONG_USER_START_') + expect(content).not.toContain('_LONG_USER_MIDDLE_MARKER_') // Middle marker falls in truncated gap + expect(content).toContain('[...truncated') + + // === Long assistant text: truncated === + expect(content).toContain('LONG_ASSISTANT_START_') + expect(content).not.toContain('_LONG_ASST_MIDDLE_MARKER_') // Middle marker falls in truncated gap + + // === Tool call summaries present === + expect(content).toContain('Read files: src/model.ts, src/service.ts') + 
expect(content).toContain('Edited file: src/model.ts') + expect(content).toContain('Spawned agents:') + + // === str_replace result: present but truncated at 2k chars === + expect(content).toContain('[EDIT RESULT: str_replace]') + expect(content).toContain('DIFF_START_MARKER_') + expect(content).not.toContain('_DIFF_END_MARKER') // Truncated by 2k result limit + + // === spawn_agents tool entry: truncated by TOOL_ENTRY_LIMIT === + expect(content).toContain('AGENT_0_OUTPUT_START_') // First agent's start in 80% prefix + expect(content).not.toContain('AGENT_4_OUTPUT_START_') // Last agent's start falls in truncated gap + + // === Final messages present === + expect(content).toContain('FINAL_USER_REQUEST') + expect(content).toContain('FINAL_ASSISTANT_RESPONSE') + + // === Entries are separated by --- === + expect(content).toContain('---') + }) + + test('with tight budgets, drops old summary entries while keeping truncated new entries', () => { + // Same setup but with tight budgets: old summary entries get dropped, + // new entries survive (individually truncated) + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOLD_DROPPED_USER: ${'X'.repeat(600)}\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT: ${'Y'.repeat(600)}\n\n---\n\n[USER]\nOLD_DROPPED_USER_2: Asked about deployment\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT_2: Explained deployment process\n`, + }, + ], + } + + // Long user message (~12k chars, under truncation limit but uses significant budget) + const longUserMessage = 'SURVIVED_USER_START_' + 'Feature request details. 
'.repeat(400) + '_SURVIVED_USER_END' + + // Assistant with tool calls + const assistantMsg: Message = { + role: 'assistant', + content: [ + { type: 'text', text: 'SURVIVED_ASSISTANT: Working on it' }, + { + type: 'tool-call', + toolCallId: 'call-1', + toolName: 'str_replace', + input: { path: 'src/app.ts', replacements: [] }, + }, + ], + } + + // Tool result with a diff + const toolResult = createToolResultMessage('call-1', 'str_replace', { + file: 'src/app.ts', + message: 'Updated file', + unifiedDiff: '--- a/src/app.ts\n+++ b/src/app.ts\n@@ -1 +1 @@\n-old\n+SURVIVED_DIFF_CONTENT', + }) + + const messages: Message[] = [ + previousSummary, + createMessage('user', longUserMessage), + assistantMsg, + toolResult, + createMessage('user', 'SURVIVED_FINAL_USER'), + createMessage('assistant', 'SURVIVED_FINAL_ASSISTANT'), + ] + + // Tight budgets: enough for new entries but not old summary entries + // New assistant entries: ~25 (assistant text+tool) + ~56 (edit result JSON) + ~13 (final) = ~94 tokens + // Old assistant entries: ~20 for OLD_DROPPED_ASSISTANT_2 would push over budget of 100 + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 100, + userBudget: 4200, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + + // === New entries survived === + expect(content).toContain('SURVIVED_USER_START_') + expect(content).toContain('SURVIVED_ASSISTANT') + expect(content).toContain('SURVIVED_DIFF_CONTENT') + expect(content).toContain('SURVIVED_FINAL_USER') + expect(content).toContain('SURVIVED_FINAL_ASSISTANT') + + // === Old summary entries dropped by budget walk === + expect(content).not.toContain('OLD_DROPPED_USER:') + expect(content).not.toContain('OLD_DROPPED_ASSISTANT:') + expect(content).not.toContain('OLD_DROPPED_USER_2:') + expect(content).not.toContain('OLD_DROPPED_ASSISTANT_2:') + }) + + test('fully includes 
conversation summary when it fits within user budget', () => { + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOld request about feature A\n\n---\n\n[ASSISTANT]\nWorked on feature A\n`, + }, + ], + } + + const messages: Message[] = [ + previousSummary, + createMessage('user', 'New request about feature B'), + createMessage('assistant', 'Working on feature B'), + ] + + // Large budget — everything fits + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 20000, + userBudget: 50000, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + // Previous summary content should be fully included + expect(content).toContain('Old request about feature A') + expect(content).toContain('Worked on feature A') + // New messages should also be included + expect(content).toContain('New request about feature B') + expect(content).toContain('Working on feature B') + }) +}) diff --git a/agents/browser-use/browser-use.ts b/agents/browser-use/browser-use.ts index 7b11db0f89..1536e3e361 100644 --- a/agents/browser-use/browser-use.ts +++ b/agents/browser-use/browser-use.ts @@ -127,7 +127,7 @@ const definition: AgentDefinition = { mcpServers: { 'chrome-devtools': { command: 'npx', - args: ['-y', 'chrome-devtools-mcp@latest', '--headless'], + args: ['-y', 'chrome-devtools-mcp@latest', '--headless', '--isolated'], }, }, diff --git a/agents/context-pruner.ts b/agents/context-pruner.ts index bbf495baa1..55b1dd6bf7 100644 --- a/agents/context-pruner.ts +++ b/agents/context-pruner.ts @@ -10,259 +10,6 @@ import type { UserMessage, } from './types/util-types' -// ============================================================================= -// Helper Functions (exported for 
testing) -// ============================================================================= - -/** - * Truncates long text with 80% from the beginning and 20% from the end. - * Preserves context from both ends of the text while indicating what was removed. - * - * @param text - The text to truncate - * @param limit - Maximum character length - * @returns Truncated text with notice of how many chars were removed - */ -export function truncateLongText(text: string, limit: number): string { - if (text.length <= limit) { - return text - } - const availableChars = limit - 50 // 50 chars for the truncation notice - const prefixLength = Math.floor(availableChars * 0.8) - const suffixLength = availableChars - prefixLength - const prefix = text.slice(0, prefixLength) - const suffix = text.slice(-suffixLength) - const truncatedChars = text.length - prefixLength - suffixLength - return `${prefix}\n\n[...truncated ${truncatedChars} chars...]\n\n${suffix}` -} - -/** - * Estimates token count from a JSON-serializable object. - * Uses a simple heuristic of ~3 characters per token. - * - * @param obj - The object to estimate tokens for - * @returns Estimated token count - */ -export function estimateTokens(obj: unknown): number { - return Math.ceil(JSON.stringify(obj).length / 3) -} - -/** - * Extracts text content from a message, handling both string and array formats. - * - * @param message - The message to extract text from - * @returns Combined text content from the message - */ -export function getTextContent(message: Message): string { - if (typeof message.content === 'string') { - return message.content - } - if (Array.isArray(message.content)) { - return message.content - .filter( - (part: Record) => - part.type === 'text' && typeof part.text === 'string', - ) - .map((part: Record) => part.text as string) - .join('\n') - } - return '' -} - -/** - * Summarizes a tool call into a human-readable description. - * Handles various tool types with appropriate formatting. 
- * - * @param toolName - The name of the tool - * @param input - The tool's input parameters - * @returns A concise summary of the tool call - */ -export function summarizeToolCall( - toolName: string, - input: Record, -): string { - switch (toolName) { - case 'read_files': { - const paths = input.paths as string[] | undefined - if (paths && paths.length > 0) { - return `Read files: ${paths.join(', ')}` - } - return 'Read files' - } - case 'write_file': { - const path = input.path as string | undefined - return path ? `Wrote file: ${path}` : 'Wrote file' - } - case 'str_replace': { - const path = input.path as string | undefined - return path ? `Edited file: ${path}` : 'Edited file' - } - case 'propose_write_file': { - const path = input.path as string | undefined - return path ? `Proposed write to: ${path}` : 'Proposed file write' - } - case 'propose_str_replace': { - const path = input.path as string | undefined - return path ? `Proposed edit to: ${path}` : 'Proposed file edit' - } - case 'read_subtree': { - const paths = input.paths as string[] | undefined - if (paths && paths.length > 0) { - return `Read subtree: ${paths.join(', ')}` - } - return 'Read subtree' - } - case 'code_search': { - const pattern = input.pattern as string | undefined - const flags = input.flags as string | undefined - if (pattern && flags) { - return `Code search: "${pattern}" (${flags})` - } - return pattern ? 
`Code search: "${pattern}"` : 'Code search' - } - case 'glob': { - const patterns = input.patterns as - | Array<{ pattern: string }> - | undefined - if (patterns && patterns.length > 0) { - return `Glob: ${patterns.map((p) => p.pattern).join(', ')}` - } - return 'Glob search' - } - case 'list_directory': { - const directories = input.directories as - | Array<{ path: string }> - | undefined - if (directories && directories.length > 0) { - return `Listed dirs: ${directories.map((d) => d.path).join(', ')}` - } - return 'Listed directory' - } - case 'find_files': { - const pattern = input.pattern as string | undefined - return pattern ? `Find files: "${pattern}"` : 'Find files' - } - case 'run_terminal_command': { - const command = input.command as string | undefined - if (command) { - const shortCmd = - command.length > 50 ? command.slice(0, 50) + '...' : command - return `Ran command: ${shortCmd}` - } - return 'Ran terminal command' - } - case 'spawn_agents': - case 'spawn_agent_inline': { - const agents = input.agents as - | Array<{ - agent_type: string - prompt?: string - params?: Record - }> - | undefined - const agentType = input.agent_type as string | undefined - const prompt = input.prompt as string | undefined - const agentParams = input.params as - | Record - | undefined - - if (agents && agents.length > 0) { - const agentDetails = agents.map((a) => { - let detail = a.agent_type - const extras: string[] = [] - if (a.prompt) { - const truncatedPrompt = - a.prompt.length > 1000 - ? a.prompt.slice(0, 1000) + '...' - : a.prompt - extras.push(`prompt: "${truncatedPrompt}"`) - } - if (a.params && Object.keys(a.params).length > 0) { - const paramsStr = JSON.stringify(a.params) - const truncatedParams = - paramsStr.length > 1000 - ? paramsStr.slice(0, 1000) + '...' 
- : paramsStr - extras.push(`params: ${truncatedParams}`) - } - if (extras.length > 0) { - detail += ` (${extras.join(', ')})` - } - return detail - }) - return `Spawned agents:\n${agentDetails.map((d) => `- ${d}`).join('\n')}` - } - if (agentType) { - const extras: string[] = [] - if (prompt) { - const truncatedPrompt = - prompt.length > 1000 ? prompt.slice(0, 1000) + '...' : prompt - extras.push(`prompt: "${truncatedPrompt}"`) - } - if (agentParams && Object.keys(agentParams).length > 0) { - const paramsStr = JSON.stringify(agentParams) - const truncatedParams = - paramsStr.length > 1000 - ? paramsStr.slice(0, 1000) + '...' - : paramsStr - extras.push(`params: ${truncatedParams}`) - } - if (extras.length > 0) { - return `Spawned agent: ${agentType} (${extras.join(', ')})` - } - return `Spawned agent: ${agentType}` - } - return 'Spawned agent(s)' - } - case 'write_todos': { - const todos = input.todos as - | Array<{ task: string; completed: boolean }> - | undefined - if (todos) { - const completed = todos.filter((t) => t.completed).length - const incomplete = todos.filter((t) => !t.completed) - if (incomplete.length === 0) { - return `Todos: ${completed}/${todos.length} complete (all done!)` - } - const remainingTasks = incomplete - .map((t) => `- ${t.task}`) - .join('\n') - return `Todos: ${completed}/${todos.length} complete. Remaining:\n${remainingTasks}` - } - return 'Updated todos' - } - case 'ask_user': { - const questions = input.questions as - | Array<{ question: string }> - | undefined - if (questions && questions.length > 0) { - const questionTexts = questions.map((q) => q.question).join('; ') - const truncated = - questionTexts.length > 200 - ? questionTexts.slice(0, 200) + '...' - : questionTexts - return `Asked user: ${truncated}` - } - return 'Asked user question' - } - case 'suggest_followups': - return 'Suggested followups' - case 'web_search': { - const query = input.query as string | undefined - return query ? 
`Web search: "${query}"` : 'Web search' - } - case 'read_docs': { - const query = input.query as string | undefined - return query ? `Read docs: "${query}"` : 'Read docs' - } - case 'set_output': - return 'Set output' - case 'set_messages': - return 'Set messages' - default: - return `Used tool: ${toolName}` - } -} - const definition: AgentDefinition = { id: 'context-pruner', publisher, @@ -278,6 +25,12 @@ const definition: AgentDefinition = { maxContextLength: { type: 'number', }, + assistantToolBudget: { + type: 'number', + }, + userBudget: { + type: 'number', + }, }, required: [], }, @@ -291,9 +44,6 @@ const definition: AgentDefinition = { // Constants (must be inside handleSteps since it's serialized to a string) // ============================================================================= - /** Target: summarized messages should be at most 10% of max context */ - const TARGET_SUMMARY_FACTOR = 0.1 - /** Agent IDs whose output should be excluded from spawn_agents results */ const SPAWN_AGENTS_OUTPUT_BLACKLIST = [ 'file-picker', @@ -302,11 +52,27 @@ const definition: AgentDefinition = { 'basher', 'code-reviewer', 'code-reviewer-multi-prompt', + 'librarian', + 'tmux-cli', + 'browser-use', ] - /** Limits for truncating long messages (chars) */ - const USER_MESSAGE_LIMIT = 15000 - const ASSISTANT_MESSAGE_LIMIT = 4000 + /** Limits for truncating long messages in the summary (estimated tokens) */ + const USER_MESSAGE_LIMIT = 13_000 + const ASSISTANT_MESSAGE_LIMIT = 1_300 + const TOOL_ENTRY_LIMIT = 5_000 + + /** Approximate characters per token (matches estimateTokens heuristic) */ + const CHARS_PER_TOKEN = 3 + + /** Token budget for assistant + tool content in the conversation summary */ + const ASSISTANT_TOOL_BUDGET = 20_000 + + /** Token budget for user content in the conversation summary */ + const USER_BUDGET = 50_000 + + /** Fudge factor for token count threshold to trigger pruning earlier */ + const TOKEN_COUNT_FUDGE_FACTOR = 1_000 /** Prompt cache expiry 
time (Anthropic caches for 5 minutes) */ const CACHE_EXPIRY_MS = 5 * 60 * 1000 @@ -315,8 +81,6 @@ const definition: AgentDefinition = { const SUMMARY_HEADER = 'This is a summary of the conversation so far. The original messages have been condensed to save context space.' - /** Fudge factor for token count threshold to trigger pruning earlier */ - const TOKEN_COUNT_FUDGE_FACTOR = 1000 // ============================================================================= // Helper Functions (must be inside handleSteps since it's serialized to a string) @@ -338,13 +102,6 @@ const definition: AgentDefinition = { return `${prefix}\n\n[...truncated ${truncatedChars} chars...]\n\n${suffix}` } - /** - * Estimates token count from a JSON-serializable object. - */ - function estimateTokens(obj: unknown): number { - return Math.ceil(JSON.stringify(obj).length / 3) - } - /** * Extracts text content from a message. */ @@ -411,22 +168,12 @@ const definition: AgentDefinition = { return pattern ? `Code search: "${pattern}"` : 'Code search' } case 'glob': { - const patterns = input.patterns as - | Array<{ pattern: string }> - | undefined - if (patterns && patterns.length > 0) { - return `Glob: ${patterns.map((p) => p.pattern).join(', ')}` - } - return 'Glob search' + const pattern = input.pattern as string | undefined + return pattern ? `Glob: ${pattern}` : 'Glob search' } case 'list_directory': { - const directories = input.directories as - | Array<{ path: string }> - | undefined - if (directories && directories.length > 0) { - return `Listed dirs: ${directories.map((d) => d.path).join(', ')}` - } - return 'Listed directory' + const path = input.path as string | undefined + return path ? 
`Listed dir: ${path}` : 'Listed directory' } case 'find_files': { const pattern = input.pattern as string | undefined @@ -627,69 +374,80 @@ const definition: AgentDefinition = { } // === SUMMARIZATION STRATEGY === - // Convert entire conversation to a single summarized user message - // If there's already a summary from a previous compaction, extract and preserve it + // 1. Summarize ALL messages (apply transformations: truncation, tool summaries, etc.) + // 2. Walk backwards through summarized parts to apply token budgets + // 3. Older summarized parts beyond the budgets are dropped - // Check for existing conversation summary and extract its content - let previousSummary = '' - for (const message of currentMessages) { - if (message.role === 'user' && Array.isArray(message.content)) { - for (const part of message.content) { - if (part.type === 'text' && typeof part.text === 'string') { - const text = part.text as string - const summaryMatch = text.match( - /([\s\S]*?)<\/conversation_summary>/, - ) - if (summaryMatch) { - let summaryContent = summaryMatch[1].trim() - // Remove the standard header if present - if (summaryContent.startsWith(SUMMARY_HEADER)) { - summaryContent = summaryContent - .slice(SUMMARY_HEADER.length) - .trim() - } - // Remove [PREVIOUS SUMMARY] prefix if present (from earlier compaction) - // to avoid nested markers - if (summaryContent.startsWith('[PREVIOUS SUMMARY]')) { - summaryContent = summaryContent - .slice('[PREVIOUS SUMMARY]'.length) - .trim() - } - previousSummary = summaryContent - } - } - } + const assistantToolBudget: number = params?.assistantToolBudget ?? ASSISTANT_TOOL_BUDGET + const userBudget: number = params?.userBudget ?? 
USER_BUDGET + + function shouldExcludeMessage(message: Message): boolean { + if (message.tags?.includes('INSTRUCTIONS_PROMPT')) return true + if (message.tags?.includes('STEP_PROMPT')) return true + if (message.tags?.includes('SUBAGENT_SPAWN')) return true + return false + } + + function isConversationSummary(message: Message): boolean { + if (message.role !== 'user') return false + return getTextContent(message).includes('') + } + + function extractSummaryContent(message: Message): string { + const text = getTextContent(message) + const match = text.match( + /([\s\S]*?)<\/conversation_summary>/, + ) + if (!match) return '' + let content = match[1].trim() + if (content.startsWith(SUMMARY_HEADER)) { + content = content.slice(SUMMARY_HEADER.length).trim() } + return content } - // Filter out messages that are previous summaries or have special tags to exclude - const messagesWithoutOldSummaries = currentMessages.filter((message) => { - // Exclude messages with special tags that shouldn't be in the summary - if (message.tags?.includes('INSTRUCTIONS_PROMPT')) return false - if (message.tags?.includes('STEP_PROMPT')) return false - if (message.tags?.includes('SUBAGENT_SPAWN')) return false - - // Exclude previous conversation summaries - if (message.role === 'user' && Array.isArray(message.content)) { - for (const part of message.content) { - if (part.type === 'text' && typeof part.text === 'string') { - if ((part.text as string).includes('')) { - return false - } - } + /** + * Parses a previous summary text blob into role-tagged entries. + * Splits on the --- separator and determines each chunk's role + * based on its prefix marker. 
+ */ + function parseSummaryIntoEntries( + summaryText: string, + ): Array<{ role: 'user' | 'assistant_tool'; parts: string[] }> { + if (!summaryText.trim()) return [] + + const separator = '\n\n---\n\n' + const chunks = summaryText.split(separator).filter((c) => c.trim()) + + return chunks.map((chunk) => { + const trimmed = chunk.trim() + const isUser = + trimmed.startsWith('[USER]\n') || + trimmed.startsWith('[USER] [with image') + return { + role: isUser ? ('user' as const) : ('assistant_tool' as const), + parts: [trimmed], } + }) + } + + // Extract previous summary content from all messages + let previousSummaryContent = '' + for (const message of currentMessages) { + if (isConversationSummary(message)) { + previousSummaryContent = extractSummaryContent(message) } - return true - }) + } - // Build the summary - const summaryParts: string[] = [] + // Filter out excluded and conversation summary messages for summarization + const messagesToSummarize = currentMessages.filter( + (message) => !shouldExcludeMessage(message) && !isConversationSummary(message), + ) // Find the last user message with images to preserve in the final output - // We preserve the most recent user's images since they're likely the most relevant let lastUserImageParts: Array> = [] - for (let i = messagesWithoutOldSummaries.length - 1; i >= 0; i--) { - const msg = messagesWithoutOldSummaries[i] + for (let i = messagesToSummarize.length - 1; i >= 0; i--) { + const msg = messagesToSummarize[i] if (msg.role === 'user' && Array.isArray(msg.content)) { const imageParts = msg.content.filter( (part: Record) => @@ -702,18 +460,14 @@ const definition: AgentDefinition = { } } - // If there was a previous summary, include it first (no marker needed, already chronological) - if (previousSummary) { - summaryParts.push(previousSummary) - } + // Phase 1: Summarize ALL messages into tagged entries + const summarizedEntries: Array<{ role: 'user' | 'assistant_tool'; parts: string[] }> = [] - for (const message 
of messagesWithoutOldSummaries) { + for (const message of messagesToSummarize) { if (message.role === 'user') { let text = getTextContent(message).trim() if (text) { - // Truncate very long user messages (80% prefix, 20% suffix) - text = truncateLongText(text, USER_MESSAGE_LIMIT) - // Check for images in the message + text = truncateLongText(text, USER_MESSAGE_LIMIT * CHARS_PER_TOKEN) let hasImages = false if (Array.isArray(message.content)) { hasImages = message.content.some( @@ -722,7 +476,10 @@ const definition: AgentDefinition = { ) } const imageNote = hasImages ? ' [with image(s)]' : '' - summaryParts.push(`[USER]${imageNote}\n${text}`) + summarizedEntries.push({ + role: 'user', + parts: [`[USER]${imageNote}\n${text}`], + }) } } else if (message.role === 'assistant') { const textParts: string[] = [] @@ -731,7 +488,6 @@ const definition: AgentDefinition = { if (Array.isArray(message.content)) { for (const part of message.content) { if (part.type === 'text' && typeof part.text === 'string') { - // Remove tags and their contents before summarizing const textWithoutThinkTags = (part.text as string) .replace(/[\s\S]*?<\/think>/g, '') .trim() @@ -748,9 +504,8 @@ const definition: AgentDefinition = { const parts: string[] = [] if (textParts.length > 0) { - // Truncate very long assistant text (80% prefix, 20% suffix) let combinedText = textParts.join('\n') - combinedText = truncateLongText(combinedText, ASSISTANT_MESSAGE_LIMIT) + combinedText = truncateLongText(combinedText, ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) parts.push(combinedText) } if (toolSummaries.length > 0) { @@ -758,44 +513,43 @@ const definition: AgentDefinition = { } if (parts.length > 0) { - summaryParts.push(`[ASSISTANT]\n${parts.join('\n')}`) + summarizedEntries.push({ + role: 'assistant_tool', + parts: [`[ASSISTANT]\n${parts.join('\n')}`], + }) } } else if (message.role === 'tool') { - // Tool results are already captured via the tool-call summaries - // But we capture errors, terminal exit 
codes, and ask_user answers const toolMessage = message as ToolMessage + const entryParts: string[] = [] + if (Array.isArray(toolMessage.content)) { for (const part of toolMessage.content) { if (part.type === 'json' && part.value) { const value = part.value as Record - // Capture errors if (value.errorMessage || value.error) { let errorText = String(value.errorMessage || value.error) - // Truncate long error messages to 100 chars if (errorText.length > 100) { errorText = errorText.slice(0, 100) + '...' } - summaryParts.push( + entryParts.push( `[TOOL ERROR: ${toolMessage.toolName}] ${errorText}`, ) } - // Capture terminal command exit codes (non-zero = failure) if ( toolMessage.toolName === 'run_terminal_command' && 'exitCode' in value ) { const exitCode = value.exitCode as number if (exitCode !== 0) { - summaryParts.push(`[COMMAND FAILED] Exit code: ${exitCode}`) + entryParts.push(`[COMMAND FAILED] Exit code: ${exitCode}`) } } - // Capture ask_user answers or skipped if (toolMessage.toolName === 'ask_user') { if (value.skipped) { - summaryParts.push('[USER SKIPPED QUESTION]') + entryParts.push('[USER SKIPPED QUESTION]') } else if ('answers' in value) { const answers = value.answers as | Array<{ @@ -814,43 +568,34 @@ const definition: AgentDefinition = { return '(no answer)' }) .join('; ') - // Truncate long answers to 10,000 chars const truncated = answerTexts.length > 10_000 ? answerTexts.slice(0, 10_000) + '...' : answerTexts - summaryParts.push(`[USER ANSWERED] ${truncated}`) + entryParts.push(`[USER ANSWERED] ${truncated}`) } } } - // Capture str_replace results (diff of changes made) - if (toolMessage.toolName === 'str_replace') { - const diff = value.diff as string | undefined - if (diff) { - // Truncate long diffs to 2000 chars - const truncatedDiff = - diff.length > 2000 ? diff.slice(0, 2000) + '...' 
: diff - summaryParts.push(`[EDIT RESULT]\n${truncatedDiff}`) - } - } - - // Capture write_file results (diff of changes made) - if (toolMessage.toolName === 'write_file') { - const diff = value.diff as string | undefined - if (diff) { - // Truncate long diffs to 2000 chars - const truncatedDiff = - diff.length > 2000 ? diff.slice(0, 2000) + '...' : diff - summaryParts.push(`[WRITE RESULT]\n${truncatedDiff}`) - } + if ( + toolMessage.toolName === 'str_replace' || + toolMessage.toolName === 'propose_str_replace' || + toolMessage.toolName === 'write_file' || + toolMessage.toolName === 'propose_write_file' + ) { + const resultStr = JSON.stringify(value) + const truncatedResult = + resultStr.length > 2000 + ? resultStr.slice(0, 2000) + '...' + : resultStr + entryParts.push( + `[EDIT RESULT: ${toolMessage.toolName}]\n${truncatedResult}`, + ) } } } } - // Capture spawn_agents results (excluding blacklisted agents) - // The tool result value is an array of agent results at the top level if ( toolMessage.toolName === 'spawn_agents' && Array.isArray(toolMessage.content) @@ -873,72 +618,88 @@ const definition: AgentDefinition = { if (includedResults.length > 0) { const resultSummaries = includedResults.map((r) => { let outputStr = '' - // Extract the actual output from value.value (e.g., lastMessage content) if (r.value?.value !== undefined && r.value?.value !== null) { if (typeof r.value.value === 'string') { outputStr = r.value.value } else { outputStr = JSON.stringify(r.value.value) } - // Remove tags and their contents to save context tokens outputStr = outputStr .replace(/[\s\S]*?<\/think>/g, '') .trim() - // Truncate long outputs to ASSISTANT_MESSAGE_LIMIT chars - if (outputStr.length > ASSISTANT_MESSAGE_LIMIT) { + if (outputStr.length > ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) { outputStr = - outputStr.slice(0, ASSISTANT_MESSAGE_LIMIT) + '...' + outputStr.slice(0, ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) + '...' 
} } return `- ${r.agentType}: ${outputStr || '(no output)'}` }) - summaryParts.push( + entryParts.push( `[AGENT RESULTS]\n${resultSummaries.join('\n')}`, ) } } } } + + if (entryParts.length > 0) { + const joinedToolEntry = truncateLongText( + entryParts.join('\n\n'), + TOOL_ENTRY_LIMIT * CHARS_PER_TOKEN, + ) + summarizedEntries.push({ + role: 'assistant_tool', + parts: [joinedToolEntry], + }) + } } } - let summaryText = summaryParts.join('\n\n---\n\n') - - // Calculate target size (10% of max context, for messages only) - const targetTokens = maxContextLength * TARGET_SUMMARY_FACTOR - let summaryTokens = estimateTokens(summaryText) + // Parse previous summary into role-tagged entries and combine with new entries + const allEntries = [ + ...parseSummaryIntoEntries(previousSummaryContent), + ...summarizedEntries, + ] - // If summary is too big, truncate from the beginning - if (summaryTokens > targetTokens) { - const truncationMessage = - '[CONVERSATION TRUNCATED - Earlier messages omitted due to length]\n\n' - const truncationTokens = estimateTokens(truncationMessage) - const availableTokens = targetTokens - truncationTokens + // Phase 2: Walk backwards through all entries to apply token budgets + let assistantToolTokens = 0 + let userTokens = 0 + let cutoffIndex = 0 - // Estimate characters to keep (rough: 3 chars per token) - const charsToKeep = Math.floor(availableTokens * 3) + for (let i = allEntries.length - 1; i >= 0; i--) { + const entry = allEntries[i] + const entryText = entry.parts.join('\n\n---\n\n') + const entryTokens = Math.ceil(entryText.length / CHARS_PER_TOKEN) - if (charsToKeep > 0 && charsToKeep < summaryText.length) { - // Truncate from the beginning, try to find a clean break point - const truncatedText = summaryText.slice(-charsToKeep) - // Find the first separator to make a clean cut - const separatorIndex = truncatedText.indexOf('\n\n---\n\n') - if ( - separatorIndex !== -1 && - separatorIndex < truncatedText.length / 2 - ) { - summaryText = 
- truncationMessage + - truncatedText.slice(separatorIndex + '\n\n---\n\n'.length) - } else { - summaryText = truncationMessage + truncatedText + if (entry.role === 'user') { + if (userTokens + entryTokens > userBudget) { + cutoffIndex = i + 1 + break + } + userTokens += entryTokens + } else { + if (assistantToolTokens + entryTokens > assistantToolBudget) { + cutoffIndex = i + 1 + break } - } else if (charsToKeep <= 0) { - summaryText = - truncationMessage + '[Summary too large - content omitted]' + assistantToolTokens += entryTokens } } + // Phase 3: Build final summary from included entries + const summaryParts: string[] = [] + + for (let i = cutoffIndex; i < allEntries.length; i++) { + summaryParts.push(...allEntries[i].parts) + } + + // Fallback: if nothing fit within budgets, always include at least the newest entry + if (summaryParts.length === 0 && allEntries.length > 0) { + summaryParts.push(...allEntries[allEntries.length - 1].parts) + } + + const summaryText = summaryParts.join('\n\n---\n\n') + // Create the summarized message with fresh sentAt timestamp // Include any images from the last user message that had images const now = Date.now() diff --git a/cli/release/package.json b/cli/release/package.json index f51779ae8b..e737956880 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.631", + "version": "1.0.633", "description": "AI coding agent", "license": "MIT", "bin": { diff --git a/cli/src/commands/__tests__/router-input.test.ts b/cli/src/commands/__tests__/router-input.test.ts index ac1310a795..653063abbc 100644 --- a/cli/src/commands/__tests__/router-input.test.ts +++ b/cli/src/commands/__tests__/router-input.test.ts @@ -372,22 +372,16 @@ describe('command-registry', () => { } }) - test('connect slash command presence matches feature flag', () => { - const { CHATGPT_OAUTH_ENABLED } = require('@codebuff/common/constants/chatgpt-oauth') + test('connect command is not available in 
codebuff (freebuff-only)', () => { const hasConnectSlashCommand = SLASH_COMMANDS.some( (cmd) => cmd.id === 'connect', ) - expect(hasConnectSlashCommand).toBe(CHATGPT_OAUTH_ENABLED) + expect(hasConnectSlashCommand).toBe(false) }) - test('connect:chatgpt command registry availability matches feature flag', () => { - const { CHATGPT_OAUTH_ENABLED } = require('@codebuff/common/constants/chatgpt-oauth') + test('connect:chatgpt command is not available in codebuff (freebuff-only)', () => { const command = findCommand('connect:chatgpt') - if (CHATGPT_OAUTH_ENABLED) { - expect(command).toBeDefined() - } else { - expect(command).toBeUndefined() - } + expect(command).toBeUndefined() }) }) }) diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index b5b81d5800..69b8857b2e 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -179,6 +179,7 @@ const FREEBUFF_REMOVED_COMMANDS = new Set([ ]) const FREEBUFF_ONLY_COMMANDS = new Set([ + 'connect', 'plan', ]) diff --git a/cli/src/components/help-banner.tsx b/cli/src/components/help-banner.tsx index 0e0ee17007..ccf39bdf82 100644 --- a/cli/src/components/help-banner.tsx +++ b/cli/src/components/help-banner.tsx @@ -38,6 +38,7 @@ export const HelpBanner = () => { const theme = useTheme() const { data: subscriptionData } = useSubscriptionQuery() const hasSubscription = subscriptionData?.hasSubscription ?? 
false + const chatGptOAuth = getChatGptOAuthStatus() // Auto-hide after timeout React.useEffect(() => { @@ -79,11 +80,16 @@ export const HelpBanner = () => { Tips - {IS_FREEBUFF && !getChatGptOAuthStatus().connected && ( + {IS_FREEBUFF && !chatGptOAuth.connected && ( Connect via /connect to unlock /plan & /review )} + {IS_FREEBUFF && chatGptOAuth.connected && ( + + Try workflow: /interview → /plan → implement → /review + + )} Use @ to reference agents to spawn or files to read diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts index 50dd90f0d2..6893640516 100644 --- a/cli/src/data/slash-commands.ts +++ b/cli/src/data/slash-commands.ts @@ -47,6 +47,7 @@ const FREEBUFF_REMOVED_COMMAND_IDS = new Set([ ]) const FREEBUFF_ONLY_COMMAND_IDS = new Set([ + 'connect', 'plan', ]) diff --git a/cli/src/hooks/use-auth-state.ts b/cli/src/hooks/use-auth-state.ts index e800b3355f..5f5ef29d01 100644 --- a/cli/src/hooks/use-auth-state.ts +++ b/cli/src/hooks/use-auth-state.ts @@ -6,6 +6,7 @@ import { useLoginStore } from '../state/login-store' import { identifyUser, trackEvent } from '../utils/analytics' import { getUserCredentials } from '../utils/auth' import { resetCodebuffClient } from '../utils/codebuff-client' +import { IS_FREEBUFF } from '../utils/constants' import { loggerContext } from '../utils/logger' import type { MultilineInputHandle } from '../components/multiline-input' @@ -14,7 +15,7 @@ import type { User } from '../utils/auth' const setAuthLoggerContext = (params: { userId: string; email: string }) => { loggerContext.userId = params.userId loggerContext.userEmail = params.email - identifyUser(params.userId, { email: params.email }) + identifyUser(params.userId, { email: params.email, freebuff: IS_FREEBUFF }) } const clearAuthLoggerContext = () => { diff --git a/cli/src/index.tsx b/cli/src/index.tsx index 62579dba34..7f2e3de77c 100644 --- a/cli/src/index.tsx +++ b/cli/src/index.tsx @@ -23,7 +23,7 @@ import { handlePublish } from 
'./commands/publish' import { runPlainLogin } from './login/plain-login' import { initializeApp } from './init/init-app' import { getProjectRoot, setProjectRoot } from './project-files' -import { initAnalytics, trackEvent } from './utils/analytics' +import { trackEvent } from './utils/analytics' import { getAuthToken, getAuthTokenDetails } from './utils/auth' import { resetCodebuffClient } from './utils/codebuff-client' import { setApiClientAuthToken } from './utils/codebuff-api' @@ -66,7 +66,7 @@ function loadPackageVersion(): string { // Without this, refetchInterval won't work because TanStack Query thinks the app is "unfocused" focusManager.setEventListener(() => { // No-op: no event listeners in CLI environment (no window focus/visibility events) - return () => {} + return () => { } }) focusManager.setFocused(true) @@ -222,26 +222,17 @@ async function main(): Promise { const startCwd = process.cwd() const showProjectPicker = shouldShowProjectPicker(startCwd, homeDir) - // Initialize analytics early, before anything that might use the logger - // (the logger calls trackEvent, which throws if analytics isn't initialized) - try { - initAnalytics() - - // Track app launch event - trackEvent(AnalyticsEvent.APP_LAUNCHED, { - version: loadPackageVersion(), - platform: process.platform, - arch: process.arch, - hasInitialPrompt: Boolean(initialPrompt), - hasAgentOverride: hasAgentOverride, - continueChat, - initialMode: initialMode ?? 'DEFAULT', - isFreeBuff: IS_FREEBUFF, - }) - } catch (error) { - // Analytics initialization is optional - don't fail the app if it errors - logger.debug(error, 'Failed to initialize analytics') - } + // Requires analytics to be initialized, which is done in initializeApp + trackEvent(AnalyticsEvent.APP_LAUNCHED, { + version: loadPackageVersion(), + platform: process.platform, + arch: process.arch, + hasInitialPrompt: Boolean(initialPrompt), + hasAgentOverride: hasAgentOverride, + continueChat, + initialMode: initialMode ?? 
'DEFAULT', + isFreeBuff: IS_FREEBUFF, + }) // Initialize agent registry (loads user agents via SDK). // When --agent is provided, skip local .agents to avoid overrides. diff --git a/cli/src/init/init-app.ts b/cli/src/init/init-app.ts index 133c3ca181..1b8ae41efa 100644 --- a/cli/src/init/init-app.ts +++ b/cli/src/init/init-app.ts @@ -12,6 +12,7 @@ import { initializeThemeStore } from '../hooks/use-theme' import { setProjectRoot } from '../project-files' import { initTimestampFormatter } from '../utils/helpers' import { enableManualThemeRefresh } from '../utils/theme-system' +import { initAnalytics } from '../utils/analytics' import { initializeDirenv } from './init-direnv' export async function initializeApp(params: { cwd?: string }): Promise { @@ -21,6 +22,14 @@ export async function initializeApp(params: { cwd?: string }): Promise { const baseCwd = process.cwd() setProjectRoot(baseCwd) + // Initialize analytics before direnv, because direnv uses the logger + // which calls trackEvent — analytics must be ready first. 
+ try { + initAnalytics() + } catch (error) { + console.debug('Failed to initialize analytics:', error) + } + // Initialize direnv environment before anything else initializeDirenv() diff --git a/common/src/analytics.ts b/common/src/analytics.ts index 46965bd17d..ea88cf7e59 100644 --- a/common/src/analytics.ts +++ b/common/src/analytics.ts @@ -3,6 +3,7 @@ import { env, DEBUG_ANALYTICS } from '@codebuff/common/env' import { createPostHogClient, type AnalyticsClient } from './analytics-core' import { AnalyticsEvent } from './constants/analytics-events' +import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' import type { Logger } from '@codebuff/common/types/contracts/logger' let client: AnalyticsClient | undefined @@ -32,6 +33,18 @@ export async function flushAnalytics(logger?: Logger) { } } +export function withDefaultProperties( + trackEventFn: TrackEventFn, + defaultProperties: Record, +): TrackEventFn { + return (params) => { + trackEventFn({ + ...params, + properties: { ...defaultProperties, ...params.properties }, + }) + } +} + export function trackEvent({ event, userId, diff --git a/common/src/tools/params/tool/set-output.ts b/common/src/tools/params/tool/set-output.ts index d9a69ea5da..1171f63dc3 100644 --- a/common/src/tools/params/tool/set-output.ts +++ b/common/src/tools/params/tool/set-output.ts @@ -6,6 +6,21 @@ import type { $ToolParams } from '../../constants' const toolName = 'set_output' const endsAgentStep = false + +// WHY `data` EXISTS IN THE INPUT SCHEMA: +// Subagents inherit their parent's tool definitions, and because of prompt caching +// we cannot modify or add tools mid-conversation. OpenAI models enforce the tool's +// input schema strictly, so we need a permissive shape that any model can call. +// An empty schema or `z.object({}).passthrough()` would be rejected by OpenAI's +// strict schema enforcement. 
The `data: z.record(...)` field is a deliberately +// vague shape that satisfies OpenAI while allowing us to inject the real +// outputSchema later in the conversation (in the instructions prompt). +// +// At runtime, the handler (`packages/agent-runtime/src/tools/handlers/tool/set-output.ts`) +// tries parsing against the real outputSchema in two ways: +// 1. Parse the raw output (agent passed fields at top level) +// 2. Fallback: parse `output.data` (agent wrapped fields in `data`) +// This means both `{ results: [...] }` and `{ data: { results: [...] } }` are accepted. const inputSchema = z .looseObject({ data: z.record(z.string(), z.any()).optional(), diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 25a1e24696..d29c729fc8 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.20", + "version": "0.0.21", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { diff --git a/packages/agent-runtime/src/templates/strings.ts b/packages/agent-runtime/src/templates/strings.ts index 313c20b220..6ac005a151 100644 --- a/packages/agent-runtime/src/templates/strings.ts +++ b/packages/agent-runtime/src/templates/strings.ts @@ -226,7 +226,7 @@ export async function getAgentPrompt( if (outputSchema) { addendum += '\n\n## Output Schema\n\n' addendum += - 'When using the set_output tool, your output must conform to this schema:\n\n' + 'When using the set_output tool, your output must conform to this schema. 
You may pass the fields either directly as top-level parameters or inside a `data` field — both are accepted.\n\n' addendum += '```json\n' try { // Convert Zod schema to JSON schema for display diff --git a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts index 2def7b1d51..8dec297118 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts @@ -52,13 +52,24 @@ export const handleSetOutput = (async (params: { agentTemplate.outputSchema.parse(data) finalOutput = data } catch (error2) { - const errorMessage = `Output validation error: Output failed to match the output schema and was ignored. You might want to try again! Issues: ${error}` + // Show whichever error has fewer issues — that represents the "closer" parse + // attempt and gives the agent more actionable feedback for retrying. + const issues1 = getZodIssueCount(error) + const issues2 = getZodIssueCount(error2) + const usedData = issues2 < issues1 + const bestError = usedData ? error2 : error + const prefix = usedData + ? 'Output validation error: Your output was found inside the `data` field but still failed validation. Please fix the issues and try again without wrapping in `data`. Issues: ' + : 'Output validation error: Output failed to match the output schema and was ignored. You might want to try again! 
Issues: ' + const errorMessage = `${prefix}${bestError}` logger.error( { output, agentType: agentState.agentType, agentId: agentState.agentId, - error, + topLevelError: error, + dataFieldError: error2, + usedDataFieldError: usedData, }, 'set_output validation error', ) @@ -78,3 +89,15 @@ export const handleSetOutput = (async (params: { return { output: jsonToolResult({ message: 'Output set' }) } }) satisfies CodebuffToolHandlerFunction + +function getZodIssueCount(error: unknown): number { + if ( + error != null && + typeof error === 'object' && + 'issues' in error && + Array.isArray((error as { issues: unknown }).issues) + ) { + return (error as { issues: unknown[] }).issues.length + } + return Infinity +} diff --git a/packages/billing/src/balance-calculator.ts b/packages/billing/src/balance-calculator.ts index 7a96617128..1a2439f66a 100644 --- a/packages/billing/src/balance-calculator.ts +++ b/packages/billing/src/balance-calculator.ts @@ -536,6 +536,7 @@ export async function consumeCreditsAndAddAgentStep(params: { cacheReadInputTokens: number reasoningTokens: number | null outputTokens: number + ttftMs: number | null logger: Logger }): Promise> { @@ -561,6 +562,7 @@ export async function consumeCreditsAndAddAgentStep(params: { cacheReadInputTokens, reasoningTokens, outputTokens, + ttftMs, logger, } = params @@ -650,6 +652,7 @@ export async function consumeCreditsAndAddAgentStep(params: { credits, byok, latency_ms: latencyMs, + ttft_ms: ttftMs, user_id: userId, }) } catch (error) { diff --git a/packages/internal/src/db/migrations/0042_needy_jack_murdock.sql b/packages/internal/src/db/migrations/0042_needy_jack_murdock.sql new file mode 100644 index 0000000000..77648859f6 --- /dev/null +++ b/packages/internal/src/db/migrations/0042_needy_jack_murdock.sql @@ -0,0 +1 @@ +ALTER TABLE "message" ADD COLUMN "ttft_ms" integer; \ No newline at end of file diff --git a/packages/internal/src/db/migrations/meta/0042_snapshot.json 
b/packages/internal/src/db/migrations/meta/0042_snapshot.json new file mode 100644 index 0000000000..abb7dceabe --- /dev/null +++ b/packages/internal/src/db/migrations/meta/0042_snapshot.json @@ -0,0 +1,3078 @@ +{ + "id": "c7772899-6ae6-4a07-890e-a1ca64dc6e61", + "prevId": "db3b93eb-3ed2-4468-80d1-0d082f4cecbd", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.account": { + "name": "account", + "schema": "", + "columns": { + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "providerAccountId": { + "name": "providerAccountId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "refresh_token": { + "name": "refresh_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_token": { + "name": "access_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "token_type": { + "name": "token_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "id_token": { + "name": "id_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "session_state": { + "name": "session_state", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "account_userId_user_id_fk": { + "name": "account_userId_user_id_fk", + "tableFrom": "account", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "account_provider_providerAccountId_pk": { + "name": 
"account_provider_providerAccountId_pk", + "columns": [ + "provider", + "providerAccountId" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ad_impression": { + "name": "ad_impression", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ad_text": { + "name": "ad_text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "cta": { + "name": "cta", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "favicon": { + "name": "favicon", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "click_url": { + "name": "click_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "imp_url": { + "name": "imp_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "payout": { + "name": "payout", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true + }, + "credits_granted": { + "name": "credits_granted", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "grant_operation_id": { + "name": "grant_operation_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "served_at": { + "name": "served_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "impression_fired_at": { + "name": "impression_fired_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "clicked_at": { + "name": "clicked_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_ad_impression_user": { 
+ "name": "idx_ad_impression_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "served_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ad_impression_imp_url": { + "name": "idx_ad_impression_imp_url", + "columns": [ + { + "expression": "imp_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "ad_impression_user_id_user_id_fk": { + "name": "ad_impression_user_id_user_id_fk", + "tableFrom": "ad_impression", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "ad_impression_imp_url_unique": { + "name": "ad_impression_imp_url_unique", + "nullsNotDistinct": false, + "columns": [ + "imp_url" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_config": { + "name": "agent_config", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "version": { + "name": "version", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "major": { + "name": "major", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)", + "type": "stored" + } + }, + "minor": { + "name": "minor", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)", + "type": "stored" + } + }, + "patch": { + 
"name": "patch", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)", + "type": "stored" + } + }, + "data": { + "name": "data", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_agent_config_publisher": { + "name": "idx_agent_config_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_config_publisher_id_publisher_id_fk": { + "name": "agent_config_publisher_id_publisher_id_fk", + "tableFrom": "agent_config", + "tableTo": "publisher", + "columnsFrom": [ + "publisher_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "agent_config_publisher_id_id_version_pk": { + "name": "agent_config_publisher_id_id_version_pk", + "columns": [ + "publisher_id", + "id", + "version" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_run": { + "name": "agent_run", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + 
"as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '/', 1)\n ELSE NULL\n END", + "type": "stored" + } + }, + "agent_name": { + "name": "agent_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(split_part(agent_id, '/', 2), '@', 1)\n ELSE agent_id\n END", + "type": "stored" + } + }, + "agent_version": { + "name": "agent_version", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '@', 2)\n ELSE NULL\n END", + "type": "stored" + } + }, + "ancestor_run_ids": { + "name": "ancestor_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "root_run_id": { + "name": "root_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END", + "type": "stored" + } + }, + "parent_run_id": { + "name": "parent_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END", + "type": "stored" + } + }, + "depth": { + "name": "depth", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)", + "type": "stored" + } + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "total_steps": { + "name": "total_steps", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "direct_credits": { + "name": 
"direct_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "total_credits": { + "name": "total_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "status": { + "name": "status", + "type": "agent_run_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'running'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_agent_run_user_id": { + "name": "idx_agent_run_user_id", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_parent": { + "name": "idx_agent_run_parent", + "columns": [ + { + "expression": "parent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_root": { + "name": "idx_agent_run_root", + "columns": [ + { + "expression": "root_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_agent_id": { + "name": "idx_agent_run_agent_id", + "columns": [ + { + "expression": "agent_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + 
"isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_publisher": { + "name": "idx_agent_run_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_status": { + "name": "idx_agent_run_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'running'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_ancestors_gin": { + "name": "idx_agent_run_ancestors_gin", + "columns": [ + { + "expression": "ancestor_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + }, + "idx_agent_run_completed_publisher_agent": { + "name": "idx_agent_run_completed_publisher_agent", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_recent": { + "name": "idx_agent_run_completed_recent", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": 
"btree", + "with": {} + }, + "idx_agent_run_completed_version": { + "name": "idx_agent_run_completed_version", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_version", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_user": { + "name": "idx_agent_run_completed_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_run_user_id_user_id_fk": { + "name": "agent_run_user_id_user_id_fk", + "tableFrom": "agent_run", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_step": { + "name": "agent_step", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "agent_run_id": { + "name": "agent_run_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "step_number": { + "name": "step_number", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - 
created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "credits": { + "name": "credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true, + "default": "'0'" + }, + "child_run_ids": { + "name": "child_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "spawned_count": { + "name": "spawned_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "array_length(child_run_ids, 1)", + "type": "stored" + } + }, + "message_id": { + "name": "message_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "agent_step_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'completed'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "unique_step_number_per_run": { + "name": "unique_step_number_per_run", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "step_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_run_id": { + "name": "idx_agent_step_run_id", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_children_gin": { + "name": "idx_agent_step_children_gin", + "columns": [ + { + "expression": "child_run_ids", + 
"isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": { + "agent_step_agent_run_id_agent_run_id_fk": { + "name": "agent_step_agent_run_id_agent_run_id_fk", + "tableFrom": "agent_step", + "tableTo": "agent_run", + "columnsFrom": [ + "agent_run_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.credit_ledger": { + "name": "credit_ledger", + "schema": "", + "columns": { + "operation_id": { + "name": "operation_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "principal": { + "name": "principal", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "balance": { + "name": "balance", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "grant_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_credit_ledger_active_balance": { + 
"name": "idx_credit_ledger_active_balance", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "balance", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "priority", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_org": { + "name": "idx_credit_ledger_org", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_subscription": { + "name": "idx_credit_ledger_subscription", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "credit_ledger_user_id_user_id_fk": { + "name": "credit_ledger_user_id_user_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "credit_ledger_org_id_org_id_fk": { + "name": "credit_ledger_org_id_org_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + 
"compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.encrypted_api_keys": { + "name": "encrypted_api_keys", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "api_key_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "api_key": { + "name": "api_key", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "encrypted_api_keys_user_id_user_id_fk": { + "name": "encrypted_api_keys_user_id_user_id_fk", + "tableFrom": "encrypted_api_keys", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "encrypted_api_keys_user_id_type_pk": { + "name": "encrypted_api_keys_user_id_type_pk", + "columns": [ + "user_id", + "type" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.fingerprint": { + "name": "fingerprint", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "sig_hash": { + "name": "sig_hash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.git_eval_results": { + "name": "git_eval_results", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "cost_mode": { + "name": "cost_mode", + "type": "text", + "primaryKey": false, + "notNull": false 
+ }, + "reasoner_model": { + "name": "reasoner_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_model": { + "name": "agent_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "cost": { + "name": "cost", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "is_public": { + "name": "is_public", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.limit_override": { + "name": "limit_override", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "credits_per_block": { + "name": "credits_per_block", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "block_duration_hours": { + "name": "block_duration_hours", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "weekly_credit_limit": { + "name": "weekly_credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "limit_override_user_id_user_id_fk": { + "name": "limit_override_user_id_user_id_fk", + "tableFrom": "limit_override", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + 
], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.message": { + "name": "message", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "client_id": { + "name": "client_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "client_request_id": { + "name": "client_request_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "request": { + "name": "request", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "last_message": { + "name": "last_message", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "\"message\".\"request\" -> -1", + "type": "stored" + } + }, + "reasoning_text": { + "name": "reasoning_text", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "response": { + "name": "response", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "input_tokens": { + "name": "input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "cache_creation_input_tokens": { + "name": "cache_creation_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "cache_read_input_tokens": { + "name": "cache_read_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "reasoning_tokens": { + "name": "reasoning_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "output_tokens": { + "name": 
"output_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cost": { + "name": "cost", + "type": "numeric(100, 20)", + "primaryKey": false, + "notNull": true + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "byok": { + "name": "byok", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "latency_ms": { + "name": "latency_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "ttft_ms": { + "name": "ttft_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "message_user_id_idx": { + "name": "message_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_finished_at_user_id_idx": { + "name": "message_finished_at_user_id_idx", + "columns": [ + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_idx": { + "name": "message_org_id_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_finished_at_idx": { + "name": "message_org_id_finished_at_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": 
true, + "nulls": "last" + }, + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "message_user_id_user_id_fk": { + "name": "message_user_id_user_id_fk", + "tableFrom": "message", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "message_org_id_org_id_fk": { + "name": "message_org_id_org_id_fk", + "tableFrom": "message", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org": { + "name": "org", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "owner_id": { + "name": "owner_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "current_period_start": { + "name": "current_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "current_period_end": { + "name": "current_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": 
"auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "credit_limit": { + "name": "credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "billing_alerts": { + "name": "billing_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "usage_alerts": { + "name": "usage_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "weekly_reports": { + "name": "weekly_reports", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_owner_id_user_id_fk": { + "name": "org_owner_id_user_id_fk", + "tableFrom": "org", + "tableTo": "user", + "columnsFrom": [ + "owner_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_slug_unique": { + "name": "org_slug_unique", + "nullsNotDistinct": false, + "columns": [ + "slug" + ] + }, + "org_stripe_customer_id_unique": { + "name": "org_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_feature": { + "name": "org_feature", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + 
"primaryKey": false, + "notNull": true + }, + "feature": { + "name": "feature", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "config": { + "name": "config", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_org_feature_active": { + "name": "idx_org_feature_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_feature_org_id_org_id_fk": { + "name": "org_feature_org_id_org_id_fk", + "tableFrom": "org_feature", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_feature_org_id_feature_pk": { + "name": "org_feature_org_id_feature_pk", + "columns": [ + "org_id", + "feature" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_invite": { + "name": "org_invite", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + 
"typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "invited_by": { + "name": "invited_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "accepted_at": { + "name": "accepted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "accepted_by": { + "name": "accepted_by", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_org_invite_token": { + "name": "idx_org_invite_token", + "columns": [ + { + "expression": "token", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_email": { + "name": "idx_org_invite_email", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_expires": { + "name": "idx_org_invite_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_invite_org_id_org_id_fk": { + "name": "org_invite_org_id_org_id_fk", + "tableFrom": "org_invite", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_invite_invited_by_user_id_fk": { + "name": 
"org_invite_invited_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "invited_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "org_invite_accepted_by_user_id_fk": { + "name": "org_invite_accepted_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "accepted_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_invite_token_unique": { + "name": "org_invite_token_unique", + "nullsNotDistinct": false, + "columns": [ + "token" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_member": { + "name": "org_member", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "joined_at": { + "name": "joined_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_member_org_id_org_id_fk": { + "name": "org_member_org_id_org_id_fk", + "tableFrom": "org_member", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_member_user_id_user_id_fk": { + "name": "org_member_user_id_user_id_fk", + "tableFrom": "org_member", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_member_org_id_user_id_pk": { + "name": "org_member_org_id_user_id_pk", + "columns": [ + "org_id", + "user_id" + ] + } + }, + 
"uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_repo": { + "name": "org_repo", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_name": { + "name": "repo_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_owner": { + "name": "repo_owner", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "approved_by": { + "name": "approved_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "approved_at": { + "name": "approved_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + } + }, + "indexes": { + "idx_org_repo_active": { + "name": "idx_org_repo_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_repo_unique": { + "name": "idx_org_repo_unique", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "repo_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_repo_org_id_org_id_fk": { + "name": "org_repo_org_id_org_id_fk", + "tableFrom": "org_repo", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + 
"onDelete": "cascade", + "onUpdate": "no action" + }, + "org_repo_approved_by_user_id_fk": { + "name": "org_repo_approved_by_user_id_fk", + "tableFrom": "org_repo", + "tableTo": "user", + "columnsFrom": [ + "approved_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.publisher": { + "name": "publisher", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "verified": { + "name": "verified", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "bio": { + "name": "bio", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "avatar_url": { + "name": "avatar_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_by": { + "name": "created_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "publisher_user_id_user_id_fk": { + "name": "publisher_user_id_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + 
"onUpdate": "no action" + }, + "publisher_org_id_org_id_fk": { + "name": "publisher_org_id_org_id_fk", + "tableFrom": "publisher", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_created_by_user_id_fk": { + "name": "publisher_created_by_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "created_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": { + "publisher_single_owner": { + "name": "publisher_single_owner", + "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)" + } + }, + "isRLSEnabled": false + }, + "public.referral": { + "name": "referral", + "schema": "", + "columns": { + "referrer_id": { + "name": "referrer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "referred_id": { + "name": "referred_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "referral_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_legacy": { + "name": "is_legacy", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "referral_referrer_id_user_id_fk": { + "name": "referral_referrer_id_user_id_fk", + "tableFrom": "referral", + 
"tableTo": "user", + "columnsFrom": [ + "referrer_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "referral_referred_id_user_id_fk": { + "name": "referral_referred_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referred_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "referral_referrer_id_referred_id_pk": { + "name": "referral_referrer_id_referred_id_pk", + "columns": [ + "referrer_id", + "referred_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.session": { + "name": "session", + "schema": "", + "columns": { + "sessionToken": { + "name": "sessionToken", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "fingerprint_id": { + "name": "fingerprint_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "session_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'web'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "session_userId_user_id_fk": { + "name": "session_userId_user_id_fk", + "tableFrom": "session", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "session_fingerprint_id_fingerprint_id_fk": { + "name": "session_fingerprint_id_fingerprint_id_fk", + "tableFrom": "session", + "tableTo": "fingerprint", + "columnsFrom": [ + "fingerprint_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no 
action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.subscription": { + "name": "subscription", + "schema": "", + "columns": { + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_price_id": { + "name": "stripe_price_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "tier": { + "name": "tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "scheduled_tier": { + "name": "scheduled_tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "subscription_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'active'" + }, + "billing_period_start": { + "name": "billing_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "billing_period_end": { + "name": "billing_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "cancel_at_period_end": { + "name": "cancel_at_period_end", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "canceled_at": { + "name": "canceled_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + 
"idx_subscription_customer": { + "name": "idx_subscription_customer", + "columns": [ + { + "expression": "stripe_customer_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_user": { + "name": "idx_subscription_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_status": { + "name": "idx_subscription_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"subscription\".\"status\" = 'active'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "subscription_user_id_user_id_fk": { + "name": "subscription_user_id_user_id_fk", + "tableFrom": "subscription", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sync_failure": { + "name": "sync_failure", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_attempt_at": { + "name": "last_attempt_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "retry_count": { + "name": "retry_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "last_error": { 
+ "name": "last_error", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "idx_sync_failure_retry": { + "name": "idx_sync_failure_retry", + "columns": [ + { + "expression": "retry_count", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "last_attempt_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"sync_failure\".\"retry_count\" < 5", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.user": { + "name": "user", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "password": { + "name": "password", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "emailVerified": { + "name": "emailVerified", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "image": { + "name": "image", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "next_quota_reset": { + "name": "next_quota_reset", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "default": "now() + INTERVAL '1 month'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "referral_code": { + "name": "referral_code", + "type": "text", + "primaryKey": false, + "notNull": false, + "default": "'ref-' || gen_random_uuid()" + }, + "referral_limit": { + "name": "referral_limit", + "type": "integer", 
+ "primaryKey": false, + "notNull": true, + "default": 5 + }, + "discord_id": { + "name": "discord_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "handle": { + "name": "handle", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "banned": { + "name": "banned", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "fallback_to_a_la_carte": { + "name": "fallback_to_a_la_carte", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "user_email_unique": { + "name": "user_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + }, + "user_stripe_customer_id_unique": { + "name": "user_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + }, + "user_referral_code_unique": { + "name": "user_referral_code_unique", + "nullsNotDistinct": false, + "columns": [ + "referral_code" + ] + }, + "user_discord_id_unique": { + "name": "user_discord_id_unique", + "nullsNotDistinct": false, + "columns": [ + "discord_id" + ] + }, + "user_handle_unique": { + "name": "user_handle_unique", + "nullsNotDistinct": false, + "columns": [ + "handle" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.verificationToken": { + "name": "verificationToken", + "schema": "", + "columns": { + "identifier": { + "name": "identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + 
"token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "verificationToken_identifier_token_pk": { + "name": "verificationToken_identifier_token_pk", + "columns": [ + "identifier", + "token" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.referral_status": { + "name": "referral_status", + "schema": "public", + "values": [ + "pending", + "completed" + ] + }, + "public.agent_run_status": { + "name": "agent_run_status", + "schema": "public", + "values": [ + "running", + "completed", + "failed", + "cancelled" + ] + }, + "public.agent_step_status": { + "name": "agent_step_status", + "schema": "public", + "values": [ + "running", + "completed", + "skipped" + ] + }, + "public.api_key_type": { + "name": "api_key_type", + "schema": "public", + "values": [ + "anthropic", + "gemini", + "openai" + ] + }, + "public.grant_type": { + "name": "grant_type", + "schema": "public", + "values": [ + "free", + "referral", + "referral_legacy", + "subscription", + "purchase", + "admin", + "organization", + "ad" + ] + }, + "public.org_role": { + "name": "org_role", + "schema": "public", + "values": [ + "owner", + "admin", + "member" + ] + }, + "public.session_type": { + "name": "session_type", + "schema": "public", + "values": [ + "web", + "pat", + "cli" + ] + }, + "public.subscription_status": { + "name": "subscription_status", + "schema": "public", + "values": [ + "incomplete", + "incomplete_expired", + "trialing", + "active", + "past_due", + "canceled", + "unpaid", + "paused" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git 
a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json index bce61005a2..8952549c98 100644 --- a/packages/internal/src/db/migrations/meta/_journal.json +++ b/packages/internal/src/db/migrations/meta/_journal.json @@ -295,6 +295,13 @@ "when": 1770334047429, "tag": "0041_nappy_nebula", "breakpoints": true + }, + { + "idx": 42, + "version": "7", + "when": 1773878149145, + "tag": "0042_needy_jack_murdock", + "breakpoints": true } ] } \ No newline at end of file diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts index 1fa381c5df..0033314f00 100644 --- a/packages/internal/src/db/schema.ts +++ b/packages/internal/src/db/schema.ts @@ -232,6 +232,7 @@ export const message = pgTable( credits: integer('credits').notNull(), byok: boolean('byok').notNull().default(false), latency_ms: integer('latency_ms'), + ttft_ms: integer('ttft_ms'), user_id: text('user_id').references(() => user.id, { onDelete: 'cascade' }), org_id: text('org_id').references(() => org.id, { onDelete: 'cascade' }), diff --git a/scripts/query-minimax-cache-stats.ts b/scripts/query-minimax-cache-stats.ts new file mode 100644 index 0000000000..7c742c2ccc --- /dev/null +++ b/scripts/query-minimax-cache-stats.ts @@ -0,0 +1,138 @@ +import { db } from '@codebuff/internal/db' +import { sql } from 'drizzle-orm' + +async function queryMinimaxCacheStats() { + console.log('Querying minimax/minimax-m2.5 usage (last 19 hours)...\n') + + // 1. 
Overall stats + const overallResult = await db.execute(sql` + SELECT + COUNT(*) AS total_requests, + ROUND(AVG(input_tokens)) AS avg_input_tokens, + ROUND(AVG(output_tokens)) AS avg_output_tokens, + ROUND( + CASE + WHEN SUM(input_tokens) > 0 + THEN SUM(cache_read_input_tokens)::numeric / SUM(input_tokens) * 100 + ELSE 0 + END, 1 + ) AS overall_cache_rate_pct, + COUNT(DISTINCT client_id) AS unique_clients + FROM message + WHERE finished_at >= NOW() - INTERVAL '19 hours' + AND model = 'minimax/minimax-m2.5' + `) + + const overall = overallResult[0] + if (!overall || Number(overall.total_requests) === 0) { + console.log('No data found for minimax/minimax-m2.5 in the last 19 hours.') + return + } + + console.log('Overall Stats') + console.log('═══════════════════════════════════════════') + console.log(`Total requests: ${overall.total_requests}`) + console.log(`Unique clients: ${overall.unique_clients}`) + console.log(`Avg input tokens: ${overall.avg_input_tokens}`) + console.log(`Avg output tokens: ${overall.avg_output_tokens}`) + console.log(`Overall cache rate: ${overall.overall_cache_rate_pct}%`) + + // 2. 
Per-client stats, ordered by lowest cache rate + const clientResult = await db.execute(sql` + SELECT + client_id, + COUNT(*) AS request_count, + MIN(finished_at) AS first_seen, + MAX(finished_at) AS last_seen, + ROUND(AVG(input_tokens)) AS avg_input, + ROUND( + CASE + WHEN SUM(input_tokens) > 0 + THEN SUM(cache_read_input_tokens)::numeric / SUM(input_tokens) * 100 + ELSE 0 + END, 1 + ) AS cache_rate_pct, + SUM(cache_read_input_tokens) AS total_cache_read, + SUM(input_tokens) AS total_input + FROM message + WHERE finished_at >= NOW() - INTERVAL '19 hours' + AND model = 'minimax/minimax-m2.5' + AND client_id IS NOT NULL + GROUP BY client_id + ORDER BY cache_rate_pct ASC, request_count DESC + `) + + console.log('\n\nPer-Client Cache Rates (lowest first)') + console.log('═══════════════════════════════════════════') + + if (clientResult.length === 0) { + console.log('No client-level data found.') + return + } + + for (const row of clientResult) { + const clientId = String(row.client_id).slice(0, 12) + const reqs = String(row.request_count).padStart(4) + const cacheRate = String(row.cache_rate_pct).padStart(6) + const avgInput = String(row.avg_input).padStart(8) + const firstSeen = row.first_seen + ? new Date(String(row.first_seen)).toISOString().slice(0, 16) + : 'N/A' + const lastSeen = row.last_seen + ? new Date(String(row.last_seen)).toISOString().slice(0, 16) + : 'N/A' + console.log( + ` ${clientId}… reqs: ${reqs} cache: ${cacheRate}% avg_input: ${avgInput} range: ${firstSeen} → ${lastSeen}`, + ) + } + + // 3. 
Recent requests in time order + const recentResult = await db.execute(sql` + SELECT + client_id, + finished_at, + input_tokens, + cache_read_input_tokens, + COALESCE(cache_creation_input_tokens, 0) AS cache_creation_input_tokens, + output_tokens, + ROUND( + CASE + WHEN input_tokens > 0 + THEN cache_read_input_tokens::numeric / input_tokens * 100 + ELSE 0 + END, 1 + ) AS cache_rate_pct + FROM message + WHERE finished_at >= NOW() - INTERVAL '19 hours' + AND model = 'minimax/minimax-m2.5' + ORDER BY client_id, finished_at DESC + LIMIT 100 + `) + + console.log('\n\nRecent Requests (newest first, last 100)') + console.log('═══════════════════════════════════════════') + + for (const row of recentResult) { + const clientId = row.client_id + ? String(row.client_id).slice(0, 12) + : 'unknown ' + const time = row.finished_at + ? new Date(String(row.finished_at)).toISOString().slice(0, 19) + : 'N/A' + const cacheRate = String(row.cache_rate_pct).padStart(6) + const input = String(row.input_tokens).padStart(7) + const cached = String(row.cache_read_input_tokens).padStart(7) + const creation = String(row.cache_creation_input_tokens).padStart(7) + const output = String(row.output_tokens).padStart(6) + console.log( + ` ${time} ${clientId}… cache: ${cacheRate}% input: ${input} cached: ${cached} creation: ${creation} output: ${output}`, + ) + } +} + +queryMinimaxCacheStats() + .then(() => process.exit(0)) + .catch((err) => { + console.error(err) + process.exit(1) + }) diff --git a/scripts/query-usage-stats.ts b/scripts/query-usage-stats.ts index 371701902d..15a35703b8 100644 --- a/scripts/query-usage-stats.ts +++ b/scripts/query-usage-stats.ts @@ -22,14 +22,13 @@ async function queryUsageStats() { token_stats AS ( SELECT - ROUND(AVG(input_tokens + cache_read_input_tokens + cache_creation_input_tokens)) + ROUND(AVG(input_tokens)) AS avg_total_input_tokens, ROUND( AVG( CASE - WHEN (input_tokens + cache_read_input_tokens + cache_creation_input_tokens) > 0 - THEN 
cache_read_input_tokens::numeric - / (input_tokens + cache_read_input_tokens + cache_creation_input_tokens) + WHEN input_tokens > 0 + THEN cache_read_input_tokens::numeric / input_tokens ELSE 0 END ) * 100, 1 @@ -42,7 +41,9 @@ async function queryUsageStats() { client_stats AS ( SELECT - ROUND(AVG(cnt)) AS avg_requests_per_client + ROUND(AVG(cnt)) AS avg_requests_per_client, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cnt) AS median_requests_per_client, + MAX(cnt) AS max_requests_per_client FROM ( SELECT client_id, COUNT(*) AS cnt FROM recent @@ -70,6 +71,8 @@ async function queryUsageStats() { t.avg_cache_rate_pct, t.avg_output_tokens, c.avg_requests_per_client, + c.median_requests_per_client, + c.max_requests_per_client, r.median_rps, r.peak_rps, t.total_requests @@ -90,6 +93,8 @@ async function queryUsageStats() { console.log(`Median RPS: ${row.median_rps}`) console.log(`Peak RPS: ${row.peak_rps}`) console.log(`Avg requests/client: ${row.avg_requests_per_client}`) + console.log(`Median requests/client: ${row.median_requests_per_client}`) + console.log(`Max requests/client: ${row.max_requests_per_client}`) console.log(`Total requests (7d): ${row.total_requests}`) } diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts index f28eb55a6e..9fb5ebc8bd 100644 --- a/scripts/test-fireworks-long.ts +++ b/scripts/test-fireworks-long.ts @@ -13,7 +13,7 @@ export { } const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' -// const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/qne3jo8v' +// const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9' const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5' // Pricing constants — https://fireworks.ai/pricing @@ -23,6 +23,9 @@ const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 const MAX_TOKENS = 100 +// Stable session ID so all turns route to the same machine for prompt caching +const SESSION_ID = `bench-${Math.random().toString(36).slice(2, 10)}` + function 
computeCost(usage: Record): { cost: number; breakdown: string } { const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0 const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0 @@ -175,6 +178,7 @@ async function makeConversationStreamRequest( headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json', + 'x-session-affinity': SESSION_ID, }, body: JSON.stringify({ model: FIREWORKS_MODEL, @@ -220,16 +224,13 @@ async function makeConversationStreamRequest( const chunk = JSON.parse(raw) chunkCount++ const delta = chunk.choices?.[0]?.delta + if (delta && firstContentChunkTime === undefined) { + firstContentChunkTime = Date.now() + ttftMs = firstContentChunkTime - startTime + } if (delta?.content) { - if (firstContentChunkTime === undefined) { - firstContentChunkTime = Date.now() - ttftMs = firstContentChunkTime - startTime - } streamContent += delta.content } - if (delta?.reasoning_content) { - // Skip reasoning content for this test - } if (chunk.usage) streamUsage = chunk.usage } catch { // skip non-JSON lines @@ -242,12 +243,9 @@ async function makeConversationStreamRequest( ? streamUsage.completion_tokens : 0 - const generationTimeMs = firstContentChunkTime !== undefined - ? Date.now() - firstContentChunkTime - : elapsedMs - const outputTokensPerSec = generationTimeMs > 0 - ? (outputTokens / (generationTimeMs / 1000)) - : 0 + const outputTokensPerSec = firstContentChunkTime !== undefined + ? (outputTokens / ((Date.now() - firstContentChunkTime) / 1000)) + : undefined // Print compact per-turn stats const inputTokens = streamUsage && typeof streamUsage.prompt_tokens === 'number' ? streamUsage.prompt_tokens : 0 @@ -256,7 +254,7 @@ async function makeConversationStreamRequest( const cacheRate = inputTokens > 0 ? ((cachedTokens / inputTokens) * 100).toFixed(1) : '0.0' const cost = streamUsage ? 
`$${computeCost(streamUsage).cost.toFixed(6)}` : 'err' - console.log(` ✅ ${(elapsedMs / 1000).toFixed(2)}s | TTFT ${ttftMs !== undefined ? (ttftMs / 1000).toFixed(2) + 's' : 'n/a'} | ${inputTokens} in (${cachedTokens} cached, ${cacheRate}%) | ${outputTokens} out @ ${outputTokensPerSec.toFixed(1)} tok/s | ${cost}`) + console.log(` ✅ ${(elapsedMs / 1000).toFixed(2)}s | TTFT ${ttftMs !== undefined ? (ttftMs / 1000).toFixed(2) + 's' : 'n/a'} | ${inputTokens} in (${cachedTokens} cached, ${cacheRate}%) | ${outputTokens} out @ ${outputTokensPerSec !== undefined ? outputTokensPerSec.toFixed(1) + ' tok/s' : 'n/a'} | ${cost}`) console.log(` Response: ${streamContent.slice(0, 150)}${streamContent.length > 150 ? '...' : ''}`) console.log() @@ -277,6 +275,7 @@ async function main() { console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`) console.log(`Turns: ${TURN_PROMPTS.length}`) console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`) + console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`) console.log('='.repeat(60)) console.log() diff --git a/web/src/app/api/v1/chat/completions/__tests__/free-mode-rate-limiter.test.ts b/web/src/app/api/v1/chat/completions/__tests__/free-mode-rate-limiter.test.ts new file mode 100644 index 0000000000..0d9802b58b --- /dev/null +++ b/web/src/app/api/v1/chat/completions/__tests__/free-mode-rate-limiter.test.ts @@ -0,0 +1,317 @@ +import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test' + +import { + checkFreeModeRateLimit, + FREE_MODE_RATE_LIMITS, + resetFreeModeRateLimits, +} from '../free-mode-rate-limiter' + +const SECOND_MS = 1000 +const MINUTE_MS = 60 * SECOND_MS +const HOUR_MS = 60 * MINUTE_MS + +describe('free-mode-rate-limiter', () => { + let nowSpy: ReturnType + let fakeNow: number + + beforeEach(() => { + resetFreeModeRateLimits() + fakeNow = 1_000_000_000_000 + nowSpy = spyOn(Date, 'now').mockImplementation(() => fakeNow) + }) + + afterEach(() => { + nowSpy.mockRestore() + }) + 
+ function advanceTime(ms: number) { + fakeNow += ms + } + + function makeRequests(userId: string, count: number) { + for (let i = 0; i < count; i++) { + if (i > 0) { + advanceTime(1 * SECOND_MS + 1) + } + const result = checkFreeModeRateLimit(userId) + if (result.limited) { + throw new Error(`Unexpectedly rate limited on request ${i + 1}`) + } + } + } + + describe('checkFreeModeRateLimit', () => { + it('allows the first request', () => { + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('limits when per-second limit is exceeded', () => { + // Make all requests within the same second (no time advancement) + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + expect(checkFreeModeRateLimit('user-1').limited).toBe(false) + } + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('1 second') + } + }) + + it('resets per-second window after expiry', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + checkFreeModeRateLimit('user-1') + } + expect(checkFreeModeRateLimit('user-1').limited).toBe(true) + + advanceTime(1 * SECOND_MS + 1) + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('allows requests up to the per-minute limit', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_MINUTE; i++) { + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + if (i < FREE_MODE_RATE_LIMITS.PER_MINUTE - 1) { + advanceTime(1 * SECOND_MS + 1) + } + } + }) + + it('limits when per-minute limit is exceeded', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + // Advance past the 1-second window so the per-minute window is the one that triggers + advanceTime(1 * SECOND_MS + 1) + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + 
expect(result.windowName).toBe('1 minute') + } + }) + + it('limits when per-30-minute limit is exceeded', () => { + const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE + const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES + + // Spread requests across multiple 1-minute windows to avoid hitting the per-minute limit + let sent = 0 + while (sent < per30Min) { + const batch = Math.min(perMinute, per30Min - sent) + makeRequests('user-1', batch) + sent += batch + if (sent < per30Min) { + // Advance past the 1-minute window so it resets + advanceTime(1 * MINUTE_MS + 1) + } + } + + // Advance past the 1-minute window so the per-30-minute window is the one that triggers + advanceTime(1 * MINUTE_MS + 1) + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('30 minutes') + } + }) + + it('limits when per-5-hour limit is exceeded', () => { + const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE + const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES + const per5Hours = FREE_MODE_RATE_LIMITS.PER_5_HOURS + + // Spread requests across multiple 30-minute windows + let sent = 0 + while (sent < per5Hours) { + const batchFor30Min = Math.min(per30Min, per5Hours - sent) + // Within each 30-min window, spread across 1-min windows + let sentInWindow = 0 + while (sentInWindow < batchFor30Min) { + const batch = Math.min(perMinute, batchFor30Min - sentInWindow) + makeRequests('user-1', batch) + sentInWindow += batch + if (sentInWindow < batchFor30Min) { + advanceTime(1 * MINUTE_MS + 1) + } + } + sent += sentInWindow + // Always advance past 30-min window to reset it for the next batch + // (stays well within the 5-hour window) + advanceTime(30 * MINUTE_MS + 1) + } + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('5 hours') + } + }) + + it('limits when per-7-day limit is exceeded', () => { + const perMinute = 
FREE_MODE_RATE_LIMITS.PER_MINUTE + const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES + const per5Hours = FREE_MODE_RATE_LIMITS.PER_5_HOURS + const per7Days = FREE_MODE_RATE_LIMITS.PER_7_DAYS + + // Spread requests across multiple 5-hour windows + let sent = 0 + while (sent < per7Days) { + const batchFor5Hours = Math.min(per5Hours, per7Days - sent) + let sentIn5Hr = 0 + while (sentIn5Hr < batchFor5Hours) { + const batchFor30Min = Math.min(per30Min, batchFor5Hours - sentIn5Hr) + let sentIn30Min = 0 + while (sentIn30Min < batchFor30Min) { + const batch = Math.min(perMinute, batchFor30Min - sentIn30Min) + makeRequests('user-1', batch) + sentIn30Min += batch + if (sentIn30Min < batchFor30Min) { + advanceTime(1 * MINUTE_MS + 1) + } + } + sentIn5Hr += sentIn30Min + advanceTime(30 * MINUTE_MS + 1) + } + sent += sentIn5Hr + // Advance past the 5-hour window (stays within 7-day window) + advanceTime(5 * HOUR_MS + 1) + } + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('7 days') + } + }) + + it('does not increment counters when rate limited', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + // Advance past the 1-second window so the per-minute window blocks + advanceTime(1 * SECOND_MS + 1) + + // These should all be rejected without changing state + for (let i = 0; i < 5; i++) { + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + } + + // After the 1-minute window expires, the user should only have used PER_MINUTE requests + // against the 30-minute window, not PER_MINUTE + 5 + advanceTime(1 * MINUTE_MS + 1) + + // Should be allowed again (1-min window reset) + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('returns correct retryAfterMs for the violated window', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + // makeRequests advanced time by (PER_MINUTE 
- 1) * (SECOND_MS + 1) + const elapsedInMakeRequests = (FREE_MODE_RATE_LIMITS.PER_MINUTE - 1) * (1 * SECOND_MS + 1) + + // Advance past the 1-second window, then a bit more + const additionalAdvance = 2 * SECOND_MS + advanceTime(additionalAdvance) + + const totalElapsed = elapsedInMakeRequests + additionalAdvance + const expectedRetryAfterMs = 1 * MINUTE_MS - totalElapsed + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('1 minute') + expect(result.retryAfterMs).toBe(expectedRetryAfterMs) + } + }) + + it('resets per-minute window after expiry', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + advanceTime(1 * SECOND_MS + 1) + + const limited = checkFreeModeRateLimit('user-1') + expect(limited.limited).toBe(true) + + // Advance past the 1-minute window + advanceTime(1 * MINUTE_MS + 1) + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('isolates different users', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + advanceTime(1 * SECOND_MS + 1) + + // user-1 is rate limited + expect(checkFreeModeRateLimit('user-1').limited).toBe(true) + + // user-2 should not be affected + const result = checkFreeModeRateLimit('user-2') + expect(result.limited).toBe(false) + }) + + it('retryAfterMs is never negative', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + checkFreeModeRateLimit('user-1') + } + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.retryAfterMs).toBeGreaterThanOrEqual(0) + } + }) + + it('tracks counts across all windows simultaneously', () => { + // Make some requests + makeRequests('user-1', 5) + + // Advance past 1-minute window but within 30-minute window + advanceTime(1 * MINUTE_MS + 1) + + // Make more requests — 1-min counter resets, but 30-min counter keeps accumulating + 
makeRequests('user-1', 5) + + // Advance past 1-minute again + advanceTime(1 * MINUTE_MS + 1) + + // The 30-min window should now have 10 requests counted + // and the 1-min window should be fresh + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + }) + + describe('resetFreeModeRateLimits', () => { + it('clears all rate limit state', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + checkFreeModeRateLimit('user-1') + } + expect(checkFreeModeRateLimit('user-1').limited).toBe(true) + + resetFreeModeRateLimits() + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('clears state for all users', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + checkFreeModeRateLimit('user-1') + checkFreeModeRateLimit('user-2') + } + + expect(checkFreeModeRateLimit('user-1').limited).toBe(true) + expect(checkFreeModeRateLimit('user-2').limited).toBe(true) + + resetFreeModeRateLimits() + + expect(checkFreeModeRateLimit('user-1').limited).toBe(false) + expect(checkFreeModeRateLimit('user-2').limited).toBe(false) + }) + }) +}) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index bf36ae417f..8553aa69e3 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -65,6 +65,8 @@ import { OpenRouterError, } from '@/llm-api/openrouter' import { extractApiKeyFromHeader } from '@/util/auth' +import { withDefaultProperties } from '@codebuff/common/analytics' +import { checkFreeModeRateLimit } from './free-mode-rate-limiter' const FREE_MODE_ALLOWED_COUNTRIES = new Set([ 'US', 'CA', @@ -80,7 +82,13 @@ function extractClientIp(req: NextRequest): string | undefined { return req.headers.get('x-real-ip') ?? 
undefined } -function getCountryFromIp(clientIp: string | undefined): string | null { +function getCountryCode(req: NextRequest): string | null { + const cfCountry = req.headers.get('cf-ipcountry') + if (cfCountry && cfCountry !== 'XX' && cfCountry !== 'T1') { + return cfCountry.toUpperCase() + } + + const clientIp = extractClientIp(req) if (!clientIp) { return null } @@ -141,7 +149,6 @@ export async function postChatCompletions(params: { req, getUserInfoFromApiKey, loggerWithContext, - trackEvent, getUserUsageData, getAgentRunFromId, fetch, @@ -150,6 +157,7 @@ export async function postChatCompletions(params: { getUserPreferences, } = params let { logger } = params + let { trackEvent } = params try { // Parse request body @@ -175,6 +183,12 @@ export async function postChatCompletions(params: { const bodyStream = typedBody.stream ?? false const runId = typedBody.codebuff_metadata?.run_id + // Check if the request is in FREE mode (costs 0 credits for allowed agent+model combos) + const costMode = typedBody.codebuff_metadata?.cost_mode + const isFreeModeRequest = isFreeMode(costMode) + + trackEvent = withDefaultProperties(trackEvent, { freebuff: isFreeModeRequest }) + // Extract and validate API key const apiKey = extractApiKeyFromHeader(req) if (!apiKey) { @@ -242,14 +256,17 @@ export async function postChatCompletions(params: { logger, }) - // Check if the request is in FREE mode (costs 0 credits for allowed agent+model combos) - const costMode = typedBody.codebuff_metadata?.cost_mode - const isFreeModeRequest = isFreeMode(costMode) - // For free mode requests, check if user is in US or Canada if (isFreeModeRequest) { + const countryCode = getCountryCode(req) const clientIp = extractClientIp(req) - const countryCode = getCountryFromIp(clientIp) + + const cfHeader = req.headers.get('cf-ipcountry') + const geoipResult = clientIp ? geoip.lookup(clientIp)?.country ?? 
null : null + logger.info( + { cfHeader, geoipResult, resolvedCountry: countryCode, clientIp: clientIp ? '[redacted]' : undefined }, + 'Free mode country detection', + ) // If we couldn't determine country (null), allow the request (fail open) // This handles users behind VPNs, corporate proxies, or localhost @@ -273,6 +290,7 @@ export async function postChatCompletions(params: { { status: 403 }, ) } + } // Extract and validate agent run ID @@ -333,6 +351,38 @@ export async function postChatCompletions(params: { ) } + // Rate limit free mode requests (after validation so invalid requests don't consume quota) + if (isFreeModeRequest) { + const rateLimitResult = checkFreeModeRateLimit(userId) + if (rateLimitResult.limited) { + const retryAfterSeconds = Math.ceil(rateLimitResult.retryAfterMs / 1000) + const resetTime = new Date(Date.now() + rateLimitResult.retryAfterMs).toISOString() + const resetCountdown = formatQuotaResetCountdown(resetTime) + + trackEvent({ + event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, + userId, + properties: { + error: 'free_mode_rate_limited', + windowName: rateLimitResult.windowName, + retryAfterSeconds, + }, + logger, + }) + + return NextResponse.json( + { + error: 'free_mode_rate_limited', + message: `Free mode rate limit exceeded (${rateLimitResult.windowName} limit). Try again ${resetCountdown}.`, + }, + { + status: 429, + headers: { 'Retry-After': String(retryAfterSeconds) }, + }, + ) + } + } + // For subscribers, ensure a block grant exists before processing the request. // This is done AFTER validation so malformed requests don't start a new 5-hour block. 
// When the function is provided, always include subscription credits in the balance: diff --git a/web/src/app/api/v1/chat/completions/free-mode-rate-limiter.ts b/web/src/app/api/v1/chat/completions/free-mode-rate-limiter.ts new file mode 100644 index 0000000000..b299291cd4 --- /dev/null +++ b/web/src/app/api/v1/chat/completions/free-mode-rate-limiter.ts @@ -0,0 +1,167 @@ +/** + * In-memory rate limiter for FREE mode requests. + * + * Enforces multiple fixed-window limits per user to prevent abuse. + * Each window is anchored to the user's first request in that window + * and resets once the window duration elapses. + * + * Adjust the constants below to tune the limits. + */ + +// --------------------------------------------------------------------------- +// Configurable rate-limit constants +// --------------------------------------------------------------------------- + +export const FREE_MODE_RATE_LIMITS = { + /** Max requests per 1-second window */ + PER_SECOND: 2, + /** Max requests per 1-minute window */ + PER_MINUTE: 20, + /** Max requests per 30-minute window */ + PER_30_MINUTES: 200, + /** Max requests per 5-hour window */ + PER_5_HOURS: 1_000, + /** Max requests per 7-day window */ + PER_7_DAYS: 10_000, +} as const + +// --------------------------------------------------------------------------- +// Internal types +// --------------------------------------------------------------------------- + +interface RateWindow { + name: string + windowMs: number + maxRequests: number +} + +interface WindowTracker { + count: number + windowStart: number +} + +export type RateLimitResult = { + limited: false +} | { + limited: true + windowName: string + retryAfterMs: number +} + +// --------------------------------------------------------------------------- +// Window definitions (derived from the constants above) +// --------------------------------------------------------------------------- + +const SECOND_MS = 1000 +const MINUTE_MS = 60 * SECOND_MS +const HOUR_MS 
= 60 * MINUTE_MS +const DAY_MS = 24 * HOUR_MS + +const RATE_WINDOWS: RateWindow[] = [ + { name: '1 second', windowMs: 1 * SECOND_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_SECOND }, + { name: '1 minute', windowMs: 1 * MINUTE_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_MINUTE }, + { name: '30 minutes', windowMs: 30 * MINUTE_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_30_MINUTES }, + { name: '5 hours', windowMs: 5 * HOUR_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_5_HOURS }, + { name: '7 days', windowMs: 7 * DAY_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_7_DAYS }, +] + +// --------------------------------------------------------------------------- +// In-memory state +// --------------------------------------------------------------------------- + +// userId -> (windowName -> tracker) +const userWindows = new Map<string, Map<string, WindowTracker>>() + +let lastCleanupTime = 0 +const CLEANUP_INTERVAL_MS = 5 * MINUTE_MS + +// --------------------------------------------------------------------------- +// Cleanup +// --------------------------------------------------------------------------- + +function cleanupExpiredEntries(): void { + const now = Date.now() + for (const [userId, windows] of userWindows) { + for (const [windowName, tracker] of windows) { + const matchingWindow = RATE_WINDOWS.find((w) => w.name === windowName) + if (!matchingWindow) { + windows.delete(windowName) + continue + } + if (now - tracker.windowStart >= matchingWindow.windowMs) { + windows.delete(windowName) + } + } + if (windows.size === 0) { + userWindows.delete(userId) + } + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/** + * Check whether a free-mode request from `userId` should be rate-limited. + * + * If the request is allowed, each window's counter is incremented. + * If any window is exceeded, the request is rejected and no counters change.
+ */ +export function checkFreeModeRateLimit(userId: string): RateLimitResult { + const now = Date.now() + + // Periodic cleanup to prevent memory leaks + if (now - lastCleanupTime > CLEANUP_INTERVAL_MS) { + cleanupExpiredEntries() + lastCleanupTime = now + } + + let windows = userWindows.get(userId) + if (!windows) { + windows = new Map() + userWindows.set(userId, windows) + } + + // First pass: check all windows without mutating + for (const rateWindow of RATE_WINDOWS) { + let tracker = windows.get(rateWindow.name) + + // Reset the window if it has expired + if (tracker && now - tracker.windowStart >= rateWindow.windowMs) { + windows.delete(rateWindow.name) + tracker = undefined + } + + const currentCount = tracker?.count ?? 0 + if (currentCount >= rateWindow.maxRequests) { + const windowStart = tracker!.windowStart + const retryAfterMs = rateWindow.windowMs - (now - windowStart) + return { + limited: true, + windowName: rateWindow.name, + retryAfterMs: Math.max(0, retryAfterMs), + } + } + } + + // Second pass: increment all window counters (request is allowed) + for (const rateWindow of RATE_WINDOWS) { + let tracker = windows.get(rateWindow.name) + if (!tracker) { + tracker = { count: 0, windowStart: now } + windows.set(rateWindow.name, tracker) + } + tracker.count++ + } + + return { limited: false } +} + +/** + * Reset all rate-limit state. Exposed for testing. 
+ */ +export function resetFreeModeRateLimits(): void { + userWindows.clear() + lastCleanupTime = 0 +} diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index df8f356d17..2108d408a2 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -13,7 +13,7 @@ import { import type { Logger } from '@codebuff/common/types/contracts/logger' const STANDARD_MODEL_ID = 'accounts/fireworks/models/minimax-m2p5' -const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/qne3jo8v' +const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/lnfid5h9' function createMockLogger(): Logger { return { diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts index 8582645944..52fe1885c3 100644 --- a/web/src/llm-api/canopywave.ts +++ b/web/src/llm-api/canopywave.ts @@ -39,7 +39,7 @@ function getCanopyWaveModelId(openrouterModel: string): string { return CANOPYWAVE_MODEL_MAP[openrouterModel] ?? 
openrouterModel } -type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean } +type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean } type LineResult = { state: StreamState @@ -170,6 +170,7 @@ export async function handleCanopyWaveNonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Overwrite cost so SDK calculates exact credits we charged @@ -218,7 +219,7 @@ export async function handleCanopyWaveStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false } + let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null, billedAlready: false } let clientDisconnected = false const stream = new ReadableStream({ @@ -439,7 +440,7 @@ async function handleResponse({ logger: Logger insertMessage: InsertMessageBigqueryFn }): Promise<{ state: StreamState; billedCredits?: number }> { - state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel }) // Some providers send cumulative usage on EVERY chunk (not just the final one), // so we must only bill once on the final chunk to avoid charging N times. @@ -486,6 +487,7 @@ async function handleResponse({ byok: false, logger, costMode, + ttftMs: state.ttftMs, }) return { state, billedCredits } @@ -494,6 +496,7 @@ async function handleResponse({ function handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -501,6 +504,7 @@ function handleStreamChunk({ }: { data: Record state: StreamState + startTime: Date logger: Logger userId: string agentId: string @@ -544,6 +548,13 @@ function handleStreamChunk({ const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content : typeof delta?.reasoning === 'string' ? 
delta.reasoning : '' + + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0 + if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) { + state.ttftMs = Date.now() - startTime.getTime() + } + if (state.reasoningText.length < MAX_BUFFER_SIZE) { state.reasoningText += reasoningDelta if (state.reasoningText.length >= MAX_BUFFER_SIZE) { diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 2b28937415..fccfd7892e 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -36,7 +36,7 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = false /** Custom deployment IDs for models with dedicated Fireworks deployments */ const FIREWORKS_DEPLOYMENT_MAP: Record = { - 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/qne3jo8v', + 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', } /** Check if current time is within deployment hours (10am–8pm ET) */ @@ -79,7 +79,7 @@ function getFireworksModelId(openrouterModel: string): string { return FIREWORKS_MODEL_MAP[openrouterModel] ?? 
openrouterModel } -type StreamState = { responseText: string; reasoningText: string } +type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null } type LineResult = { state: StreamState @@ -210,6 +210,7 @@ export async function handleFireworksNonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Overwrite cost so SDK calculates exact credits we charged @@ -258,7 +259,7 @@ export async function handleFireworksStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '' } + let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null } let clientDisconnected = false const stream = new ReadableStream({ @@ -473,7 +474,7 @@ async function handleResponse({ logger: Logger insertMessage: InsertMessageBigqueryFn }): Promise<{ state: StreamState; billedCredits?: number }> { - state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel }) if ('error' in data || !data.usage) { return { state } @@ -511,6 +512,7 @@ async function handleResponse({ byok: false, logger, costMode, + ttftMs: state.ttftMs, }) return { state, billedCredits } @@ -519,6 +521,7 @@ async function handleResponse({ function handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -526,6 +529,7 @@ function handleStreamChunk({ }: { data: Record state: StreamState + startTime: Date logger: Logger userId: string agentId: string @@ -569,6 +573,13 @@ function handleStreamChunk({ const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content : typeof delta?.reasoning === 'string' ? 
delta.reasoning : '' + + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0 + if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) { + state.ttftMs = Date.now() - startTime.getTime() + } + if (state.reasoningText.length < MAX_BUFFER_SIZE) { state.reasoningText += reasoningDelta if (state.reasoningText.length >= MAX_BUFFER_SIZE) { diff --git a/web/src/llm-api/helpers.ts b/web/src/llm-api/helpers.ts index 1ba912cf57..14e578fa9b 100644 --- a/web/src/llm-api/helpers.ts +++ b/web/src/llm-api/helpers.ts @@ -114,6 +114,7 @@ export async function consumeCreditsForMessage(params: { byok: boolean logger: Logger costMode?: string + ttftMs?: number | null }): Promise { const { messageId, @@ -130,6 +131,7 @@ export async function consumeCreditsForMessage(params: { byok, logger, costMode, + ttftMs, } = params // Calculate initial credits based on cost @@ -172,6 +174,7 @@ export async function consumeCreditsForMessage(params: { outputTokens: usageData.outputTokens, byok, logger, + ttftMs: ttftMs ?? 
null, }) return credits diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts index 7ac2f1afeb..8f619e8357 100644 --- a/web/src/llm-api/openai.ts +++ b/web/src/llm-api/openai.ts @@ -304,6 +304,7 @@ export async function handleOpenAINonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) return { @@ -359,6 +360,7 @@ export async function handleOpenAINonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) if (data.usage) { @@ -424,6 +426,7 @@ export async function handleOpenAIStream({ let heartbeatInterval: NodeJS.Timeout let responseText = '' let reasoningText = '' + let ttftMs: number | null = null let clientDisconnected = false const MAX_BUFFER_SIZE = 1 * 1024 * 1024 // 1MB @@ -477,6 +480,14 @@ export async function handleOpenAIStream({ const obj = JSON.parse(raw) const delta = obj.choices?.[0]?.delta + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasContentDelta = delta?.content && responseText.length === 0 + const hasReasoningDelta = delta?.reasoning && reasoningText.length === 0 + const hasToolCallsDelta = delta?.tool_calls && delta.tool_calls.length > 0 + if (ttftMs === null && (hasContentDelta || hasReasoningDelta || hasToolCallsDelta)) { + ttftMs = Date.now() - startTime.getTime() + } + if (delta?.content && responseText.length < MAX_BUFFER_SIZE) { responseText += delta.content if (responseText.length >= MAX_BUFFER_SIZE) { @@ -544,6 +555,7 @@ export async function handleOpenAIStream({ byok: false, logger, costMode, + ttftMs, }) } } catch { @@ -631,6 +643,7 @@ export async function handleOpenAIStream({ byok: false, logger, costMode, + ttftMs, }) } } catch { diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts index c99200f1b0..08b7a31ef5 100644 --- a/web/src/llm-api/openrouter.ts +++ b/web/src/llm-api/openrouter.ts @@ -23,7 +23,7 @@ import type { OpenRouterErrorMetadata, } from 
'./types' -type StreamState = { responseText: string; reasoningText: string } +type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null } // Extended timeout for deep-thinking models (e.g., gpt-5) that can take // a long time to start streaming. @@ -186,6 +186,7 @@ export async function handleOpenRouterNonStream({ byok, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Return the first response with aggregated data @@ -257,6 +258,7 @@ export async function handleOpenRouterNonStream({ byok, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Overwrite cost so SDK calculates exact credits we charged @@ -313,7 +315,7 @@ export async function handleOpenRouterStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '' } + let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null } let clientDisconnected = false // Create a ReadableStream that Next.js can handle @@ -540,6 +542,7 @@ async function handleResponse({ state = await handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -584,6 +587,7 @@ async function handleResponse({ byok, logger, costMode, + ttftMs: state.ttftMs, }) return { state, billedCredits } @@ -592,6 +596,7 @@ async function handleResponse({ async function handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -599,6 +604,7 @@ async function handleStreamChunk({ }: { data: OpenRouterStreamChatCompletionChunk state: StreamState + startTime: Date logger: Logger userId: string agentId: string @@ -641,6 +647,14 @@ async function handleStreamChunk({ } const choice = data.choices[0] + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasContentDelta = choice?.delta?.content != null && choice?.delta?.content !== '' + const hasReasoningDelta = choice?.delta?.reasoning != null && choice?.delta?.reasoning !== '' + 
const hasToolCallsDelta = choice?.delta?.tool_calls != null && (choice?.delta?.tool_calls as unknown[])?.length > 0 + if (state.ttftMs === null && (hasContentDelta || hasReasoningDelta || hasToolCallsDelta)) { + state.ttftMs = Date.now() - startTime.getTime() + } + // Append content and reasoning, but only up to the buffer limit. const contentDelta = choice.delta?.content ?? '' if (state.responseText.length < MAX_BUFFER_SIZE) { diff --git a/web/src/llm-api/siliconflow.ts b/web/src/llm-api/siliconflow.ts index 1146bbe3df..6398fe184f 100644 --- a/web/src/llm-api/siliconflow.ts +++ b/web/src/llm-api/siliconflow.ts @@ -39,7 +39,7 @@ function getSiliconFlowModelId(openrouterModel: string): string { return SILICONFLOW_MODEL_MAP[openrouterModel] ?? openrouterModel } -type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean } +type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean } type LineResult = { state: StreamState @@ -171,6 +171,7 @@ export async function handleSiliconFlowNonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Overwrite cost so SDK calculates exact credits we charged @@ -219,7 +220,7 @@ export async function handleSiliconFlowStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false } + let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null, billedAlready: false } let clientDisconnected = false const stream = new ReadableStream({ @@ -440,7 +441,7 @@ async function handleResponse({ logger: Logger insertMessage: InsertMessageBigqueryFn }): Promise<{ state: StreamState; billedCredits?: number }> { - state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel }) // Some providers send cumulative 
usage on EVERY chunk (not just the final one), // so we must only bill once on the final chunk to avoid charging N times. @@ -487,6 +488,7 @@ async function handleResponse({ byok: false, logger, costMode, + ttftMs: state.ttftMs, }) return { state, billedCredits } @@ -495,6 +497,7 @@ async function handleResponse({ function handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -502,6 +505,7 @@ function handleStreamChunk({ }: { data: Record state: StreamState + startTime: Date logger: Logger userId: string agentId: string @@ -545,6 +549,13 @@ function handleStreamChunk({ const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content : typeof delta?.reasoning === 'string' ? delta.reasoning : '' + + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0 + if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) { + state.ttftMs = Date.now() - startTime.getTime() + } + if (state.reasoningText.length < MAX_BUFFER_SIZE) { state.reasoningText += reasoningDelta if (state.reasoningText.length >= MAX_BUFFER_SIZE) {