diff --git a/agents/__tests__/context-pruner.test.ts b/agents/__tests__/context-pruner.test.ts
index 45c61b4b9f..b691f33a9f 100644
--- a/agents/__tests__/context-pruner.test.ts
+++ b/agents/__tests__/context-pruner.test.ts
@@ -219,6 +219,7 @@ describe('context-pruner handleSteps', () => {
messages: Message[],
contextTokenCount?: number,
maxContextLength?: number,
+ budgets?: { assistantToolBudget?: number; userBudget?: number },
) => {
mockAgentState.messageHistory = messages
// If contextTokenCount not provided, estimate from messages
@@ -233,7 +234,10 @@ describe('context-pruner handleSteps', () => {
const generator = contextPruner.handleSteps!({
agentState: mockAgentState,
logger: mockLogger,
- params: maxContextLength ? { maxContextLength } : {},
+ params: {
+ ...(maxContextLength ? { maxContextLength } : {}),
+ ...budgets,
+ },
})
const results: any[] = []
let result = generator.next()
@@ -381,36 +385,6 @@ describe('context-pruner handleSteps', () => {
expect(content).toContain('[USER] [with image(s)]')
})
- test('truncates summary when it exceeds target size', () => {
- // Create many messages to generate a large summary
- const messages: Message[] = []
- for (let i = 0; i < 100; i++) {
- messages.push(
- createMessage(
- 'user',
- `User message number ${i} with some additional content to make it longer`,
- ),
- )
- messages.push(
- createMessage(
- 'assistant',
- `Assistant response number ${i} with detailed explanation`,
- ),
- )
- }
-
- // Use a very small max context to force truncation
- const results = runHandleSteps(messages, 500000, 5000)
- const content = results[0].input.messages[0].content[0].text
-
- // Should contain truncation notice
- expect(content).toContain('[CONVERSATION TRUNCATED')
-
- // Should still have the wrapper tags
- expect(content).toContain('<conversation_summary>')
- expect(content).toContain('</conversation_summary>')
- })
-
test('removes only INSTRUCTIONS_PROMPT and SUBAGENT_SPAWN when under context limit', () => {
const messages: Message[] = [
createMessage('user', 'Hello'),
@@ -700,6 +674,7 @@ describe('context-pruner long message truncation', () => {
messages: Message[],
contextTokenCount: number,
maxContextLength: number,
+ budgets?: { assistantToolBudget?: number; userBudget?: number },
) => {
mockAgentState.messageHistory = messages
mockAgentState.contextTokenCount = contextTokenCount
@@ -712,7 +687,7 @@ describe('context-pruner long message truncation', () => {
const generator = contextPruner.handleSteps!({
agentState: mockAgentState,
logger: mockLogger,
- params: { maxContextLength },
+ params: { maxContextLength, ...budgets },
})
const results: any[] = []
let result = generator.next()
@@ -726,8 +701,8 @@ describe('context-pruner long message truncation', () => {
}
test('truncates very long user messages with 80-20 ratio', () => {
- // Create a message that exceeds 20k chars
- const longText = 'A'.repeat(25000)
+ // Create a message that exceeds the user message token limit (~13k tokens = ~39k chars)
+ const longText = 'A'.repeat(45000)
const messages = [
createMessage('user', longText),
createMessage('assistant', 'Got it'),
@@ -1118,6 +1093,7 @@ describe('context-pruner repeated compaction', () => {
messages: Message[],
contextTokenCount: number,
maxContextLength: number,
+ budgets?: { assistantToolBudget?: number; userBudget?: number },
) => {
mockAgentState.messageHistory = messages
mockAgentState.contextTokenCount = contextTokenCount
@@ -1130,7 +1106,7 @@ describe('context-pruner repeated compaction', () => {
const generator = contextPruner.handleSteps!({
agentState: mockAgentState,
logger: mockLogger,
- params: { maxContextLength },
+ params: { maxContextLength, ...budgets },
})
const results: any[] = []
let result = generator.next()
@@ -1208,6 +1184,135 @@ First assistant response
expect(summaryTagCount).toBe(1)
})
+ test('drops old entries each cycle when budgets are tight', () => {
+ const simulateCompaction = (
+ inputMessages: Message[],
+ budgets: { assistantToolBudget: number; userBudget: number },
+ ): Message => {
+ const result = runHandleSteps(inputMessages, 250000, 200000, budgets)
+ return result[0].input.messages[0]
+ }
+
+ const tightBudgets = { assistantToolBudget: 25, userBudget: 25 }
+
+ // === CYCLE 1: 3 pairs of messages, tight budgets drop the oldest ===
+ const cycle1Messages = [
+ createMessage('user', 'Cycle1-Request-A'),
+ createMessage('assistant', 'Cycle1-Response-A'),
+ createMessage('user', 'Cycle1-Request-B'),
+ createMessage('assistant', 'Cycle1-Response-B'),
+ createMessage('user', 'Cycle1-Request-C'),
+ createMessage('assistant', 'Cycle1-Response-C'),
+ ]
+ const summary1 = simulateCompaction(cycle1Messages, tightBudgets)
+ const summary1Text = (summary1.content[0] as { type: 'text'; text: string })
+ .text
+
+ // Most recent entries should survive
+ expect(summary1Text).toContain('Cycle1-Request-C')
+ expect(summary1Text).toContain('Cycle1-Response-C')
+ // Oldest entries should be dropped
+ expect(summary1Text).not.toContain('Cycle1-Request-A')
+ expect(summary1Text).not.toContain('Cycle1-Response-A')
+
+ // === CYCLE 2: Add new messages, compact again ===
+ const cycle2Messages = [
+ summary1,
+ createMessage('user', 'Cycle2-Request-D'),
+ createMessage('assistant', 'Cycle2-Response-D'),
+ ]
+ const summary2 = simulateCompaction(cycle2Messages, tightBudgets)
+ const summary2Text = (summary2.content[0] as { type: 'text'; text: string })
+ .text
+
+ // Newest entries from cycle 2 should survive
+ expect(summary2Text).toContain('Cycle2-Request-D')
+ expect(summary2Text).toContain('Cycle2-Response-D')
+ // Cycle 1's oldest survivors should now be dropped
+ expect(summary2Text).not.toContain('Cycle1-Request-A')
+ expect(summary2Text).not.toContain('Cycle1-Response-A')
+
+ // === CYCLE 3: Add more, compact again ===
+ const cycle3Messages = [
+ summary2,
+ createMessage('user', 'Cycle3-Request-E'),
+ createMessage('assistant', 'Cycle3-Response-E'),
+ ]
+ const summary3 = simulateCompaction(cycle3Messages, tightBudgets)
+ const summary3Text = (summary3.content[0] as { type: 'text'; text: string })
+ .text
+
+ // Newest entries from cycle 3 should survive
+ expect(summary3Text).toContain('Cycle3-Request-E')
+ expect(summary3Text).toContain('Cycle3-Response-E')
+ // Very old entries should definitely be gone
+ expect(summary3Text).not.toContain('Cycle1-Request-A')
+ expect(summary3Text).not.toContain('Cycle1-Response-A')
+
+ // Verify only one conversation_summary tag (no nesting)
+ const summaryTagCount = (
+ summary3Text.match(/<conversation_summary>/g) || []
+ ).length
+ expect(summaryTagCount).toBe(1)
+ })
+
+ test('keeps multi-part tool entries grouped across compaction cycles', () => {
+ const simulateCompaction = (
+ inputMessages: Message[],
+ ): Message => {
+ const result = runHandleSteps(inputMessages, 250000, 200000)
+ return result[0].input.messages[0]
+ }
+
+ // Create a tool result that produces multiple entryParts:
+ // both an error AND a non-zero exit code
+ const cycle1Messages: Message[] = [
+ createMessage('user', 'Run tests'),
+ createToolCallMessage('call-1', 'run_terminal_command', {
+ command: 'npm test',
+ }),
+ createToolResultMessage('call-1', 'run_terminal_command', {
+ errorMessage: 'Test suite failed',
+ exitCode: 1,
+ }),
+ createMessage('user', 'Fix the tests'),
+ createMessage('assistant', 'I will fix them'),
+ ]
+
+ // Cycle 1: compact
+ const summary1 = simulateCompaction(cycle1Messages)
+ const summary1Text = (summary1.content[0] as { type: 'text'; text: string })
+ .text
+
+ // Both parts should be present in cycle 1
+ expect(summary1Text).toContain('[TOOL ERROR: run_terminal_command] Test suite failed')
+ expect(summary1Text).toContain('[COMMAND FAILED] Exit code: 1')
+
+ // Cycle 2: re-compact — the multi-part entry should stay as one entry
+ const cycle2Messages: Message[] = [
+ summary1,
+ createMessage('user', 'Try again'),
+ createMessage('assistant', 'Running tests again'),
+ ]
+ const summary2 = simulateCompaction(cycle2Messages)
+ const summary2Text = (summary2.content[0] as { type: 'text'; text: string })
+ .text
+
+ // Both parts should still be present together after re-compaction
+ expect(summary2Text).toContain('[TOOL ERROR: run_terminal_command] Test suite failed')
+ expect(summary2Text).toContain('[COMMAND FAILED] Exit code: 1')
+
+ // They should be within the same --- delimited chunk (not split apart)
+ const separator = '\n\n---\n\n'
+ const chunks = summary2Text
+ .replace(/[\s\S]*?<conversation_summary>\n\n/, '')
+ .replace(/<\/conversation_summary>[\s\S]*/, '')
+ .split(separator)
+ const errorChunk = chunks.find((c) => c.includes('[TOOL ERROR:'))
+ expect(errorChunk).toBeDefined()
+ expect(errorChunk).toContain('[COMMAND FAILED] Exit code: 1')
+ })
+
test('handles 3+ compaction cycles without nested PREVIOUS SUMMARY markers', () => {
// Helper to simulate running the context pruner and getting the output
const simulateCompaction = (inputMessages: Message[]): Message => {
@@ -1355,6 +1460,7 @@ describe('context-pruner threshold behavior', () => {
messages: Message[],
contextTokenCount: number,
maxContextLength: number,
+ budgets?: { assistantToolBudget?: number; userBudget?: number },
) => {
mockAgentState.messageHistory = messages
mockAgentState.contextTokenCount = contextTokenCount
@@ -1367,7 +1473,7 @@ describe('context-pruner threshold behavior', () => {
const generator = contextPruner.handleSteps!({
agentState: mockAgentState,
logger: mockLogger,
- params: { maxContextLength },
+ params: { maxContextLength, ...budgets },
})
const results: any[] = []
let result = generator.next()
@@ -1446,7 +1552,7 @@ describe('context-pruner str_replace and write_file tool results', () => {
return results
}
- test('includes str_replace diff in summary', () => {
+ test('includes str_replace result in summary', () => {
const messages = [
createMessage('user', 'Edit this file'),
createToolCallMessage('call-1', 'str_replace', {
@@ -1454,19 +1560,22 @@ describe('context-pruner str_replace and write_file tool results', () => {
replacements: [{ old: 'foo', new: 'bar' }],
}),
createToolResultMessage('call-1', 'str_replace', {
- diff: '--- a/src/utils.ts\n+++ b/src/utils.ts\n@@ -1,1 +1,1 @@\n-foo\n+bar',
+ file: 'src/utils.ts',
+ message: 'Updated file',
+ unifiedDiff: '--- a/src/utils.ts\n+++ b/src/utils.ts\n@@ -1,1 +1,1 @@\n-foo\n+bar',
}),
]
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[EDIT RESULT]')
+ expect(content).toContain('[EDIT RESULT: str_replace]')
+ expect(content).toContain('unifiedDiff')
expect(content).toContain('-foo')
expect(content).toContain('+bar')
})
- test('includes write_file diff in summary', () => {
+ test('includes write_file result in summary', () => {
const messages = [
createMessage('user', 'Create a new file'),
createToolCallMessage('call-1', 'write_file', {
@@ -1474,18 +1583,20 @@ describe('context-pruner str_replace and write_file tool results', () => {
content: 'export const hello = "world"',
}),
createToolResultMessage('call-1', 'write_file', {
- diff: '--- /dev/null\n+++ b/src/new-file.ts\n@@ -0,0 +1 @@\n+export const hello = "world"',
+ file: 'src/new-file.ts',
+ message: 'Created file',
+ unifiedDiff: '--- /dev/null\n+++ b/src/new-file.ts\n@@ -0,0 +1 @@\n+export const hello = "world"',
}),
]
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[WRITE RESULT]')
- expect(content).toContain('+export const hello = "world"')
+ expect(content).toContain('[EDIT RESULT: write_file]')
+ expect(content).toContain('export const hello')
})
- test('truncates very long str_replace diffs', () => {
+ test('truncates very long str_replace results', () => {
const longDiff = 'X'.repeat(3000)
const messages = [
createMessage('user', 'Make big changes'),
@@ -1494,20 +1605,65 @@ describe('context-pruner str_replace and write_file tool results', () => {
replacements: [],
}),
createToolResultMessage('call-1', 'str_replace', {
- diff: longDiff,
+ file: 'src/big-file.ts',
+ message: 'Updated file',
+ unifiedDiff: longDiff,
}),
]
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[EDIT RESULT]')
+ expect(content).toContain('[EDIT RESULT: str_replace]')
expect(content).toContain('...')
// Should not contain the full diff
expect(content).not.toContain(longDiff)
})
- test('does not include edit result when no diff is present', () => {
+ test('truncates very large tool entries to 5k token limit', () => {
+ // spawn_agents with multiple non-blacklisted agents producing large outputs
+ // Each agent output is capped at ~3,900 chars, but 5 agents × 3,900 = ~19,500 chars
+ // which exceeds the 5k token (15k char) TOOL_ENTRY_LIMIT
+ const largeAgentResults = Array.from({ length: 5 }, (_, i) => ({
+ agentType: `editor`,
+ value: {
+ type: 'string',
+ value: `AGENT_${i}_START_` + 'X'.repeat(4000) + `_AGENT_${i}_END`,
+ },
+ }))
+
+ const messages: Message[] = [
+ createMessage('user', 'Spawn many agents'),
+ createToolCallMessage('call-1', 'spawn_agents', {
+ agents: [
+ { agent_type: 'editor' },
+ { agent_type: 'editor' },
+ { agent_type: 'editor' },
+ { agent_type: 'editor' },
+ { agent_type: 'editor' },
+ ],
+ }),
+ {
+ role: 'tool',
+ toolCallId: 'call-1',
+ toolName: 'spawn_agents',
+ content: [{ type: 'json', value: largeAgentResults }],
+ } as ToolMessage,
+ ]
+
+ const results = runHandleSteps(messages)
+ const content = results[0].input.messages[0].content[0].text
+
+ // Should contain truncation notice from the TOOL_ENTRY_LIMIT cap
+ expect(content).toContain('[...truncated')
+ // The last agent's start marker should be cut by the overall entry cap
+ // (per-agent truncation only cuts within each agent's output, not across agents)
+ expect(content).not.toContain('AGENT_4_START_')
+ // The first agent's start should survive (80% prefix)
+ expect(content).toContain('AGENT_0_START_')
+ })
+
+ test('includes all result properties even without unifiedDiff', () => {
const messages = [
createMessage('user', 'Edit file'),
createToolCallMessage('call-1', 'str_replace', {
@@ -1515,16 +1671,19 @@ describe('context-pruner str_replace and write_file tool results', () => {
replacements: [],
}),
createToolResultMessage('call-1', 'str_replace', {
- success: true,
+ file: 'src/file.ts',
+ errorMessage: 'No match found for old string',
}),
]
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- // Should have the tool call summary but not the result
+ // Should have both the tool call summary and the full result
expect(content).toContain('Edited file: src/file.ts')
- expect(content).not.toContain('[EDIT RESULT]')
+ expect(content).toContain('[EDIT RESULT: str_replace]')
+ expect(content).toContain('errorMessage')
+ expect(content).toContain('No match found for old string')
})
})
@@ -1560,11 +1719,11 @@ describe('context-pruner glob and list_directory tools', () => {
return results
}
- test('summarizes glob tool with patterns', () => {
+ test('summarizes glob tool with pattern', () => {
const messages = [
createMessage('user', 'Find files'),
createToolCallMessage('call-1', 'glob', {
- patterns: [{ pattern: '*.ts' }, { pattern: '*.js' }],
+ pattern: '**/*.ts',
}),
createToolResultMessage('call-1', 'glob', { files: [] }),
]
@@ -1572,14 +1731,14 @@ describe('context-pruner glob and list_directory tools', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('Glob: *.ts, *.js')
+ expect(content).toContain('Glob: **/*.ts')
})
- test('summarizes list_directory tool with paths', () => {
+ test('summarizes list_directory tool with path', () => {
const messages = [
createMessage('user', 'List directories'),
createToolCallMessage('call-1', 'list_directory', {
- directories: [{ path: 'src' }, { path: 'lib' }],
+ path: 'src',
}),
createToolResultMessage('call-1', 'list_directory', { entries: [] }),
]
@@ -1587,7 +1746,7 @@ describe('context-pruner glob and list_directory tools', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('Listed dirs: src, lib')
+ expect(content).toContain('Listed dir: src')
})
test('summarizes read_subtree tool with paths', () => {
@@ -1605,3 +1764,597 @@ describe('context-pruner glob and list_directory tools', () => {
expect(content).toContain('Read subtree: src/components, src/utils')
})
})
+
+describe('context-pruner dual-budget behavior', () => {
+ let mockAgentState: AgentState
+
+ beforeEach(() => {
+ mockAgentState = createMockAgentState([], 0)
+ })
+
+ const runHandleSteps = (
+ messages: Message[],
+ contextTokenCount: number,
+ maxContextLength: number,
+ budgets?: { assistantToolBudget?: number; userBudget?: number },
+ ) => {
+ mockAgentState.messageHistory = messages
+ mockAgentState.contextTokenCount = contextTokenCount
+ const mockLogger = {
+ debug: () => {},
+ info: () => {},
+ warn: () => {},
+ error: () => {},
+ }
+ const generator = contextPruner.handleSteps!({
+ agentState: mockAgentState,
+ logger: mockLogger,
+ params: { maxContextLength, ...budgets },
+ })
+ const results: any[] = []
+ let result = generator.next()
+ while (!result.done) {
+ if (typeof result.value === 'object') {
+ results.push(result.value)
+ }
+ result = generator.next()
+ }
+ return results
+ }
+
+ test('includes recent messages in summary and drops older ones', () => {
+ const messages = [
+ createMessage('user', 'Old user message 1'),
+ createMessage('assistant', 'Old assistant response 1'),
+ createMessage('user', 'Old user message 2'),
+ createMessage('assistant', 'Old assistant response 2'),
+ createMessage('user', 'Recent user message'),
+ createMessage('assistant', 'Recent assistant response'),
+ ]
+
+ // Small budgets on summarized sizes: only the most recent entries fit
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 15,
+ userBudget: 15,
+ })
+
+ const resultMessages = results[0].input.messages
+
+ // Should be a single summary message (no verbatim messages)
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ expect(content).toContain('<conversation_summary>')
+
+ // Recent messages should be in the summary
+ expect(content).toContain('Recent user message')
+ expect(content).toContain('Recent assistant response')
+
+ // Older messages should be dropped entirely (not in summary)
+ expect(content).not.toContain('Old user message 1')
+ expect(content).not.toContain('Old assistant response 1')
+ expect(content).not.toContain('Old user message 2')
+ expect(content).not.toContain('Old assistant response 2')
+ })
+
+ test('summarizes all messages when they fit within budgets', () => {
+ const messages = [
+ createMessage('user', 'Hello'),
+ createMessage('assistant', 'Hi there!'),
+ createMessage('user', 'How are you?'),
+ createMessage('assistant', 'I am fine!'),
+ ]
+
+ // Large budgets: all messages fit in summary
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 20000,
+ userBudget: 50000,
+ })
+
+ const resultMessages = results[0].input.messages
+
+ // All messages summarized into one
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ expect(content).toContain('Hello')
+ expect(content).toContain('Hi there!')
+ expect(content).toContain('How are you?')
+ expect(content).toContain('I am fine!')
+ })
+
+ test('respects user budget separately from assistant+tool budget', () => {
+ const largeUserText = 'U'.repeat(600) // ~200 tokens
+ const messages = [
+ createMessage('user', largeUserText),
+ createMessage('assistant', 'Short response'),
+ createMessage('user', 'Recent short question'),
+ createMessage('assistant', 'Recent short answer'),
+ ]
+
+ // User budget small enough to exclude the large user message
+ // Assistant budget large enough to include all assistant messages
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 5000,
+ userBudget: 100,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ expect(content).toContain('<conversation_summary>')
+ // The large user message should be dropped (not in summary)
+ expect(content).not.toContain(largeUserText)
+ // Recent messages should be in the summary
+ expect(content).toContain('Recent short question')
+ expect(content).toContain('Recent short answer')
+ })
+
+ test('drops tool entries beyond budget at the cutoff boundary', () => {
+ const messages = [
+ createMessage('user', 'Old message'),
+ createToolCallMessage('call-1', 'read_files', { paths: ['old.ts'] }),
+ createToolResultMessage('call-1', 'read_files', { content: 'old file' }),
+ createMessage('user', 'Recent message'),
+ createMessage('assistant', 'Recent response'),
+ ]
+
+ // Budget that excludes the older tool call entry
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 15,
+ userBudget: 15,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+
+ // Recent messages should be in the summary
+ expect(content).toContain('Recent message')
+ expect(content).toContain('Recent response')
+
+ // Tool call summary should be dropped (beyond budget)
+ expect(content).not.toContain('old.ts')
+ })
+
+ test('counts tool result summaries against assistant+tool budget', () => {
+ // Use str_replace with a large result — this produces a summarized [EDIT RESULT] entry
+ const largeDiff = 'LARGE_DIFF_CONTENT_' + 'X'.repeat(900)
+ const messages = [
+ createMessage('user', 'Do something'),
+ createToolCallMessage('call-1', 'str_replace', { path: 'big.ts', replacements: [] }),
+ createToolResultMessage('call-1', 'str_replace', { file: 'big.ts', message: 'Updated', unifiedDiff: largeDiff }),
+ createMessage('user', 'Recent question'),
+ createMessage('assistant', 'Recent answer'),
+ ]
+
+ // Assistant budget too small for the large [EDIT RESULT] summary entry
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 100,
+ userBudget: 5000,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ expect(content).toContain('<conversation_summary>')
+ // Recent messages should be in the summary
+ expect(content).toContain('Recent question')
+ expect(content).toContain('Recent answer')
+ // Large edit result entry should be dropped (exceeds assistant+tool budget)
+ expect(content).not.toContain('LARGE_DIFF_CONTENT_')
+ })
+
+ test('drops older messages and includes recent ones in summary', () => {
+ const messages = [
+ createMessage('user', 'First request about feature A'),
+ createMessage('assistant', 'Working on feature A'),
+ createMessage('user', 'Second request about feature B'),
+ createMessage('assistant', 'Working on feature B'),
+ ]
+
+ // Budget only fits the last pair of summarized entries
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 15,
+ userBudget: 15,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ expect(content).toContain('<conversation_summary>')
+
+ // Recent messages should be in the summary
+ expect(content).toContain('Second request about feature B')
+ expect(content).toContain('Working on feature B')
+
+ // Older messages should be dropped
+ expect(content).not.toContain('First request about feature A')
+ expect(content).not.toContain('Working on feature A')
+ })
+
+ test('excludes STEP_PROMPT tagged messages from budget calculation', () => {
+ const largeStepPrompt = 'S'.repeat(900) // ~300 tokens
+ const messages: Message[] = [
+ createMessage('user', 'User request'),
+ createMessage('assistant', 'Assistant response'),
+ {
+ role: 'user',
+ content: [{ type: 'text', text: largeStepPrompt }],
+ tags: ['STEP_PROMPT'],
+ },
+ createMessage('user', 'Recent question'),
+ createMessage('assistant', 'Recent answer'),
+ ]
+
+ // Budget is small but the STEP_PROMPT should NOT count against it,
+ // so both real user messages and both assistant messages should fit
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 200,
+ userBudget: 200,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ // Both real messages should be in the summary
+ expect(content).toContain('User request')
+ expect(content).toContain('Assistant response')
+ expect(content).toContain('Recent question')
+ expect(content).toContain('Recent answer')
+ // STEP_PROMPT content should NOT be in the summary
+ expect(content).not.toContain(largeStepPrompt)
+ })
+
+ test('excludes SUBAGENT_SPAWN tagged messages from budget calculation', () => {
+ const messages: Message[] = [
+ createMessage('user', 'User request'),
+ createMessage('assistant', 'First response'),
+ {
+ role: 'assistant',
+ content: [{ type: 'text', text: 'A'.repeat(900) }],
+ tags: ['SUBAGENT_SPAWN'],
+ },
+ createMessage('user', 'Follow up'),
+ createMessage('assistant', 'Second response'),
+ ]
+
+ // Budget is small but SUBAGENT_SPAWN should NOT count against it
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 200,
+ userBudget: 200,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ expect(content).toContain('User request')
+ expect(content).toContain('First response')
+ expect(content).toContain('Follow up')
+ expect(content).toContain('Second response')
+ })
+
+ test('charges old summary entries against their correct budgets', () => {
+ // Previous summary with a large [USER] entry that exceeds user budget
+ const largeUserContent = 'X'.repeat(900)
+ const previousSummary: Message = {
+ role: 'user',
+ content: [
+ {
+ type: 'text',
+          text: `<conversation_summary>\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\n${largeUserContent}\n\n---\n\n[ASSISTANT]\nOld assistant response\n</conversation_summary>`,
+ },
+ ],
+ }
+
+ const messages: Message[] = [
+ previousSummary,
+ createMessage('user', 'After summary request'),
+ createMessage('assistant', 'After summary response'),
+ ]
+
+ // User budget is small — the large [USER] entry from the old summary
+ // should be dropped because it exceeds the user budget.
+ // The [ASSISTANT] entry from the old summary charges against assistant budget.
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 5000,
+ userBudget: 50,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ // Recent messages should be in the summary
+ expect(content).toContain('After summary request')
+ expect(content).toContain('After summary response')
+ // The old [ASSISTANT] entry fits the assistant budget and is after the cutoff
+ expect(content).toContain('Old assistant response')
+ // The large old [USER] entry should be dropped (exceeded user budget)
+ expect(content).not.toContain(largeUserContent)
+ })
+
+ test('drops old summary entries individually based on budget walk', () => {
+ // Previous summary with identifiable oldest and middle entries
+ const previousSummary: Message = {
+ role: 'user',
+ content: [
+ {
+ type: 'text',
+          text: `<conversation_summary>\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOLDEST_USER_ENTRY\n\n---\n\n[ASSISTANT]\nOLDEST_ASSISTANT_ENTRY\n\n---\n\n[USER]\nMIDDLE_USER_ENTRY\n\n---\n\n[ASSISTANT]\nMIDDLE_ASSISTANT_ENTRY\n</conversation_summary>`,
+ },
+ ],
+ }
+
+ const messages: Message[] = [
+ previousSummary,
+ createMessage('user', 'Recent request'),
+ createMessage('assistant', 'Recent response'),
+ ]
+
+ // Budget large enough for middle + recent entries but not oldest
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 25,
+ userBudget: 25,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ // Middle and recent entries should survive
+ expect(content).toContain('MIDDLE_USER_ENTRY')
+ expect(content).toContain('MIDDLE_ASSISTANT_ENTRY')
+ expect(content).toContain('Recent request')
+ expect(content).toContain('Recent response')
+ // Oldest entries should be dropped
+ expect(content).not.toContain('OLDEST_USER_ENTRY')
+ expect(content).not.toContain('OLDEST_ASSISTANT_ENTRY')
+ })
+
+ test('handles complex scenario with long messages of all types and previous summary', () => {
+ // Previous summary with 4 identifiable entries
+ const previousSummary: Message = {
+ role: 'user',
+ content: [
+ {
+ type: 'text',
+          text: `<conversation_summary>\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOLD_USER_REQUEST_1: The user asked about setting up authentication with OAuth2 and JWT tokens for the API.\n\n---\n\n[ASSISTANT]\nOLD_ASSISTANT_RESPONSE_1: Explained OAuth2 flow and implemented JWT token generation.\nTools: Read files: src/auth.ts, src/middleware.ts; Edited file: src/auth.ts\n\n---\n\n[USER]\nOLD_USER_REQUEST_2: Asked for unit tests for the auth module.\n\n---\n\n[ASSISTANT]\nOLD_ASSISTANT_RESPONSE_2: Created comprehensive test suite for authentication.\nTools: Wrote file: src/__tests__/auth.test.ts\n</conversation_summary>`,
+ },
+ ],
+ }
+
+ // Long user message (~45k chars, exceeds USER_MESSAGE_LIMIT of 13k tokens = 39k chars)
+ // Middle marker placed ~85% through so it falls in the truncated gap
+ // (past the 80% prefix but before the 20% suffix)
+ const longUserMessage = 'LONG_USER_START_' + 'Here is a detailed specification for the new feature. '.repeat(650) + '_LONG_USER_MIDDLE_MARKER_' + 'Here is a detailed specification for the new feature. '.repeat(150)
+
+ // Long assistant message with text (~8k chars, exceeds ASSISTANT_MESSAGE_LIMIT of 1.3k tokens = 3.9k chars)
+ // plus multiple tool calls. Middle marker placed ~60% through so it falls in the truncated gap.
+ const longAssistantText = 'LONG_ASSISTANT_START_' + 'I will implement this step by step, starting with the data model changes. '.repeat(60) + '_LONG_ASST_MIDDLE_MARKER_' + 'I will implement this step by step, starting with the data model changes. '.repeat(40)
+ const assistantWithToolCalls: Message = {
+ role: 'assistant',
+ content: [
+ { type: 'text', text: longAssistantText },
+ {
+ type: 'tool-call',
+ toolCallId: 'call-1',
+ toolName: 'read_files',
+ input: { paths: ['src/model.ts', 'src/service.ts'] },
+ },
+ {
+ type: 'tool-call',
+ toolCallId: 'call-2',
+ toolName: 'str_replace',
+ input: { path: 'src/model.ts', replacements: [] },
+ },
+ {
+ type: 'tool-call',
+ toolCallId: 'call-3',
+ toolName: 'spawn_agents',
+ input: {
+ agents: [
+ { agent_type: 'editor' },
+ { agent_type: 'editor' },
+ { agent_type: 'editor' },
+ { agent_type: 'editor' },
+ { agent_type: 'editor' },
+ ],
+ },
+ },
+ ],
+ }
+
+ // str_replace result with a large diff (~3k chars, exceeds 2k truncation limit)
+ const largeDiff = 'DIFF_START_MARKER_' + '+added line\n'.repeat(250) + '_DIFF_END_MARKER'
+
+ // spawn_agents result with 5 non-blacklisted agents producing large outputs
+ // Each ~4k chars, total ~20k, exceeds TOOL_ENTRY_LIMIT of 5k tokens = 15k chars
+ const largeAgentResults = Array.from({ length: 5 }, (_, i) => ({
+ agentType: 'editor',
+ value: {
+ type: 'string',
+ value: `AGENT_${i}_OUTPUT_START_` + 'Implementation details. '.repeat(160) + `_AGENT_${i}_OUTPUT_END`,
+ },
+ }))
+
+ const messages: Message[] = [
+ previousSummary,
+ createMessage('user', longUserMessage),
+ assistantWithToolCalls,
+ createToolResultMessage('call-1', 'read_files', { content: 'file data' } as JSONValue),
+ createToolResultMessage('call-2', 'str_replace', { file: 'src/model.ts', message: 'Updated', unifiedDiff: largeDiff }),
+ {
+ role: 'tool',
+ toolCallId: 'call-3',
+ toolName: 'spawn_agents',
+ content: [{ type: 'json', value: largeAgentResults }],
+ } as ToolMessage,
+ createMessage('user', 'FINAL_USER_REQUEST: Now run the tests'),
+ createMessage('assistant', 'FINAL_ASSISTANT_RESPONSE: Running tests now'),
+ ]
+
+ // Use default budgets — everything should fit
+ const results = runHandleSteps(messages, 250000, 200000)
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+
+ // === Structure checks ===
+ expect(content).toContain('<conversation_summary>')
+ expect(content).toContain('</conversation_summary>')
+ const summaryTagCount = (content.match(/<conversation_summary>/g) || []).length
+ expect(summaryTagCount).toBe(1)
+
+ // === Previous summary entries preserved ===
+ expect(content).toContain('OLD_USER_REQUEST_1')
+ expect(content).toContain('OLD_ASSISTANT_RESPONSE_1')
+ expect(content).toContain('OLD_USER_REQUEST_2')
+ expect(content).toContain('OLD_ASSISTANT_RESPONSE_2')
+
+ // === Long user message: truncated with 80/20 split ===
+ expect(content).toContain('LONG_USER_START_')
+ expect(content).not.toContain('_LONG_USER_MIDDLE_MARKER_') // Middle marker falls in truncated gap
+ expect(content).toContain('[...truncated')
+
+ // === Long assistant text: truncated ===
+ expect(content).toContain('LONG_ASSISTANT_START_')
+ expect(content).not.toContain('_LONG_ASST_MIDDLE_MARKER_') // Middle marker falls in truncated gap
+
+ // === Tool call summaries present ===
+ expect(content).toContain('Read files: src/model.ts, src/service.ts')
+ expect(content).toContain('Edited file: src/model.ts')
+ expect(content).toContain('Spawned agents:')
+
+ // === str_replace result: present but truncated at 2k chars ===
+ expect(content).toContain('[EDIT RESULT: str_replace]')
+ expect(content).toContain('DIFF_START_MARKER_')
+ expect(content).not.toContain('_DIFF_END_MARKER') // Truncated by 2k result limit
+
+ // === spawn_agents tool entry: truncated by TOOL_ENTRY_LIMIT ===
+ expect(content).toContain('AGENT_0_OUTPUT_START_') // First agent's start in 80% prefix
+ expect(content).not.toContain('AGENT_4_OUTPUT_START_') // Last agent's start falls in truncated gap
+
+ // === Final messages present ===
+ expect(content).toContain('FINAL_USER_REQUEST')
+ expect(content).toContain('FINAL_ASSISTANT_RESPONSE')
+
+ // === Entries are separated by --- ===
+ expect(content).toContain('---')
+ })
+
+ test('with tight budgets, drops old summary entries while keeping truncated new entries', () => {
+ // Same setup but with tight budgets: old summary entries get dropped,
+ // new entries survive (individually truncated)
+ const previousSummary: Message = {
+ role: 'user',
+ content: [
+ {
+ type: 'text',
+ text: `<conversation_summary>\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOLD_DROPPED_USER: ${'X'.repeat(600)}\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT: ${'Y'.repeat(600)}\n\n---\n\n[USER]\nOLD_DROPPED_USER_2: Asked about deployment\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT_2: Explained deployment process\n</conversation_summary>`,
+ },
+ ],
+ }
+
+ // Long user message (~12k chars, under truncation limit but uses significant budget)
+ const longUserMessage = 'SURVIVED_USER_START_' + 'Feature request details. '.repeat(400) + '_SURVIVED_USER_END'
+
+ // Assistant with tool calls
+ const assistantMsg: Message = {
+ role: 'assistant',
+ content: [
+ { type: 'text', text: 'SURVIVED_ASSISTANT: Working on it' },
+ {
+ type: 'tool-call',
+ toolCallId: 'call-1',
+ toolName: 'str_replace',
+ input: { path: 'src/app.ts', replacements: [] },
+ },
+ ],
+ }
+
+ // Tool result with a diff
+ const toolResult = createToolResultMessage('call-1', 'str_replace', {
+ file: 'src/app.ts',
+ message: 'Updated file',
+ unifiedDiff: '--- a/src/app.ts\n+++ b/src/app.ts\n@@ -1 +1 @@\n-old\n+SURVIVED_DIFF_CONTENT',
+ })
+
+ const messages: Message[] = [
+ previousSummary,
+ createMessage('user', longUserMessage),
+ assistantMsg,
+ toolResult,
+ createMessage('user', 'SURVIVED_FINAL_USER'),
+ createMessage('assistant', 'SURVIVED_FINAL_ASSISTANT'),
+ ]
+
+ // Tight budgets: enough for new entries but not old summary entries
+ // New assistant entries: ~25 (assistant text+tool) + ~56 (edit result JSON) + ~13 (final) = ~94 tokens
+ // Old assistant entries: ~20 for OLD_DROPPED_ASSISTANT_2 would push over budget of 100
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 100,
+ userBudget: 4200,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+
+ // === New entries survived ===
+ expect(content).toContain('SURVIVED_USER_START_')
+ expect(content).toContain('SURVIVED_ASSISTANT')
+ expect(content).toContain('SURVIVED_DIFF_CONTENT')
+ expect(content).toContain('SURVIVED_FINAL_USER')
+ expect(content).toContain('SURVIVED_FINAL_ASSISTANT')
+
+ // === Old summary entries dropped by budget walk ===
+ expect(content).not.toContain('OLD_DROPPED_USER:')
+ expect(content).not.toContain('OLD_DROPPED_ASSISTANT:')
+ expect(content).not.toContain('OLD_DROPPED_USER_2:')
+ expect(content).not.toContain('OLD_DROPPED_ASSISTANT_2:')
+ })
+
+ test('fully includes conversation summary when it fits within user budget', () => {
+ const previousSummary: Message = {
+ role: 'user',
+ content: [
+ {
+ type: 'text',
+ text: `<conversation_summary>\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOld request about feature A\n\n---\n\n[ASSISTANT]\nWorked on feature A\n</conversation_summary>`,
+ },
+ ],
+ }
+
+ const messages: Message[] = [
+ previousSummary,
+ createMessage('user', 'New request about feature B'),
+ createMessage('assistant', 'Working on feature B'),
+ ]
+
+ // Large budget — everything fits
+ const results = runHandleSteps(messages, 250000, 200000, {
+ assistantToolBudget: 20000,
+ userBudget: 50000,
+ })
+
+ const resultMessages = results[0].input.messages
+ expect(resultMessages).toHaveLength(1)
+
+ const content = (resultMessages[0].content[0] as { text: string }).text
+ // Previous summary content should be fully included
+ expect(content).toContain('Old request about feature A')
+ expect(content).toContain('Worked on feature A')
+ // New messages should also be included
+ expect(content).toContain('New request about feature B')
+ expect(content).toContain('Working on feature B')
+ })
+})
diff --git a/agents/browser-use/browser-use.ts b/agents/browser-use/browser-use.ts
index 7b11db0f89..1536e3e361 100644
--- a/agents/browser-use/browser-use.ts
+++ b/agents/browser-use/browser-use.ts
@@ -127,7 +127,7 @@ const definition: AgentDefinition = {
mcpServers: {
'chrome-devtools': {
command: 'npx',
- args: ['-y', 'chrome-devtools-mcp@latest', '--headless'],
+ args: ['-y', 'chrome-devtools-mcp@latest', '--headless', '--isolated'],
},
},
diff --git a/agents/context-pruner.ts b/agents/context-pruner.ts
index bbf495baa1..55b1dd6bf7 100644
--- a/agents/context-pruner.ts
+++ b/agents/context-pruner.ts
@@ -10,259 +10,6 @@ import type {
UserMessage,
} from './types/util-types'
-// =============================================================================
-// Helper Functions (exported for testing)
-// =============================================================================
-
-/**
- * Truncates long text with 80% from the beginning and 20% from the end.
- * Preserves context from both ends of the text while indicating what was removed.
- *
- * @param text - The text to truncate
- * @param limit - Maximum character length
- * @returns Truncated text with notice of how many chars were removed
- */
-export function truncateLongText(text: string, limit: number): string {
- if (text.length <= limit) {
- return text
- }
- const availableChars = limit - 50 // 50 chars for the truncation notice
- const prefixLength = Math.floor(availableChars * 0.8)
- const suffixLength = availableChars - prefixLength
- const prefix = text.slice(0, prefixLength)
- const suffix = text.slice(-suffixLength)
- const truncatedChars = text.length - prefixLength - suffixLength
- return `${prefix}\n\n[...truncated ${truncatedChars} chars...]\n\n${suffix}`
-}
-
-/**
- * Estimates token count from a JSON-serializable object.
- * Uses a simple heuristic of ~3 characters per token.
- *
- * @param obj - The object to estimate tokens for
- * @returns Estimated token count
- */
-export function estimateTokens(obj: unknown): number {
- return Math.ceil(JSON.stringify(obj).length / 3)
-}
-
-/**
- * Extracts text content from a message, handling both string and array formats.
- *
- * @param message - The message to extract text from
- * @returns Combined text content from the message
- */
-export function getTextContent(message: Message): string {
- if (typeof message.content === 'string') {
- return message.content
- }
- if (Array.isArray(message.content)) {
- return message.content
- .filter(
- (part: Record<string, unknown>) =>
- part.type === 'text' && typeof part.text === 'string',
- )
- .map((part: Record<string, unknown>) => part.text as string)
- .join('\n')
- }
- return ''
-}
-
-/**
- * Summarizes a tool call into a human-readable description.
- * Handles various tool types with appropriate formatting.
- *
- * @param toolName - The name of the tool
- * @param input - The tool's input parameters
- * @returns A concise summary of the tool call
- */
-export function summarizeToolCall(
- toolName: string,
- input: Record<string, unknown>,
-): string {
- switch (toolName) {
- case 'read_files': {
- const paths = input.paths as string[] | undefined
- if (paths && paths.length > 0) {
- return `Read files: ${paths.join(', ')}`
- }
- return 'Read files'
- }
- case 'write_file': {
- const path = input.path as string | undefined
- return path ? `Wrote file: ${path}` : 'Wrote file'
- }
- case 'str_replace': {
- const path = input.path as string | undefined
- return path ? `Edited file: ${path}` : 'Edited file'
- }
- case 'propose_write_file': {
- const path = input.path as string | undefined
- return path ? `Proposed write to: ${path}` : 'Proposed file write'
- }
- case 'propose_str_replace': {
- const path = input.path as string | undefined
- return path ? `Proposed edit to: ${path}` : 'Proposed file edit'
- }
- case 'read_subtree': {
- const paths = input.paths as string[] | undefined
- if (paths && paths.length > 0) {
- return `Read subtree: ${paths.join(', ')}`
- }
- return 'Read subtree'
- }
- case 'code_search': {
- const pattern = input.pattern as string | undefined
- const flags = input.flags as string | undefined
- if (pattern && flags) {
- return `Code search: "${pattern}" (${flags})`
- }
- return pattern ? `Code search: "${pattern}"` : 'Code search'
- }
- case 'glob': {
- const patterns = input.patterns as
- | Array<{ pattern: string }>
- | undefined
- if (patterns && patterns.length > 0) {
- return `Glob: ${patterns.map((p) => p.pattern).join(', ')}`
- }
- return 'Glob search'
- }
- case 'list_directory': {
- const directories = input.directories as
- | Array<{ path: string }>
- | undefined
- if (directories && directories.length > 0) {
- return `Listed dirs: ${directories.map((d) => d.path).join(', ')}`
- }
- return 'Listed directory'
- }
- case 'find_files': {
- const pattern = input.pattern as string | undefined
- return pattern ? `Find files: "${pattern}"` : 'Find files'
- }
- case 'run_terminal_command': {
- const command = input.command as string | undefined
- if (command) {
- const shortCmd =
- command.length > 50 ? command.slice(0, 50) + '...' : command
- return `Ran command: ${shortCmd}`
- }
- return 'Ran terminal command'
- }
- case 'spawn_agents':
- case 'spawn_agent_inline': {
- const agents = input.agents as
- | Array<{
- agent_type: string
- prompt?: string
- params?: Record<string, unknown>
- }>
- | undefined
- const agentType = input.agent_type as string | undefined
- const prompt = input.prompt as string | undefined
- const agentParams = input.params as
- | Record<string, unknown>
- | undefined
-
- if (agents && agents.length > 0) {
- const agentDetails = agents.map((a) => {
- let detail = a.agent_type
- const extras: string[] = []
- if (a.prompt) {
- const truncatedPrompt =
- a.prompt.length > 1000
- ? a.prompt.slice(0, 1000) + '...'
- : a.prompt
- extras.push(`prompt: "${truncatedPrompt}"`)
- }
- if (a.params && Object.keys(a.params).length > 0) {
- const paramsStr = JSON.stringify(a.params)
- const truncatedParams =
- paramsStr.length > 1000
- ? paramsStr.slice(0, 1000) + '...'
- : paramsStr
- extras.push(`params: ${truncatedParams}`)
- }
- if (extras.length > 0) {
- detail += ` (${extras.join(', ')})`
- }
- return detail
- })
- return `Spawned agents:\n${agentDetails.map((d) => `- ${d}`).join('\n')}`
- }
- if (agentType) {
- const extras: string[] = []
- if (prompt) {
- const truncatedPrompt =
- prompt.length > 1000 ? prompt.slice(0, 1000) + '...' : prompt
- extras.push(`prompt: "${truncatedPrompt}"`)
- }
- if (agentParams && Object.keys(agentParams).length > 0) {
- const paramsStr = JSON.stringify(agentParams)
- const truncatedParams =
- paramsStr.length > 1000
- ? paramsStr.slice(0, 1000) + '...'
- : paramsStr
- extras.push(`params: ${truncatedParams}`)
- }
- if (extras.length > 0) {
- return `Spawned agent: ${agentType} (${extras.join(', ')})`
- }
- return `Spawned agent: ${agentType}`
- }
- return 'Spawned agent(s)'
- }
- case 'write_todos': {
- const todos = input.todos as
- | Array<{ task: string; completed: boolean }>
- | undefined
- if (todos) {
- const completed = todos.filter((t) => t.completed).length
- const incomplete = todos.filter((t) => !t.completed)
- if (incomplete.length === 0) {
- return `Todos: ${completed}/${todos.length} complete (all done!)`
- }
- const remainingTasks = incomplete
- .map((t) => `- ${t.task}`)
- .join('\n')
- return `Todos: ${completed}/${todos.length} complete. Remaining:\n${remainingTasks}`
- }
- return 'Updated todos'
- }
- case 'ask_user': {
- const questions = input.questions as
- | Array<{ question: string }>
- | undefined
- if (questions && questions.length > 0) {
- const questionTexts = questions.map((q) => q.question).join('; ')
- const truncated =
- questionTexts.length > 200
- ? questionTexts.slice(0, 200) + '...'
- : questionTexts
- return `Asked user: ${truncated}`
- }
- return 'Asked user question'
- }
- case 'suggest_followups':
- return 'Suggested followups'
- case 'web_search': {
- const query = input.query as string | undefined
- return query ? `Web search: "${query}"` : 'Web search'
- }
- case 'read_docs': {
- const query = input.query as string | undefined
- return query ? `Read docs: "${query}"` : 'Read docs'
- }
- case 'set_output':
- return 'Set output'
- case 'set_messages':
- return 'Set messages'
- default:
- return `Used tool: ${toolName}`
- }
-}
-
const definition: AgentDefinition = {
id: 'context-pruner',
publisher,
@@ -278,6 +25,12 @@ const definition: AgentDefinition = {
maxContextLength: {
type: 'number',
},
+ assistantToolBudget: {
+ type: 'number',
+ },
+ userBudget: {
+ type: 'number',
+ },
},
required: [],
},
@@ -291,9 +44,6 @@ const definition: AgentDefinition = {
// Constants (must be inside handleSteps since it's serialized to a string)
// =============================================================================
- /** Target: summarized messages should be at most 10% of max context */
- const TARGET_SUMMARY_FACTOR = 0.1
-
/** Agent IDs whose output should be excluded from spawn_agents results */
const SPAWN_AGENTS_OUTPUT_BLACKLIST = [
'file-picker',
@@ -302,11 +52,27 @@ const definition: AgentDefinition = {
'basher',
'code-reviewer',
'code-reviewer-multi-prompt',
+ 'librarian',
+ 'tmux-cli',
+ 'browser-use',
]
- /** Limits for truncating long messages (chars) */
- const USER_MESSAGE_LIMIT = 15000
- const ASSISTANT_MESSAGE_LIMIT = 4000
+ /** Limits for truncating long messages in the summary (estimated tokens) */
+ const USER_MESSAGE_LIMIT = 13_000
+ const ASSISTANT_MESSAGE_LIMIT = 1_300
+ const TOOL_ENTRY_LIMIT = 5_000
+
+ /** Approximate characters per token (matches estimateTokens heuristic) */
+ const CHARS_PER_TOKEN = 3
+
+ /** Token budget for assistant + tool content in the conversation summary */
+ const ASSISTANT_TOOL_BUDGET = 20_000
+
+ /** Token budget for user content in the conversation summary */
+ const USER_BUDGET = 50_000
+
+ /** Fudge factor for token count threshold to trigger pruning earlier */
+ const TOKEN_COUNT_FUDGE_FACTOR = 1_000
/** Prompt cache expiry time (Anthropic caches for 5 minutes) */
const CACHE_EXPIRY_MS = 5 * 60 * 1000
@@ -315,8 +81,6 @@ const definition: AgentDefinition = {
const SUMMARY_HEADER =
'This is a summary of the conversation so far. The original messages have been condensed to save context space.'
- /** Fudge factor for token count threshold to trigger pruning earlier */
- const TOKEN_COUNT_FUDGE_FACTOR = 1000
// =============================================================================
// Helper Functions (must be inside handleSteps since it's serialized to a string)
@@ -338,13 +102,6 @@ const definition: AgentDefinition = {
return `${prefix}\n\n[...truncated ${truncatedChars} chars...]\n\n${suffix}`
}
- /**
- * Estimates token count from a JSON-serializable object.
- */
- function estimateTokens(obj: unknown): number {
- return Math.ceil(JSON.stringify(obj).length / 3)
- }
-
/**
* Extracts text content from a message.
*/
@@ -411,22 +168,12 @@ const definition: AgentDefinition = {
return pattern ? `Code search: "${pattern}"` : 'Code search'
}
case 'glob': {
- const patterns = input.patterns as
- | Array<{ pattern: string }>
- | undefined
- if (patterns && patterns.length > 0) {
- return `Glob: ${patterns.map((p) => p.pattern).join(', ')}`
- }
- return 'Glob search'
+ const pattern = input.pattern as string | undefined
+ return pattern ? `Glob: ${pattern}` : 'Glob search'
}
case 'list_directory': {
- const directories = input.directories as
- | Array<{ path: string }>
- | undefined
- if (directories && directories.length > 0) {
- return `Listed dirs: ${directories.map((d) => d.path).join(', ')}`
- }
- return 'Listed directory'
+ const path = input.path as string | undefined
+ return path ? `Listed dir: ${path}` : 'Listed directory'
}
case 'find_files': {
const pattern = input.pattern as string | undefined
@@ -627,69 +374,80 @@ const definition: AgentDefinition = {
}
// === SUMMARIZATION STRATEGY ===
- // Convert entire conversation to a single summarized user message
- // If there's already a summary from a previous compaction, extract and preserve it
+ // 1. Summarize ALL messages (apply transformations: truncation, tool summaries, etc.)
+ // 2. Walk backwards through summarized parts to apply token budgets
+ // 3. Older summarized parts beyond the budgets are dropped
- // Check for existing conversation summary and extract its content
- let previousSummary = ''
- for (const message of currentMessages) {
- if (message.role === 'user' && Array.isArray(message.content)) {
- for (const part of message.content) {
- if (part.type === 'text' && typeof part.text === 'string') {
- const text = part.text as string
- const summaryMatch = text.match(
- /<conversation_summary>([\s\S]*?)<\/conversation_summary>/,
- )
- if (summaryMatch) {
- let summaryContent = summaryMatch[1].trim()
- // Remove the standard header if present
- if (summaryContent.startsWith(SUMMARY_HEADER)) {
- summaryContent = summaryContent
- .slice(SUMMARY_HEADER.length)
- .trim()
- }
- // Remove [PREVIOUS SUMMARY] prefix if present (from earlier compaction)
- // to avoid nested markers
- if (summaryContent.startsWith('[PREVIOUS SUMMARY]')) {
- summaryContent = summaryContent
- .slice('[PREVIOUS SUMMARY]'.length)
- .trim()
- }
- previousSummary = summaryContent
- }
- }
- }
+ const assistantToolBudget: number = params?.assistantToolBudget ?? ASSISTANT_TOOL_BUDGET
+ const userBudget: number = params?.userBudget ?? USER_BUDGET
+
+ function shouldExcludeMessage(message: Message): boolean {
+ if (message.tags?.includes('INSTRUCTIONS_PROMPT')) return true
+ if (message.tags?.includes('STEP_PROMPT')) return true
+ if (message.tags?.includes('SUBAGENT_SPAWN')) return true
+ return false
+ }
+
+ function isConversationSummary(message: Message): boolean {
+ if (message.role !== 'user') return false
+ return getTextContent(message).includes('<conversation_summary>')
+ }
+
+ function extractSummaryContent(message: Message): string {
+ const text = getTextContent(message)
+ const match = text.match(
+ /<conversation_summary>([\s\S]*?)<\/conversation_summary>/,
+ )
+ if (!match) return ''
+ let content = match[1].trim()
+ if (content.startsWith(SUMMARY_HEADER)) {
+ content = content.slice(SUMMARY_HEADER.length).trim()
}
+ return content
}
- // Filter out messages that are previous summaries or have special tags to exclude
- const messagesWithoutOldSummaries = currentMessages.filter((message) => {
- // Exclude messages with special tags that shouldn't be in the summary
- if (message.tags?.includes('INSTRUCTIONS_PROMPT')) return false
- if (message.tags?.includes('STEP_PROMPT')) return false
- if (message.tags?.includes('SUBAGENT_SPAWN')) return false
-
- // Exclude previous conversation summaries
- if (message.role === 'user' && Array.isArray(message.content)) {
- for (const part of message.content) {
- if (part.type === 'text' && typeof part.text === 'string') {
- if ((part.text as string).includes('<conversation_summary>')) {
- return false
- }
- }
+ /**
+ * Parses a previous summary text blob into role-tagged entries.
+ * Splits on the --- separator and determines each chunk's role
+ * based on its prefix marker.
+ */
+ function parseSummaryIntoEntries(
+ summaryText: string,
+ ): Array<{ role: 'user' | 'assistant_tool'; parts: string[] }> {
+ if (!summaryText.trim()) return []
+
+ const separator = '\n\n---\n\n'
+ const chunks = summaryText.split(separator).filter((c) => c.trim())
+
+ return chunks.map((chunk) => {
+ const trimmed = chunk.trim()
+ const isUser =
+ trimmed.startsWith('[USER]\n') ||
+ trimmed.startsWith('[USER] [with image')
+ return {
+ role: isUser ? ('user' as const) : ('assistant_tool' as const),
+ parts: [trimmed],
}
+ })
+ }
+
+ // Extract previous summary content from all messages
+ let previousSummaryContent = ''
+ for (const message of currentMessages) {
+ if (isConversationSummary(message)) {
+ previousSummaryContent = extractSummaryContent(message)
}
- return true
- })
+ }
- // Build the summary
- const summaryParts: string[] = []
+ // Filter out excluded and conversation summary messages for summarization
+ const messagesToSummarize = currentMessages.filter(
+ (message) => !shouldExcludeMessage(message) && !isConversationSummary(message),
+ )
// Find the last user message with images to preserve in the final output
- // We preserve the most recent user's images since they're likely the most relevant
let lastUserImageParts: Array<Record<string, unknown>> = []
- for (let i = messagesWithoutOldSummaries.length - 1; i >= 0; i--) {
- const msg = messagesWithoutOldSummaries[i]
+ for (let i = messagesToSummarize.length - 1; i >= 0; i--) {
+ const msg = messagesToSummarize[i]
if (msg.role === 'user' && Array.isArray(msg.content)) {
const imageParts = msg.content.filter(
(part: Record<string, unknown>) =>
@@ -702,18 +460,14 @@ const definition: AgentDefinition = {
}
}
- // If there was a previous summary, include it first (no marker needed, already chronological)
- if (previousSummary) {
- summaryParts.push(previousSummary)
- }
+ // Phase 1: Summarize ALL messages into tagged entries
+ const summarizedEntries: Array<{ role: 'user' | 'assistant_tool'; parts: string[] }> = []
- for (const message of messagesWithoutOldSummaries) {
+ for (const message of messagesToSummarize) {
if (message.role === 'user') {
let text = getTextContent(message).trim()
if (text) {
- // Truncate very long user messages (80% prefix, 20% suffix)
- text = truncateLongText(text, USER_MESSAGE_LIMIT)
- // Check for images in the message
+ text = truncateLongText(text, USER_MESSAGE_LIMIT * CHARS_PER_TOKEN)
let hasImages = false
if (Array.isArray(message.content)) {
hasImages = message.content.some(
@@ -722,7 +476,10 @@ const definition: AgentDefinition = {
)
}
const imageNote = hasImages ? ' [with image(s)]' : ''
- summaryParts.push(`[USER]${imageNote}\n${text}`)
+ summarizedEntries.push({
+ role: 'user',
+ parts: [`[USER]${imageNote}\n${text}`],
+ })
}
} else if (message.role === 'assistant') {
const textParts: string[] = []
@@ -731,7 +488,6 @@ const definition: AgentDefinition = {
if (Array.isArray(message.content)) {
for (const part of message.content) {
if (part.type === 'text' && typeof part.text === 'string') {
- // Remove <think> tags and their contents before summarizing
const textWithoutThinkTags = (part.text as string)
.replace(/<think>[\s\S]*?<\/think>/g, '')
.trim()
@@ -748,9 +504,8 @@ const definition: AgentDefinition = {
const parts: string[] = []
if (textParts.length > 0) {
- // Truncate very long assistant text (80% prefix, 20% suffix)
let combinedText = textParts.join('\n')
- combinedText = truncateLongText(combinedText, ASSISTANT_MESSAGE_LIMIT)
+ combinedText = truncateLongText(combinedText, ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN)
parts.push(combinedText)
}
if (toolSummaries.length > 0) {
@@ -758,44 +513,43 @@ const definition: AgentDefinition = {
}
if (parts.length > 0) {
- summaryParts.push(`[ASSISTANT]\n${parts.join('\n')}`)
+ summarizedEntries.push({
+ role: 'assistant_tool',
+ parts: [`[ASSISTANT]\n${parts.join('\n')}`],
+ })
}
} else if (message.role === 'tool') {
- // Tool results are already captured via the tool-call summaries
- // But we capture errors, terminal exit codes, and ask_user answers
const toolMessage = message as ToolMessage
+ const entryParts: string[] = []
+
if (Array.isArray(toolMessage.content)) {
for (const part of toolMessage.content) {
if (part.type === 'json' && part.value) {
const value = part.value as Record<string, unknown>
- // Capture errors
if (value.errorMessage || value.error) {
let errorText = String(value.errorMessage || value.error)
- // Truncate long error messages to 100 chars
if (errorText.length > 100) {
errorText = errorText.slice(0, 100) + '...'
}
- summaryParts.push(
+ entryParts.push(
`[TOOL ERROR: ${toolMessage.toolName}] ${errorText}`,
)
}
- // Capture terminal command exit codes (non-zero = failure)
if (
toolMessage.toolName === 'run_terminal_command' &&
'exitCode' in value
) {
const exitCode = value.exitCode as number
if (exitCode !== 0) {
- summaryParts.push(`[COMMAND FAILED] Exit code: ${exitCode}`)
+ entryParts.push(`[COMMAND FAILED] Exit code: ${exitCode}`)
}
}
- // Capture ask_user answers or skipped
if (toolMessage.toolName === 'ask_user') {
if (value.skipped) {
- summaryParts.push('[USER SKIPPED QUESTION]')
+ entryParts.push('[USER SKIPPED QUESTION]')
} else if ('answers' in value) {
const answers = value.answers as
| Array<{
@@ -814,43 +568,34 @@ const definition: AgentDefinition = {
return '(no answer)'
})
.join('; ')
- // Truncate long answers to 10,000 chars
const truncated =
answerTexts.length > 10_000
? answerTexts.slice(0, 10_000) + '...'
: answerTexts
- summaryParts.push(`[USER ANSWERED] ${truncated}`)
+ entryParts.push(`[USER ANSWERED] ${truncated}`)
}
}
}
- // Capture str_replace results (diff of changes made)
- if (toolMessage.toolName === 'str_replace') {
- const diff = value.diff as string | undefined
- if (diff) {
- // Truncate long diffs to 2000 chars
- const truncatedDiff =
- diff.length > 2000 ? diff.slice(0, 2000) + '...' : diff
- summaryParts.push(`[EDIT RESULT]\n${truncatedDiff}`)
- }
- }
-
- // Capture write_file results (diff of changes made)
- if (toolMessage.toolName === 'write_file') {
- const diff = value.diff as string | undefined
- if (diff) {
- // Truncate long diffs to 2000 chars
- const truncatedDiff =
- diff.length > 2000 ? diff.slice(0, 2000) + '...' : diff
- summaryParts.push(`[WRITE RESULT]\n${truncatedDiff}`)
- }
+ if (
+ toolMessage.toolName === 'str_replace' ||
+ toolMessage.toolName === 'propose_str_replace' ||
+ toolMessage.toolName === 'write_file' ||
+ toolMessage.toolName === 'propose_write_file'
+ ) {
+ const resultStr = JSON.stringify(value)
+ const truncatedResult =
+ resultStr.length > 2000
+ ? resultStr.slice(0, 2000) + '...'
+ : resultStr
+ entryParts.push(
+ `[EDIT RESULT: ${toolMessage.toolName}]\n${truncatedResult}`,
+ )
}
}
}
}
- // Capture spawn_agents results (excluding blacklisted agents)
- // The tool result value is an array of agent results at the top level
if (
toolMessage.toolName === 'spawn_agents' &&
Array.isArray(toolMessage.content)
@@ -873,72 +618,88 @@ const definition: AgentDefinition = {
if (includedResults.length > 0) {
const resultSummaries = includedResults.map((r) => {
let outputStr = ''
- // Extract the actual output from value.value (e.g., lastMessage content)
if (r.value?.value !== undefined && r.value?.value !== null) {
if (typeof r.value.value === 'string') {
outputStr = r.value.value
} else {
outputStr = JSON.stringify(r.value.value)
}
- // Remove <think> tags and their contents to save context tokens
outputStr = outputStr
.replace(/<think>[\s\S]*?<\/think>/g, '')
.trim()
- // Truncate long outputs to ASSISTANT_MESSAGE_LIMIT chars
- if (outputStr.length > ASSISTANT_MESSAGE_LIMIT) {
+ if (outputStr.length > ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) {
outputStr =
- outputStr.slice(0, ASSISTANT_MESSAGE_LIMIT) + '...'
+ outputStr.slice(0, ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) + '...'
}
}
return `- ${r.agentType}: ${outputStr || '(no output)'}`
})
- summaryParts.push(
+ entryParts.push(
`[AGENT RESULTS]\n${resultSummaries.join('\n')}`,
)
}
}
}
}
+
+ if (entryParts.length > 0) {
+ const joinedToolEntry = truncateLongText(
+ entryParts.join('\n\n'),
+ TOOL_ENTRY_LIMIT * CHARS_PER_TOKEN,
+ )
+ summarizedEntries.push({
+ role: 'assistant_tool',
+ parts: [joinedToolEntry],
+ })
+ }
}
}
- let summaryText = summaryParts.join('\n\n---\n\n')
-
- // Calculate target size (10% of max context, for messages only)
- const targetTokens = maxContextLength * TARGET_SUMMARY_FACTOR
- let summaryTokens = estimateTokens(summaryText)
+ // Parse previous summary into role-tagged entries and combine with new entries
+ const allEntries = [
+ ...parseSummaryIntoEntries(previousSummaryContent),
+ ...summarizedEntries,
+ ]
- // If summary is too big, truncate from the beginning
- if (summaryTokens > targetTokens) {
- const truncationMessage =
- '[CONVERSATION TRUNCATED - Earlier messages omitted due to length]\n\n'
- const truncationTokens = estimateTokens(truncationMessage)
- const availableTokens = targetTokens - truncationTokens
+ // Phase 2: Walk backwards through all entries to apply token budgets
+ let assistantToolTokens = 0
+ let userTokens = 0
+ let cutoffIndex = 0
- // Estimate characters to keep (rough: 3 chars per token)
- const charsToKeep = Math.floor(availableTokens * 3)
+ for (let i = allEntries.length - 1; i >= 0; i--) {
+ const entry = allEntries[i]
+ const entryText = entry.parts.join('\n\n---\n\n')
+ const entryTokens = Math.ceil(entryText.length / CHARS_PER_TOKEN)
- if (charsToKeep > 0 && charsToKeep < summaryText.length) {
- // Truncate from the beginning, try to find a clean break point
- const truncatedText = summaryText.slice(-charsToKeep)
- // Find the first separator to make a clean cut
- const separatorIndex = truncatedText.indexOf('\n\n---\n\n')
- if (
- separatorIndex !== -1 &&
- separatorIndex < truncatedText.length / 2
- ) {
- summaryText =
- truncationMessage +
- truncatedText.slice(separatorIndex + '\n\n---\n\n'.length)
- } else {
- summaryText = truncationMessage + truncatedText
+ if (entry.role === 'user') {
+ if (userTokens + entryTokens > userBudget) {
+ cutoffIndex = i + 1
+ break
+ }
+ userTokens += entryTokens
+ } else {
+ if (assistantToolTokens + entryTokens > assistantToolBudget) {
+ cutoffIndex = i + 1
+ break
}
- } else if (charsToKeep <= 0) {
- summaryText =
- truncationMessage + '[Summary too large - content omitted]'
+ assistantToolTokens += entryTokens
}
}
+ // Phase 3: Build final summary from included entries
+ const summaryParts: string[] = []
+
+ for (let i = cutoffIndex; i < allEntries.length; i++) {
+ summaryParts.push(...allEntries[i].parts)
+ }
+
+ // Fallback: if nothing fit within budgets, always include at least the newest entry
+ if (summaryParts.length === 0 && allEntries.length > 0) {
+ summaryParts.push(...allEntries[allEntries.length - 1].parts)
+ }
+
+ const summaryText = summaryParts.join('\n\n---\n\n')
+
// Create the summarized message with fresh sentAt timestamp
// Include any images from the last user message that had images
const now = Date.now()
diff --git a/cli/release/package.json b/cli/release/package.json
index f51779ae8b..e737956880 100644
--- a/cli/release/package.json
+++ b/cli/release/package.json
@@ -1,6 +1,6 @@
{
"name": "codebuff",
- "version": "1.0.631",
+ "version": "1.0.633",
"description": "AI coding agent",
"license": "MIT",
"bin": {
diff --git a/cli/src/commands/__tests__/router-input.test.ts b/cli/src/commands/__tests__/router-input.test.ts
index ac1310a795..653063abbc 100644
--- a/cli/src/commands/__tests__/router-input.test.ts
+++ b/cli/src/commands/__tests__/router-input.test.ts
@@ -372,22 +372,16 @@ describe('command-registry', () => {
}
})
- test('connect slash command presence matches feature flag', () => {
- const { CHATGPT_OAUTH_ENABLED } = require('@codebuff/common/constants/chatgpt-oauth')
+ test('connect command is not available in codebuff (freebuff-only)', () => {
const hasConnectSlashCommand = SLASH_COMMANDS.some(
(cmd) => cmd.id === 'connect',
)
- expect(hasConnectSlashCommand).toBe(CHATGPT_OAUTH_ENABLED)
+ expect(hasConnectSlashCommand).toBe(false)
})
- test('connect:chatgpt command registry availability matches feature flag', () => {
- const { CHATGPT_OAUTH_ENABLED } = require('@codebuff/common/constants/chatgpt-oauth')
+ test('connect:chatgpt command is not available in codebuff (freebuff-only)', () => {
const command = findCommand('connect:chatgpt')
- if (CHATGPT_OAUTH_ENABLED) {
- expect(command).toBeDefined()
- } else {
- expect(command).toBeUndefined()
- }
+ expect(command).toBeUndefined()
})
})
})
diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts
index b5b81d5800..69b8857b2e 100644
--- a/cli/src/commands/command-registry.ts
+++ b/cli/src/commands/command-registry.ts
@@ -179,6 +179,7 @@ const FREEBUFF_REMOVED_COMMANDS = new Set([
])
const FREEBUFF_ONLY_COMMANDS = new Set([
+ 'connect',
'plan',
])
diff --git a/cli/src/components/help-banner.tsx b/cli/src/components/help-banner.tsx
index 0e0ee17007..ccf39bdf82 100644
--- a/cli/src/components/help-banner.tsx
+++ b/cli/src/components/help-banner.tsx
@@ -38,6 +38,7 @@ export const HelpBanner = () => {
const theme = useTheme()
const { data: subscriptionData } = useSubscriptionQuery()
const hasSubscription = subscriptionData?.hasSubscription ?? false
+ const chatGptOAuth = getChatGptOAuthStatus()
// Auto-hide after timeout
React.useEffect(() => {
@@ -79,11 +80,16 @@ export const HelpBanner = () => {
Tips
- {IS_FREEBUFF && !getChatGptOAuthStatus().connected && (
+ {IS_FREEBUFF && !chatGptOAuth.connected && (
Connect via /connect to unlock /plan & /review
)}
+ {IS_FREEBUFF && chatGptOAuth.connected && (
+
+ Try workflow: /interview → /plan → implement → /review
+
+ )}
Use @ to reference agents to spawn or files to read
diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts
index 50dd90f0d2..6893640516 100644
--- a/cli/src/data/slash-commands.ts
+++ b/cli/src/data/slash-commands.ts
@@ -47,6 +47,7 @@ const FREEBUFF_REMOVED_COMMAND_IDS = new Set([
])
const FREEBUFF_ONLY_COMMAND_IDS = new Set([
+ 'connect',
'plan',
])
diff --git a/cli/src/hooks/use-auth-state.ts b/cli/src/hooks/use-auth-state.ts
index e800b3355f..5f5ef29d01 100644
--- a/cli/src/hooks/use-auth-state.ts
+++ b/cli/src/hooks/use-auth-state.ts
@@ -6,6 +6,7 @@ import { useLoginStore } from '../state/login-store'
import { identifyUser, trackEvent } from '../utils/analytics'
import { getUserCredentials } from '../utils/auth'
import { resetCodebuffClient } from '../utils/codebuff-client'
+import { IS_FREEBUFF } from '../utils/constants'
import { loggerContext } from '../utils/logger'
import type { MultilineInputHandle } from '../components/multiline-input'
@@ -14,7 +15,7 @@ import type { User } from '../utils/auth'
const setAuthLoggerContext = (params: { userId: string; email: string }) => {
loggerContext.userId = params.userId
loggerContext.userEmail = params.email
- identifyUser(params.userId, { email: params.email })
+ identifyUser(params.userId, { email: params.email, freebuff: IS_FREEBUFF })
}
const clearAuthLoggerContext = () => {
diff --git a/cli/src/index.tsx b/cli/src/index.tsx
index 62579dba34..7f2e3de77c 100644
--- a/cli/src/index.tsx
+++ b/cli/src/index.tsx
@@ -23,7 +23,7 @@ import { handlePublish } from './commands/publish'
import { runPlainLogin } from './login/plain-login'
import { initializeApp } from './init/init-app'
import { getProjectRoot, setProjectRoot } from './project-files'
-import { initAnalytics, trackEvent } from './utils/analytics'
+import { trackEvent } from './utils/analytics'
import { getAuthToken, getAuthTokenDetails } from './utils/auth'
import { resetCodebuffClient } from './utils/codebuff-client'
import { setApiClientAuthToken } from './utils/codebuff-api'
@@ -66,7 +66,7 @@ function loadPackageVersion(): string {
// Without this, refetchInterval won't work because TanStack Query thinks the app is "unfocused"
focusManager.setEventListener(() => {
// No-op: no event listeners in CLI environment (no window focus/visibility events)
- return () => {}
+ return () => { }
})
focusManager.setFocused(true)
@@ -222,26 +222,17 @@ async function main(): Promise {
const startCwd = process.cwd()
const showProjectPicker = shouldShowProjectPicker(startCwd, homeDir)
- // Initialize analytics early, before anything that might use the logger
- // (the logger calls trackEvent, which throws if analytics isn't initialized)
- try {
- initAnalytics()
-
- // Track app launch event
- trackEvent(AnalyticsEvent.APP_LAUNCHED, {
- version: loadPackageVersion(),
- platform: process.platform,
- arch: process.arch,
- hasInitialPrompt: Boolean(initialPrompt),
- hasAgentOverride: hasAgentOverride,
- continueChat,
- initialMode: initialMode ?? 'DEFAULT',
- isFreeBuff: IS_FREEBUFF,
- })
- } catch (error) {
- // Analytics initialization is optional - don't fail the app if it errors
- logger.debug(error, 'Failed to initialize analytics')
- }
+ // Requires analytics to be initialized, which is done in initializeApp
+ trackEvent(AnalyticsEvent.APP_LAUNCHED, {
+ version: loadPackageVersion(),
+ platform: process.platform,
+ arch: process.arch,
+ hasInitialPrompt: Boolean(initialPrompt),
+ hasAgentOverride: hasAgentOverride,
+ continueChat,
+ initialMode: initialMode ?? 'DEFAULT',
+ isFreeBuff: IS_FREEBUFF,
+ })
// Initialize agent registry (loads user agents via SDK).
// When --agent is provided, skip local .agents to avoid overrides.
diff --git a/cli/src/init/init-app.ts b/cli/src/init/init-app.ts
index 133c3ca181..1b8ae41efa 100644
--- a/cli/src/init/init-app.ts
+++ b/cli/src/init/init-app.ts
@@ -12,6 +12,7 @@ import { initializeThemeStore } from '../hooks/use-theme'
import { setProjectRoot } from '../project-files'
import { initTimestampFormatter } from '../utils/helpers'
import { enableManualThemeRefresh } from '../utils/theme-system'
+import { initAnalytics } from '../utils/analytics'
import { initializeDirenv } from './init-direnv'
export async function initializeApp(params: { cwd?: string }): Promise {
@@ -21,6 +22,14 @@ export async function initializeApp(params: { cwd?: string }): Promise {
const baseCwd = process.cwd()
setProjectRoot(baseCwd)
+ // Initialize analytics before direnv, because direnv uses the logger
+ // which calls trackEvent — analytics must be ready first.
+ try {
+ initAnalytics()
+ } catch (error) {
+ console.debug('Failed to initialize analytics:', error)
+ }
+
// Initialize direnv environment before anything else
initializeDirenv()
diff --git a/common/src/analytics.ts b/common/src/analytics.ts
index 46965bd17d..ea88cf7e59 100644
--- a/common/src/analytics.ts
+++ b/common/src/analytics.ts
@@ -3,6 +3,7 @@ import { env, DEBUG_ANALYTICS } from '@codebuff/common/env'
import { createPostHogClient, type AnalyticsClient } from './analytics-core'
import { AnalyticsEvent } from './constants/analytics-events'
+import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics'
import type { Logger } from '@codebuff/common/types/contracts/logger'
let client: AnalyticsClient | undefined
@@ -32,6 +33,18 @@ export async function flushAnalytics(logger?: Logger) {
}
}
+export function withDefaultProperties(
+ trackEventFn: TrackEventFn,
+ defaultProperties: Record,
+): TrackEventFn {
+ return (params) => {
+ trackEventFn({
+ ...params,
+ properties: { ...defaultProperties, ...params.properties },
+ })
+ }
+}
+
export function trackEvent({
event,
userId,
diff --git a/common/src/tools/params/tool/set-output.ts b/common/src/tools/params/tool/set-output.ts
index d9a69ea5da..1171f63dc3 100644
--- a/common/src/tools/params/tool/set-output.ts
+++ b/common/src/tools/params/tool/set-output.ts
@@ -6,6 +6,21 @@ import type { $ToolParams } from '../../constants'
const toolName = 'set_output'
const endsAgentStep = false
+
+// WHY `data` EXISTS IN THE INPUT SCHEMA:
+// Subagents inherit their parent's tool definitions, and because of prompt caching
+// we cannot modify or add tools mid-conversation. OpenAI models enforce the tool's
+// input schema strictly, so we need a permissive schema that any model can produce valid calls against.
+// An empty schema or `z.object({}).passthrough()` would be rejected by OpenAI's
+// strict schema enforcement. The `data: z.record(...)` field is a deliberately
+// vague shape that satisfies OpenAI while allowing us to inject the real
+// outputSchema later in the conversation (in the instructions prompt).
+//
+// At runtime, the handler (`packages/agent-runtime/src/tools/handlers/tool/set-output.ts`)
+// tries parsing against the real outputSchema in two ways:
+// 1. Parse the raw output (agent passed fields at top level)
+// 2. Fallback: parse `output.data` (agent wrapped fields in `data`)
+// This means both `{ results: [...] }` and `{ data: { results: [...] } }` are accepted.
const inputSchema = z
.looseObject({
data: z.record(z.string(), z.any()).optional(),
diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json
index 25a1e24696..d29c729fc8 100644
--- a/freebuff/cli/release/package.json
+++ b/freebuff/cli/release/package.json
@@ -1,6 +1,6 @@
{
"name": "freebuff",
- "version": "0.0.20",
+ "version": "0.0.21",
"description": "The world's strongest free coding agent",
"license": "MIT",
"bin": {
diff --git a/packages/agent-runtime/src/templates/strings.ts b/packages/agent-runtime/src/templates/strings.ts
index 313c20b220..6ac005a151 100644
--- a/packages/agent-runtime/src/templates/strings.ts
+++ b/packages/agent-runtime/src/templates/strings.ts
@@ -226,7 +226,7 @@ export async function getAgentPrompt(
if (outputSchema) {
addendum += '\n\n## Output Schema\n\n'
addendum +=
- 'When using the set_output tool, your output must conform to this schema:\n\n'
+ 'When using the set_output tool, your output must conform to this schema. You may pass the fields either directly as top-level parameters or inside a `data` field — both are accepted.\n\n'
addendum += '```json\n'
try {
// Convert Zod schema to JSON schema for display
diff --git a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts
index 2def7b1d51..8dec297118 100644
--- a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts
+++ b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts
@@ -52,13 +52,24 @@ export const handleSetOutput = (async (params: {
agentTemplate.outputSchema.parse(data)
finalOutput = data
} catch (error2) {
- const errorMessage = `Output validation error: Output failed to match the output schema and was ignored. You might want to try again! Issues: ${error}`
+ // Show whichever error has fewer issues — that represents the "closer" parse
+ // attempt and gives the agent more actionable feedback for retrying.
+ const issues1 = getZodIssueCount(error)
+ const issues2 = getZodIssueCount(error2)
+ const usedData = issues2 < issues1
+ const bestError = usedData ? error2 : error
+ const prefix = usedData
+ ? 'Output validation error: Your output was found inside the `data` field but still failed validation. Please fix the issues and try again without wrapping in `data`. Issues: '
+ : 'Output validation error: Output failed to match the output schema and was ignored. You might want to try again! Issues: '
+ const errorMessage = `${prefix}${bestError}`
logger.error(
{
output,
agentType: agentState.agentType,
agentId: agentState.agentId,
- error,
+ topLevelError: error,
+ dataFieldError: error2,
+ usedDataFieldError: usedData,
},
'set_output validation error',
)
@@ -78,3 +89,15 @@ export const handleSetOutput = (async (params: {
return { output: jsonToolResult({ message: 'Output set' }) }
}) satisfies CodebuffToolHandlerFunction
+
+function getZodIssueCount(error: unknown): number {
+ if (
+ error != null &&
+ typeof error === 'object' &&
+ 'issues' in error &&
+ Array.isArray((error as { issues: unknown }).issues)
+ ) {
+ return (error as { issues: unknown[] }).issues.length
+ }
+ return Infinity
+}
diff --git a/packages/billing/src/balance-calculator.ts b/packages/billing/src/balance-calculator.ts
index 7a96617128..1a2439f66a 100644
--- a/packages/billing/src/balance-calculator.ts
+++ b/packages/billing/src/balance-calculator.ts
@@ -536,6 +536,7 @@ export async function consumeCreditsAndAddAgentStep(params: {
cacheReadInputTokens: number
reasoningTokens: number | null
outputTokens: number
+ ttftMs: number | null
logger: Logger
}): Promise> {
@@ -561,6 +562,7 @@ export async function consumeCreditsAndAddAgentStep(params: {
cacheReadInputTokens,
reasoningTokens,
outputTokens,
+ ttftMs,
logger,
} = params
@@ -650,6 +652,7 @@ export async function consumeCreditsAndAddAgentStep(params: {
credits,
byok,
latency_ms: latencyMs,
+ ttft_ms: ttftMs,
user_id: userId,
})
} catch (error) {
diff --git a/packages/internal/src/db/migrations/0042_needy_jack_murdock.sql b/packages/internal/src/db/migrations/0042_needy_jack_murdock.sql
new file mode 100644
index 0000000000..77648859f6
--- /dev/null
+++ b/packages/internal/src/db/migrations/0042_needy_jack_murdock.sql
@@ -0,0 +1 @@
+ALTER TABLE "message" ADD COLUMN "ttft_ms" integer;
\ No newline at end of file
diff --git a/packages/internal/src/db/migrations/meta/0042_snapshot.json b/packages/internal/src/db/migrations/meta/0042_snapshot.json
new file mode 100644
index 0000000000..abb7dceabe
--- /dev/null
+++ b/packages/internal/src/db/migrations/meta/0042_snapshot.json
@@ -0,0 +1,3078 @@
+{
+ "id": "c7772899-6ae6-4a07-890e-a1ca64dc6e61",
+ "prevId": "db3b93eb-3ed2-4468-80d1-0d082f4cecbd",
+ "version": "7",
+ "dialect": "postgresql",
+ "tables": {
+ "public.account": {
+ "name": "account",
+ "schema": "",
+ "columns": {
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "provider": {
+ "name": "provider",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "providerAccountId": {
+ "name": "providerAccountId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "refresh_token": {
+ "name": "refresh_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "access_token": {
+ "name": "access_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "expires_at": {
+ "name": "expires_at",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "token_type": {
+ "name": "token_type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "scope": {
+ "name": "scope",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "id_token": {
+ "name": "id_token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "session_state": {
+ "name": "session_state",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "account_userId_user_id_fk": {
+ "name": "account_userId_user_id_fk",
+ "tableFrom": "account",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "account_provider_providerAccountId_pk": {
+ "name": "account_provider_providerAccountId_pk",
+ "columns": [
+ "provider",
+ "providerAccountId"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.ad_impression": {
+ "name": "ad_impression",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "ad_text": {
+ "name": "ad_text",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "title": {
+ "name": "title",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "cta": {
+ "name": "cta",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "''"
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "favicon": {
+ "name": "favicon",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "click_url": {
+ "name": "click_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "imp_url": {
+ "name": "imp_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "payout": {
+ "name": "payout",
+ "type": "numeric(10, 6)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "credits_granted": {
+ "name": "credits_granted",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "grant_operation_id": {
+ "name": "grant_operation_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "served_at": {
+ "name": "served_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "impression_fired_at": {
+ "name": "impression_fired_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "clicked_at": {
+ "name": "clicked_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "idx_ad_impression_user": {
+ "name": "idx_ad_impression_user",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "served_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_ad_impression_imp_url": {
+ "name": "idx_ad_impression_imp_url",
+ "columns": [
+ {
+ "expression": "imp_url",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "ad_impression_user_id_user_id_fk": {
+ "name": "ad_impression_user_id_user_id_fk",
+ "tableFrom": "ad_impression",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "ad_impression_imp_url_unique": {
+ "name": "ad_impression_imp_url_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "imp_url"
+ ]
+ }
+ },
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.agent_config": {
+ "name": "agent_config",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "version": {
+ "name": "version",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "publisher_id": {
+ "name": "publisher_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "major": {
+ "name": "major",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)",
+ "type": "stored"
+ }
+ },
+ "minor": {
+ "name": "minor",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)",
+ "type": "stored"
+ }
+ },
+ "patch": {
+ "name": "patch",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)",
+ "type": "stored"
+ }
+ },
+ "data": {
+ "name": "data",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "idx_agent_config_publisher": {
+ "name": "idx_agent_config_publisher",
+ "columns": [
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "agent_config_publisher_id_publisher_id_fk": {
+ "name": "agent_config_publisher_id_publisher_id_fk",
+ "tableFrom": "agent_config",
+ "tableTo": "publisher",
+ "columnsFrom": [
+ "publisher_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "agent_config_publisher_id_id_version_pk": {
+ "name": "agent_config_publisher_id_id_version_pk",
+ "columns": [
+ "publisher_id",
+ "id",
+ "version"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.agent_run": {
+ "name": "agent_run",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "agent_id": {
+ "name": "agent_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "publisher_id": {
+ "name": "publisher_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '/', 1)\n ELSE NULL\n END",
+ "type": "stored"
+ }
+ },
+ "agent_name": {
+ "name": "agent_name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(split_part(agent_id, '/', 2), '@', 1)\n ELSE agent_id\n END",
+ "type": "stored"
+ }
+ },
+ "agent_version": {
+ "name": "agent_version",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '@', 2)\n ELSE NULL\n END",
+ "type": "stored"
+ }
+ },
+ "ancestor_run_ids": {
+ "name": "ancestor_run_ids",
+ "type": "text[]",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "root_run_id": {
+ "name": "root_run_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END",
+ "type": "stored"
+ }
+ },
+ "parent_run_id": {
+ "name": "parent_run_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END",
+ "type": "stored"
+ }
+ },
+ "depth": {
+ "name": "depth",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)",
+ "type": "stored"
+ }
+ },
+ "duration_ms": {
+ "name": "duration_ms",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer",
+ "type": "stored"
+ }
+ },
+ "total_steps": {
+ "name": "total_steps",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "default": 0
+ },
+ "direct_credits": {
+ "name": "direct_credits",
+ "type": "numeric(10, 6)",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "'0'"
+ },
+ "total_credits": {
+ "name": "total_credits",
+ "type": "numeric(10, 6)",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "'0'"
+ },
+ "status": {
+ "name": "status",
+ "type": "agent_run_status",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'running'"
+ },
+ "error_message": {
+ "name": "error_message",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "completed_at": {
+ "name": "completed_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "idx_agent_run_user_id": {
+ "name": "idx_agent_run_user_id",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_parent": {
+ "name": "idx_agent_run_parent",
+ "columns": [
+ {
+ "expression": "parent_run_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_root": {
+ "name": "idx_agent_run_root",
+ "columns": [
+ {
+ "expression": "root_run_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_agent_id": {
+ "name": "idx_agent_run_agent_id",
+ "columns": [
+ {
+ "expression": "agent_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_publisher": {
+ "name": "idx_agent_run_publisher",
+ "columns": [
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_status": {
+ "name": "idx_agent_run_status",
+ "columns": [
+ {
+ "expression": "status",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'running'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_ancestors_gin": {
+ "name": "idx_agent_run_ancestors_gin",
+ "columns": [
+ {
+ "expression": "ancestor_run_ids",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "gin",
+ "with": {}
+ },
+ "idx_agent_run_completed_publisher_agent": {
+ "name": "idx_agent_run_completed_publisher_agent",
+ "columns": [
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "agent_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'completed'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_completed_recent": {
+ "name": "idx_agent_run_completed_recent",
+ "columns": [
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "agent_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'completed'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_completed_version": {
+ "name": "idx_agent_run_completed_version",
+ "columns": [
+ {
+ "expression": "publisher_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "agent_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "agent_version",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'completed'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_run_completed_user": {
+ "name": "idx_agent_run_completed_user",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"agent_run\".\"status\" = 'completed'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "agent_run_user_id_user_id_fk": {
+ "name": "agent_run_user_id_user_id_fk",
+ "tableFrom": "agent_run",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.agent_step": {
+ "name": "agent_step",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "agent_run_id": {
+ "name": "agent_run_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "step_number": {
+ "name": "step_number",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "duration_ms": {
+ "name": "duration_ms",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer",
+ "type": "stored"
+ }
+ },
+ "credits": {
+ "name": "credits",
+ "type": "numeric(10, 6)",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'0'"
+ },
+ "child_run_ids": {
+ "name": "child_run_ids",
+ "type": "text[]",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "spawned_count": {
+ "name": "spawned_count",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "array_length(child_run_ids, 1)",
+ "type": "stored"
+ }
+ },
+ "message_id": {
+ "name": "message_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "status": {
+ "name": "status",
+ "type": "agent_step_status",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'completed'"
+ },
+ "error_message": {
+ "name": "error_message",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "completed_at": {
+ "name": "completed_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "unique_step_number_per_run": {
+ "name": "unique_step_number_per_run",
+ "columns": [
+ {
+ "expression": "agent_run_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "step_number",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_step_run_id": {
+ "name": "idx_agent_step_run_id",
+ "columns": [
+ {
+ "expression": "agent_run_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_agent_step_children_gin": {
+ "name": "idx_agent_step_children_gin",
+ "columns": [
+ {
+ "expression": "child_run_ids",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "gin",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "agent_step_agent_run_id_agent_run_id_fk": {
+ "name": "agent_step_agent_run_id_agent_run_id_fk",
+ "tableFrom": "agent_step",
+ "tableTo": "agent_run",
+ "columnsFrom": [
+ "agent_run_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.credit_ledger": {
+ "name": "credit_ledger",
+ "schema": "",
+ "columns": {
+ "operation_id": {
+ "name": "operation_id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "principal": {
+ "name": "principal",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "balance": {
+ "name": "balance",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "type": {
+ "name": "type",
+ "type": "grant_type",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "priority": {
+ "name": "priority",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "expires_at": {
+ "name": "expires_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "stripe_subscription_id": {
+ "name": "stripe_subscription_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "idx_credit_ledger_active_balance": {
+ "name": "idx_credit_ledger_active_balance",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "balance",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "expires_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "priority",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_credit_ledger_org": {
+ "name": "idx_credit_ledger_org",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_credit_ledger_subscription": {
+ "name": "idx_credit_ledger_subscription",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "type",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "credit_ledger_user_id_user_id_fk": {
+ "name": "credit_ledger_user_id_user_id_fk",
+ "tableFrom": "credit_ledger",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "credit_ledger_org_id_org_id_fk": {
+ "name": "credit_ledger_org_id_org_id_fk",
+ "tableFrom": "credit_ledger",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.encrypted_api_keys": {
+ "name": "encrypted_api_keys",
+ "schema": "",
+ "columns": {
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "type": {
+ "name": "type",
+ "type": "api_key_type",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "api_key": {
+ "name": "api_key",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "encrypted_api_keys_user_id_user_id_fk": {
+ "name": "encrypted_api_keys_user_id_user_id_fk",
+ "tableFrom": "encrypted_api_keys",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "encrypted_api_keys_user_id_type_pk": {
+ "name": "encrypted_api_keys_user_id_type_pk",
+ "columns": [
+ "user_id",
+ "type"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.fingerprint": {
+ "name": "fingerprint",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "sig_hash": {
+ "name": "sig_hash",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.git_eval_results": {
+ "name": "git_eval_results",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "cost_mode": {
+ "name": "cost_mode",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "reasoner_model": {
+ "name": "reasoner_model",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "agent_model": {
+ "name": "agent_model",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "metadata": {
+ "name": "metadata",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "cost": {
+ "name": "cost",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "is_public": {
+ "name": "is_public",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.limit_override": {
+ "name": "limit_override",
+ "schema": "",
+ "columns": {
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "credits_per_block": {
+ "name": "credits_per_block",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "block_duration_hours": {
+ "name": "block_duration_hours",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "weekly_credit_limit": {
+ "name": "weekly_credit_limit",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "limit_override_user_id_user_id_fk": {
+ "name": "limit_override_user_id_user_id_fk",
+ "tableFrom": "limit_override",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.message": {
+ "name": "message",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "finished_at": {
+ "name": "finished_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "client_id": {
+ "name": "client_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "client_request_id": {
+ "name": "client_request_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "model": {
+ "name": "model",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "agent_id": {
+ "name": "agent_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "request": {
+ "name": "request",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "last_message": {
+ "name": "last_message",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false,
+ "generated": {
+ "as": "\"message\".\"request\" -> -1",
+ "type": "stored"
+ }
+ },
+ "reasoning_text": {
+ "name": "reasoning_text",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "response": {
+ "name": "response",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "input_tokens": {
+ "name": "input_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "cache_creation_input_tokens": {
+ "name": "cache_creation_input_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "cache_read_input_tokens": {
+ "name": "cache_read_input_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "reasoning_tokens": {
+ "name": "reasoning_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "output_tokens": {
+ "name": "output_tokens",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "cost": {
+ "name": "cost",
+ "type": "numeric(100, 20)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "credits": {
+ "name": "credits",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "byok": {
+ "name": "byok",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "latency_ms": {
+ "name": "latency_ms",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "ttft_ms": {
+ "name": "ttft_ms",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "repo_url": {
+ "name": "repo_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "message_user_id_idx": {
+ "name": "message_user_id_idx",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "message_finished_at_user_id_idx": {
+ "name": "message_finished_at_user_id_idx",
+ "columns": [
+ {
+ "expression": "finished_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "message_org_id_idx": {
+ "name": "message_org_id_idx",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "message_org_id_finished_at_idx": {
+ "name": "message_org_id_finished_at_idx",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "finished_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "message_user_id_user_id_fk": {
+ "name": "message_user_id_user_id_fk",
+ "tableFrom": "message",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "message_org_id_org_id_fk": {
+ "name": "message_org_id_org_id_fk",
+ "tableFrom": "message",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org": {
+ "name": "org",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "slug": {
+ "name": "slug",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "owner_id": {
+ "name": "owner_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "stripe_customer_id": {
+ "name": "stripe_customer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "stripe_subscription_id": {
+ "name": "stripe_subscription_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "current_period_start": {
+ "name": "current_period_start",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "current_period_end": {
+ "name": "current_period_end",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "auto_topup_enabled": {
+ "name": "auto_topup_enabled",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "auto_topup_threshold": {
+ "name": "auto_topup_threshold",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "auto_topup_amount": {
+ "name": "auto_topup_amount",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "credit_limit": {
+ "name": "credit_limit",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "billing_alerts": {
+ "name": "billing_alerts",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": true
+ },
+ "usage_alerts": {
+ "name": "usage_alerts",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": true
+ },
+ "weekly_reports": {
+ "name": "weekly_reports",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "org_owner_id_user_id_fk": {
+ "name": "org_owner_id_user_id_fk",
+ "tableFrom": "org",
+ "tableTo": "user",
+ "columnsFrom": [
+ "owner_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "org_slug_unique": {
+ "name": "org_slug_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "slug"
+ ]
+ },
+ "org_stripe_customer_id_unique": {
+ "name": "org_stripe_customer_id_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "stripe_customer_id"
+ ]
+ }
+ },
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org_feature": {
+ "name": "org_feature",
+ "schema": "",
+ "columns": {
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "feature": {
+ "name": "feature",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "config": {
+ "name": "config",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "is_active": {
+ "name": "is_active",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "idx_org_feature_active": {
+ "name": "idx_org_feature_active",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "is_active",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "org_feature_org_id_org_id_fk": {
+ "name": "org_feature_org_id_org_id_fk",
+ "tableFrom": "org_feature",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "org_feature_org_id_feature_pk": {
+ "name": "org_feature_org_id_feature_pk",
+ "columns": [
+ "org_id",
+ "feature"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org_invite": {
+ "name": "org_invite",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "role": {
+ "name": "role",
+ "type": "org_role",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "token": {
+ "name": "token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "invited_by": {
+ "name": "invited_by",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "expires_at": {
+ "name": "expires_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "accepted_at": {
+ "name": "accepted_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "accepted_by": {
+ "name": "accepted_by",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "idx_org_invite_token": {
+ "name": "idx_org_invite_token",
+ "columns": [
+ {
+ "expression": "token",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_org_invite_email": {
+ "name": "idx_org_invite_email",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "email",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_org_invite_expires": {
+ "name": "idx_org_invite_expires",
+ "columns": [
+ {
+ "expression": "expires_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "org_invite_org_id_org_id_fk": {
+ "name": "org_invite_org_id_org_id_fk",
+ "tableFrom": "org_invite",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "org_invite_invited_by_user_id_fk": {
+ "name": "org_invite_invited_by_user_id_fk",
+ "tableFrom": "org_invite",
+ "tableTo": "user",
+ "columnsFrom": [
+ "invited_by"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "org_invite_accepted_by_user_id_fk": {
+ "name": "org_invite_accepted_by_user_id_fk",
+ "tableFrom": "org_invite",
+ "tableTo": "user",
+ "columnsFrom": [
+ "accepted_by"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "org_invite_token_unique": {
+ "name": "org_invite_token_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "token"
+ ]
+ }
+ },
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org_member": {
+ "name": "org_member",
+ "schema": "",
+ "columns": {
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "role": {
+ "name": "role",
+ "type": "org_role",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "joined_at": {
+ "name": "joined_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "org_member_org_id_org_id_fk": {
+ "name": "org_member_org_id_org_id_fk",
+ "tableFrom": "org_member",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "org_member_user_id_user_id_fk": {
+ "name": "org_member_user_id_user_id_fk",
+ "tableFrom": "org_member",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "org_member_org_id_user_id_pk": {
+ "name": "org_member_org_id_user_id_pk",
+ "columns": [
+ "org_id",
+ "user_id"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.org_repo": {
+ "name": "org_repo",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "repo_url": {
+ "name": "repo_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "repo_name": {
+ "name": "repo_name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "repo_owner": {
+ "name": "repo_owner",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "approved_by": {
+ "name": "approved_by",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "approved_at": {
+ "name": "approved_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "is_active": {
+ "name": "is_active",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": true
+ }
+ },
+ "indexes": {
+ "idx_org_repo_active": {
+ "name": "idx_org_repo_active",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "is_active",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_org_repo_unique": {
+ "name": "idx_org_repo_unique",
+ "columns": [
+ {
+ "expression": "org_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "repo_url",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "org_repo_org_id_org_id_fk": {
+ "name": "org_repo_org_id_org_id_fk",
+ "tableFrom": "org_repo",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "org_repo_approved_by_user_id_fk": {
+ "name": "org_repo_approved_by_user_id_fk",
+ "tableFrom": "org_repo",
+ "tableTo": "user",
+ "columnsFrom": [
+ "approved_by"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.publisher": {
+ "name": "publisher",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "verified": {
+ "name": "verified",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "bio": {
+ "name": "bio",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "avatar_url": {
+ "name": "avatar_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "org_id": {
+ "name": "org_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_by": {
+ "name": "created_by",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "publisher_user_id_user_id_fk": {
+ "name": "publisher_user_id_user_id_fk",
+ "tableFrom": "publisher",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "publisher_org_id_org_id_fk": {
+ "name": "publisher_org_id_org_id_fk",
+ "tableFrom": "publisher",
+ "tableTo": "org",
+ "columnsFrom": [
+ "org_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "publisher_created_by_user_id_fk": {
+ "name": "publisher_created_by_user_id_fk",
+ "tableFrom": "publisher",
+ "tableTo": "user",
+ "columnsFrom": [
+ "created_by"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {
+ "publisher_single_owner": {
+ "name": "publisher_single_owner",
+ "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)"
+ }
+ },
+ "isRLSEnabled": false
+ },
+ "public.referral": {
+ "name": "referral",
+ "schema": "",
+ "columns": {
+ "referrer_id": {
+ "name": "referrer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "referred_id": {
+ "name": "referred_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "status": {
+ "name": "status",
+ "type": "referral_status",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'pending'"
+ },
+ "credits": {
+ "name": "credits",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "is_legacy": {
+ "name": "is_legacy",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "completed_at": {
+ "name": "completed_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "referral_referrer_id_user_id_fk": {
+ "name": "referral_referrer_id_user_id_fk",
+ "tableFrom": "referral",
+ "tableTo": "user",
+ "columnsFrom": [
+ "referrer_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "referral_referred_id_user_id_fk": {
+ "name": "referral_referred_id_user_id_fk",
+ "tableFrom": "referral",
+ "tableTo": "user",
+ "columnsFrom": [
+ "referred_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {
+ "referral_referrer_id_referred_id_pk": {
+ "name": "referral_referrer_id_referred_id_pk",
+ "columns": [
+ "referrer_id",
+ "referred_id"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.session": {
+ "name": "session",
+ "schema": "",
+ "columns": {
+ "sessionToken": {
+ "name": "sessionToken",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "userId": {
+ "name": "userId",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "expires": {
+ "name": "expires",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "fingerprint_id": {
+ "name": "fingerprint_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "type": {
+ "name": "type",
+ "type": "session_type",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'web'"
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {
+ "session_userId_user_id_fk": {
+ "name": "session_userId_user_id_fk",
+ "tableFrom": "session",
+ "tableTo": "user",
+ "columnsFrom": [
+ "userId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "session_fingerprint_id_fingerprint_id_fk": {
+ "name": "session_fingerprint_id_fingerprint_id_fk",
+ "tableFrom": "session",
+ "tableTo": "fingerprint",
+ "columnsFrom": [
+ "fingerprint_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.subscription": {
+ "name": "subscription",
+ "schema": "",
+ "columns": {
+ "stripe_subscription_id": {
+ "name": "stripe_subscription_id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "stripe_customer_id": {
+ "name": "stripe_customer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "stripe_price_id": {
+ "name": "stripe_price_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "tier": {
+ "name": "tier",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "scheduled_tier": {
+ "name": "scheduled_tier",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "status": {
+ "name": "status",
+ "type": "subscription_status",
+ "typeSchema": "public",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'active'"
+ },
+ "billing_period_start": {
+ "name": "billing_period_start",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "billing_period_end": {
+ "name": "billing_period_end",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "cancel_at_period_end": {
+ "name": "cancel_at_period_end",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "canceled_at": {
+ "name": "canceled_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "idx_subscription_customer": {
+ "name": "idx_subscription_customer",
+ "columns": [
+ {
+ "expression": "stripe_customer_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_subscription_user": {
+ "name": "idx_subscription_user",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "idx_subscription_status": {
+ "name": "idx_subscription_status",
+ "columns": [
+ {
+ "expression": "status",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"subscription\".\"status\" = 'active'",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "subscription_user_id_user_id_fk": {
+ "name": "subscription_user_id_user_id_fk",
+ "tableFrom": "subscription",
+ "tableTo": "user",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.sync_failure": {
+ "name": "sync_failure",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "provider": {
+ "name": "provider",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "last_attempt_at": {
+ "name": "last_attempt_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "retry_count": {
+ "name": "retry_count",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 1
+ },
+ "last_error": {
+ "name": "last_error",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {
+ "idx_sync_failure_retry": {
+ "name": "idx_sync_failure_retry",
+ "columns": [
+ {
+ "expression": "retry_count",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "last_attempt_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "where": "\"sync_failure\".\"retry_count\" < 5",
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.user": {
+ "name": "user",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "password": {
+ "name": "password",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "emailVerified": {
+ "name": "emailVerified",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "image": {
+ "name": "image",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "stripe_customer_id": {
+ "name": "stripe_customer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "next_quota_reset": {
+ "name": "next_quota_reset",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "now() + INTERVAL '1 month'"
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "referral_code": {
+ "name": "referral_code",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "'ref-' || gen_random_uuid()"
+ },
+ "referral_limit": {
+ "name": "referral_limit",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 5
+ },
+ "discord_id": {
+ "name": "discord_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "handle": {
+ "name": "handle",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "auto_topup_enabled": {
+ "name": "auto_topup_enabled",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "auto_topup_threshold": {
+ "name": "auto_topup_threshold",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "auto_topup_amount": {
+ "name": "auto_topup_amount",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "banned": {
+ "name": "banned",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "fallback_to_a_la_carte": {
+ "name": "fallback_to_a_la_carte",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "user_email_unique": {
+ "name": "user_email_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "email"
+ ]
+ },
+ "user_stripe_customer_id_unique": {
+ "name": "user_stripe_customer_id_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "stripe_customer_id"
+ ]
+ },
+ "user_referral_code_unique": {
+ "name": "user_referral_code_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "referral_code"
+ ]
+ },
+ "user_discord_id_unique": {
+ "name": "user_discord_id_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "discord_id"
+ ]
+ },
+ "user_handle_unique": {
+ "name": "user_handle_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "handle"
+ ]
+ }
+ },
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ },
+ "public.verificationToken": {
+ "name": "verificationToken",
+ "schema": "",
+ "columns": {
+ "identifier": {
+ "name": "identifier",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "token": {
+ "name": "token",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "expires": {
+ "name": "expires",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {
+ "verificationToken_identifier_token_pk": {
+ "name": "verificationToken_identifier_token_pk",
+ "columns": [
+ "identifier",
+ "token"
+ ]
+ }
+ },
+ "uniqueConstraints": {},
+ "policies": {},
+ "checkConstraints": {},
+ "isRLSEnabled": false
+ }
+ },
+ "enums": {
+ "public.referral_status": {
+ "name": "referral_status",
+ "schema": "public",
+ "values": [
+ "pending",
+ "completed"
+ ]
+ },
+ "public.agent_run_status": {
+ "name": "agent_run_status",
+ "schema": "public",
+ "values": [
+ "running",
+ "completed",
+ "failed",
+ "cancelled"
+ ]
+ },
+ "public.agent_step_status": {
+ "name": "agent_step_status",
+ "schema": "public",
+ "values": [
+ "running",
+ "completed",
+ "skipped"
+ ]
+ },
+ "public.api_key_type": {
+ "name": "api_key_type",
+ "schema": "public",
+ "values": [
+ "anthropic",
+ "gemini",
+ "openai"
+ ]
+ },
+ "public.grant_type": {
+ "name": "grant_type",
+ "schema": "public",
+ "values": [
+ "free",
+ "referral",
+ "referral_legacy",
+ "subscription",
+ "purchase",
+ "admin",
+ "organization",
+ "ad"
+ ]
+ },
+ "public.org_role": {
+ "name": "org_role",
+ "schema": "public",
+ "values": [
+ "owner",
+ "admin",
+ "member"
+ ]
+ },
+ "public.session_type": {
+ "name": "session_type",
+ "schema": "public",
+ "values": [
+ "web",
+ "pat",
+ "cli"
+ ]
+ },
+ "public.subscription_status": {
+ "name": "subscription_status",
+ "schema": "public",
+ "values": [
+ "incomplete",
+ "incomplete_expired",
+ "trialing",
+ "active",
+ "past_due",
+ "canceled",
+ "unpaid",
+ "paused"
+ ]
+ }
+ },
+ "schemas": {},
+ "sequences": {},
+ "roles": {},
+ "policies": {},
+ "views": {},
+ "_meta": {
+ "columns": {},
+ "schemas": {},
+ "tables": {}
+ }
+}
\ No newline at end of file
diff --git a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json
index bce61005a2..8952549c98 100644
--- a/packages/internal/src/db/migrations/meta/_journal.json
+++ b/packages/internal/src/db/migrations/meta/_journal.json
@@ -295,6 +295,13 @@
"when": 1770334047429,
"tag": "0041_nappy_nebula",
"breakpoints": true
+ },
+ {
+ "idx": 42,
+ "version": "7",
+ "when": 1773878149145,
+ "tag": "0042_needy_jack_murdock",
+ "breakpoints": true
}
]
}
\ No newline at end of file
diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts
index 1fa381c5df..0033314f00 100644
--- a/packages/internal/src/db/schema.ts
+++ b/packages/internal/src/db/schema.ts
@@ -232,6 +232,7 @@ export const message = pgTable(
credits: integer('credits').notNull(),
byok: boolean('byok').notNull().default(false),
latency_ms: integer('latency_ms'),
+ ttft_ms: integer('ttft_ms'),
user_id: text('user_id').references(() => user.id, { onDelete: 'cascade' }),
org_id: text('org_id').references(() => org.id, { onDelete: 'cascade' }),
diff --git a/scripts/query-minimax-cache-stats.ts b/scripts/query-minimax-cache-stats.ts
new file mode 100644
index 0000000000..7c742c2ccc
--- /dev/null
+++ b/scripts/query-minimax-cache-stats.ts
@@ -0,0 +1,138 @@
+import { db } from '@codebuff/internal/db'
+import { sql } from 'drizzle-orm'
+
+async function queryMinimaxCacheStats() {
+ console.log('Querying minimax/minimax-m2.5 usage (last 19 hours)...\n')
+
+ // 1. Overall stats
+ const overallResult = await db.execute(sql`
+ SELECT
+ COUNT(*) AS total_requests,
+ ROUND(AVG(input_tokens)) AS avg_input_tokens,
+ ROUND(AVG(output_tokens)) AS avg_output_tokens,
+ ROUND(
+ CASE
+ WHEN SUM(input_tokens) > 0
+ THEN SUM(cache_read_input_tokens)::numeric / SUM(input_tokens) * 100
+ ELSE 0
+ END, 1
+ ) AS overall_cache_rate_pct,
+ COUNT(DISTINCT client_id) AS unique_clients
+ FROM message
+ WHERE finished_at >= NOW() - INTERVAL '19 hours'
+ AND model = 'minimax/minimax-m2.5'
+ `)
+
+ const overall = overallResult[0]
+ if (!overall || Number(overall.total_requests) === 0) {
+ console.log('No data found for minimax/minimax-m2.5 in the last 19 hours.')
+ return
+ }
+
+ console.log('Overall Stats')
+ console.log('═══════════════════════════════════════════')
+ console.log(`Total requests: ${overall.total_requests}`)
+ console.log(`Unique clients: ${overall.unique_clients}`)
+ console.log(`Avg input tokens: ${overall.avg_input_tokens}`)
+ console.log(`Avg output tokens: ${overall.avg_output_tokens}`)
+ console.log(`Overall cache rate: ${overall.overall_cache_rate_pct}%`)
+
+ // 2. Per-client stats, ordered by lowest cache rate
+ const clientResult = await db.execute(sql`
+ SELECT
+ client_id,
+ COUNT(*) AS request_count,
+ MIN(finished_at) AS first_seen,
+ MAX(finished_at) AS last_seen,
+ ROUND(AVG(input_tokens)) AS avg_input,
+ ROUND(
+ CASE
+ WHEN SUM(input_tokens) > 0
+ THEN SUM(cache_read_input_tokens)::numeric / SUM(input_tokens) * 100
+ ELSE 0
+ END, 1
+ ) AS cache_rate_pct,
+ SUM(cache_read_input_tokens) AS total_cache_read,
+ SUM(input_tokens) AS total_input
+ FROM message
+ WHERE finished_at >= NOW() - INTERVAL '19 hours'
+ AND model = 'minimax/minimax-m2.5'
+ AND client_id IS NOT NULL
+ GROUP BY client_id
+ ORDER BY cache_rate_pct ASC, request_count DESC
+ `)
+
+ console.log('\n\nPer-Client Cache Rates (lowest first)')
+ console.log('═══════════════════════════════════════════')
+
+ if (clientResult.length === 0) {
+ console.log('No client-level data found.')
+ return
+ }
+
+ for (const row of clientResult) {
+ const clientId = String(row.client_id).slice(0, 12)
+ const reqs = String(row.request_count).padStart(4)
+ const cacheRate = String(row.cache_rate_pct).padStart(6)
+ const avgInput = String(row.avg_input).padStart(8)
+ const firstSeen = row.first_seen
+ ? new Date(String(row.first_seen)).toISOString().slice(0, 16)
+ : 'N/A'
+ const lastSeen = row.last_seen
+ ? new Date(String(row.last_seen)).toISOString().slice(0, 16)
+ : 'N/A'
+ console.log(
+ ` ${clientId}… reqs: ${reqs} cache: ${cacheRate}% avg_input: ${avgInput} range: ${firstSeen} → ${lastSeen}`,
+ )
+ }
+
+ // 3. Recent requests in time order
+ const recentResult = await db.execute(sql`
+ SELECT
+ client_id,
+ finished_at,
+ input_tokens,
+ cache_read_input_tokens,
+ COALESCE(cache_creation_input_tokens, 0) AS cache_creation_input_tokens,
+ output_tokens,
+ ROUND(
+ CASE
+ WHEN input_tokens > 0
+ THEN cache_read_input_tokens::numeric / input_tokens * 100
+ ELSE 0
+ END, 1
+ ) AS cache_rate_pct
+ FROM message
+ WHERE finished_at >= NOW() - INTERVAL '19 hours'
+ AND model = 'minimax/minimax-m2.5'
+ ORDER BY client_id, finished_at DESC
+ LIMIT 100
+ `)
+
+ console.log('\n\nRecent Requests (newest first, last 100)')
+ console.log('═══════════════════════════════════════════')
+
+ for (const row of recentResult) {
+ const clientId = row.client_id
+ ? String(row.client_id).slice(0, 12)
+ : 'unknown '
+ const time = row.finished_at
+ ? new Date(String(row.finished_at)).toISOString().slice(0, 19)
+ : 'N/A'
+ const cacheRate = String(row.cache_rate_pct).padStart(6)
+ const input = String(row.input_tokens).padStart(7)
+ const cached = String(row.cache_read_input_tokens).padStart(7)
+ const creation = String(row.cache_creation_input_tokens).padStart(7)
+ const output = String(row.output_tokens).padStart(6)
+ console.log(
+ ` ${time} ${clientId}… cache: ${cacheRate}% input: ${input} cached: ${cached} creation: ${creation} output: ${output}`,
+ )
+ }
+}
+
+queryMinimaxCacheStats()
+ .then(() => process.exit(0))
+ .catch((err) => {
+ console.error(err)
+ process.exit(1)
+ })
diff --git a/scripts/query-usage-stats.ts b/scripts/query-usage-stats.ts
index 371701902d..15a35703b8 100644
--- a/scripts/query-usage-stats.ts
+++ b/scripts/query-usage-stats.ts
@@ -22,14 +22,13 @@ async function queryUsageStats() {
token_stats AS (
SELECT
- ROUND(AVG(input_tokens + cache_read_input_tokens + cache_creation_input_tokens))
+ ROUND(AVG(input_tokens))
AS avg_total_input_tokens,
ROUND(
AVG(
CASE
- WHEN (input_tokens + cache_read_input_tokens + cache_creation_input_tokens) > 0
- THEN cache_read_input_tokens::numeric
- / (input_tokens + cache_read_input_tokens + cache_creation_input_tokens)
+ WHEN input_tokens > 0
+ THEN cache_read_input_tokens::numeric / input_tokens
ELSE 0
END
) * 100, 1
@@ -42,7 +41,9 @@ async function queryUsageStats() {
client_stats AS (
SELECT
- ROUND(AVG(cnt)) AS avg_requests_per_client
+ ROUND(AVG(cnt)) AS avg_requests_per_client,
+ PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cnt) AS median_requests_per_client,
+ MAX(cnt) AS max_requests_per_client
FROM (
SELECT client_id, COUNT(*) AS cnt
FROM recent
@@ -70,6 +71,8 @@ async function queryUsageStats() {
t.avg_cache_rate_pct,
t.avg_output_tokens,
c.avg_requests_per_client,
+ c.median_requests_per_client,
+ c.max_requests_per_client,
r.median_rps,
r.peak_rps,
t.total_requests
@@ -90,6 +93,8 @@ async function queryUsageStats() {
console.log(`Median RPS: ${row.median_rps}`)
console.log(`Peak RPS: ${row.peak_rps}`)
console.log(`Avg requests/client: ${row.avg_requests_per_client}`)
+ console.log(`Median requests/client: ${row.median_requests_per_client}`)
+ console.log(`Max requests/client: ${row.max_requests_per_client}`)
console.log(`Total requests (7d): ${row.total_requests}`)
}
diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts
index f28eb55a6e..9fb5ebc8bd 100644
--- a/scripts/test-fireworks-long.ts
+++ b/scripts/test-fireworks-long.ts
@@ -13,7 +13,7 @@
export { }
const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
-// const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/qne3jo8v'
+// const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9'
const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5'
// Pricing constants — https://fireworks.ai/pricing
@@ -23,6 +23,9 @@ const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
const MAX_TOKENS = 100
+// Stable session ID so all turns route to the same machine for prompt caching
+const SESSION_ID = `bench-${Math.random().toString(36).slice(2, 10)}`
+
+function computeCost(usage: Record<string, unknown>): { cost: number; breakdown: string } {
const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0
const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0
@@ -175,6 +178,7 @@ async function makeConversationStreamRequest(
headers: {
Authorization: `Bearer ${apiKey}`,
'Content-Type': 'application/json',
+ 'x-session-affinity': SESSION_ID,
},
body: JSON.stringify({
model: FIREWORKS_MODEL,
@@ -220,16 +224,13 @@ async function makeConversationStreamRequest(
const chunk = JSON.parse(raw)
chunkCount++
const delta = chunk.choices?.[0]?.delta
+ if (delta && firstContentChunkTime === undefined) {
+ firstContentChunkTime = Date.now()
+ ttftMs = firstContentChunkTime - startTime
+ }
if (delta?.content) {
- if (firstContentChunkTime === undefined) {
- firstContentChunkTime = Date.now()
- ttftMs = firstContentChunkTime - startTime
- }
streamContent += delta.content
}
- if (delta?.reasoning_content) {
- // Skip reasoning content for this test
- }
if (chunk.usage) streamUsage = chunk.usage
} catch {
// skip non-JSON lines
@@ -242,12 +243,9 @@ async function makeConversationStreamRequest(
? streamUsage.completion_tokens
: 0
- const generationTimeMs = firstContentChunkTime !== undefined
- ? Date.now() - firstContentChunkTime
- : elapsedMs
- const outputTokensPerSec = generationTimeMs > 0
- ? (outputTokens / (generationTimeMs / 1000))
- : 0
+ const outputTokensPerSec = firstContentChunkTime !== undefined
+ ? (outputTokens / ((Date.now() - firstContentChunkTime) / 1000))
+ : undefined
// Print compact per-turn stats
const inputTokens = streamUsage && typeof streamUsage.prompt_tokens === 'number' ? streamUsage.prompt_tokens : 0
@@ -256,7 +254,7 @@ async function makeConversationStreamRequest(
const cacheRate = inputTokens > 0 ? ((cachedTokens / inputTokens) * 100).toFixed(1) : '0.0'
const cost = streamUsage ? `$${computeCost(streamUsage).cost.toFixed(6)}` : 'err'
- console.log(` ✅ ${(elapsedMs / 1000).toFixed(2)}s | TTFT ${ttftMs !== undefined ? (ttftMs / 1000).toFixed(2) + 's' : 'n/a'} | ${inputTokens} in (${cachedTokens} cached, ${cacheRate}%) | ${outputTokens} out @ ${outputTokensPerSec.toFixed(1)} tok/s | ${cost}`)
+ console.log(` ✅ ${(elapsedMs / 1000).toFixed(2)}s | TTFT ${ttftMs !== undefined ? (ttftMs / 1000).toFixed(2) + 's' : 'n/a'} | ${inputTokens} in (${cachedTokens} cached, ${cacheRate}%) | ${outputTokens} out @ ${outputTokensPerSec !== undefined ? outputTokensPerSec.toFixed(1) + ' tok/s' : 'n/a'} | ${cost}`)
console.log(` Response: ${streamContent.slice(0, 150)}${streamContent.length > 150 ? '...' : ''}`)
console.log()
@@ -277,6 +275,7 @@ async function main() {
console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`)
console.log(`Turns: ${TURN_PROMPTS.length}`)
console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`)
+ console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`)
console.log('='.repeat(60))
console.log()
diff --git a/web/src/app/api/v1/chat/completions/__tests__/free-mode-rate-limiter.test.ts b/web/src/app/api/v1/chat/completions/__tests__/free-mode-rate-limiter.test.ts
new file mode 100644
index 0000000000..0d9802b58b
--- /dev/null
+++ b/web/src/app/api/v1/chat/completions/__tests__/free-mode-rate-limiter.test.ts
@@ -0,0 +1,317 @@
+import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test'
+
+import {
+ checkFreeModeRateLimit,
+ FREE_MODE_RATE_LIMITS,
+ resetFreeModeRateLimits,
+} from '../free-mode-rate-limiter'
+
+const SECOND_MS = 1000
+const MINUTE_MS = 60 * SECOND_MS
+const HOUR_MS = 60 * MINUTE_MS
+
+describe('free-mode-rate-limiter', () => {
+  let nowSpy: ReturnType<typeof spyOn>
+ let fakeNow: number
+
+ beforeEach(() => {
+ resetFreeModeRateLimits()
+ fakeNow = 1_000_000_000_000
+ nowSpy = spyOn(Date, 'now').mockImplementation(() => fakeNow)
+ })
+
+ afterEach(() => {
+ nowSpy.mockRestore()
+ })
+
+ function advanceTime(ms: number) {
+ fakeNow += ms
+ }
+
+ function makeRequests(userId: string, count: number) {
+ for (let i = 0; i < count; i++) {
+ if (i > 0) {
+ advanceTime(1 * SECOND_MS + 1)
+ }
+ const result = checkFreeModeRateLimit(userId)
+ if (result.limited) {
+ throw new Error(`Unexpectedly rate limited on request ${i + 1}`)
+ }
+ }
+ }
+
+ describe('checkFreeModeRateLimit', () => {
+ it('allows the first request', () => {
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(false)
+ })
+
+ it('limits when per-second limit is exceeded', () => {
+ // Make all requests within the same second (no time advancement)
+ for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) {
+ expect(checkFreeModeRateLimit('user-1').limited).toBe(false)
+ }
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(true)
+ if (result.limited) {
+ expect(result.windowName).toBe('1 second')
+ }
+ })
+
+ it('resets per-second window after expiry', () => {
+ for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) {
+ checkFreeModeRateLimit('user-1')
+ }
+ expect(checkFreeModeRateLimit('user-1').limited).toBe(true)
+
+ advanceTime(1 * SECOND_MS + 1)
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(false)
+ })
+
+ it('allows requests up to the per-minute limit', () => {
+ for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_MINUTE; i++) {
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(false)
+ if (i < FREE_MODE_RATE_LIMITS.PER_MINUTE - 1) {
+ advanceTime(1 * SECOND_MS + 1)
+ }
+ }
+ })
+
+ it('limits when per-minute limit is exceeded', () => {
+ makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
+ // Advance past the 1-second window so the per-minute window is the one that triggers
+ advanceTime(1 * SECOND_MS + 1)
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(true)
+ if (result.limited) {
+ expect(result.windowName).toBe('1 minute')
+ }
+ })
+
+ it('limits when per-30-minute limit is exceeded', () => {
+ const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE
+ const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES
+
+ // Spread requests across multiple 1-minute windows to avoid hitting the per-minute limit
+ let sent = 0
+ while (sent < per30Min) {
+ const batch = Math.min(perMinute, per30Min - sent)
+ makeRequests('user-1', batch)
+ sent += batch
+ if (sent < per30Min) {
+ // Advance past the 1-minute window so it resets
+ advanceTime(1 * MINUTE_MS + 1)
+ }
+ }
+
+ // Advance past the 1-minute window so the per-30-minute window is the one that triggers
+ advanceTime(1 * MINUTE_MS + 1)
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(true)
+ if (result.limited) {
+ expect(result.windowName).toBe('30 minutes')
+ }
+ })
+
+ it('limits when per-5-hour limit is exceeded', () => {
+ const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE
+ const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES
+ const per5Hours = FREE_MODE_RATE_LIMITS.PER_5_HOURS
+
+ // Spread requests across multiple 30-minute windows
+ let sent = 0
+ while (sent < per5Hours) {
+ const batchFor30Min = Math.min(per30Min, per5Hours - sent)
+ // Within each 30-min window, spread across 1-min windows
+ let sentInWindow = 0
+ while (sentInWindow < batchFor30Min) {
+ const batch = Math.min(perMinute, batchFor30Min - sentInWindow)
+ makeRequests('user-1', batch)
+ sentInWindow += batch
+ if (sentInWindow < batchFor30Min) {
+ advanceTime(1 * MINUTE_MS + 1)
+ }
+ }
+ sent += sentInWindow
+ // Always advance past 30-min window to reset it for the next batch
+ // (stays well within the 5-hour window)
+ advanceTime(30 * MINUTE_MS + 1)
+ }
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(true)
+ if (result.limited) {
+ expect(result.windowName).toBe('5 hours')
+ }
+ })
+
+ it('limits when per-7-day limit is exceeded', () => {
+ const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE
+ const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES
+ const per5Hours = FREE_MODE_RATE_LIMITS.PER_5_HOURS
+ const per7Days = FREE_MODE_RATE_LIMITS.PER_7_DAYS
+
+ // Spread requests across multiple 5-hour windows
+ let sent = 0
+ while (sent < per7Days) {
+ const batchFor5Hours = Math.min(per5Hours, per7Days - sent)
+ let sentIn5Hr = 0
+ while (sentIn5Hr < batchFor5Hours) {
+ const batchFor30Min = Math.min(per30Min, batchFor5Hours - sentIn5Hr)
+ let sentIn30Min = 0
+ while (sentIn30Min < batchFor30Min) {
+ const batch = Math.min(perMinute, batchFor30Min - sentIn30Min)
+ makeRequests('user-1', batch)
+ sentIn30Min += batch
+ if (sentIn30Min < batchFor30Min) {
+ advanceTime(1 * MINUTE_MS + 1)
+ }
+ }
+ sentIn5Hr += sentIn30Min
+ advanceTime(30 * MINUTE_MS + 1)
+ }
+ sent += sentIn5Hr
+ // Advance past the 5-hour window (stays within 7-day window)
+ advanceTime(5 * HOUR_MS + 1)
+ }
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(true)
+ if (result.limited) {
+ expect(result.windowName).toBe('7 days')
+ }
+ })
+
+ it('does not increment counters when rate limited', () => {
+ makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
+ // Advance past the 1-second window so the per-minute window blocks
+ advanceTime(1 * SECOND_MS + 1)
+
+ // These should all be rejected without changing state
+ for (let i = 0; i < 5; i++) {
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(true)
+ }
+
+ // After the 1-minute window expires, the user should only have used PER_MINUTE requests
+ // against the 30-minute window, not PER_MINUTE + 5
+ advanceTime(1 * MINUTE_MS + 1)
+
+ // Should be allowed again (1-min window reset)
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(false)
+ })
+
+ it('returns correct retryAfterMs for the violated window', () => {
+ makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
+ // makeRequests advanced time by (PER_MINUTE - 1) * (SECOND_MS + 1)
+ const elapsedInMakeRequests = (FREE_MODE_RATE_LIMITS.PER_MINUTE - 1) * (1 * SECOND_MS + 1)
+
+ // Advance past the 1-second window, then a bit more
+ const additionalAdvance = 2 * SECOND_MS
+ advanceTime(additionalAdvance)
+
+ const totalElapsed = elapsedInMakeRequests + additionalAdvance
+ const expectedRetryAfterMs = 1 * MINUTE_MS - totalElapsed
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(true)
+ if (result.limited) {
+ expect(result.windowName).toBe('1 minute')
+ expect(result.retryAfterMs).toBe(expectedRetryAfterMs)
+ }
+ })
+
+ it('resets per-minute window after expiry', () => {
+ makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
+ advanceTime(1 * SECOND_MS + 1)
+
+ const limited = checkFreeModeRateLimit('user-1')
+ expect(limited.limited).toBe(true)
+
+ // Advance past the 1-minute window
+ advanceTime(1 * MINUTE_MS + 1)
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(false)
+ })
+
+ it('isolates different users', () => {
+ makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
+ advanceTime(1 * SECOND_MS + 1)
+
+ // user-1 is rate limited
+ expect(checkFreeModeRateLimit('user-1').limited).toBe(true)
+
+ // user-2 should not be affected
+ const result = checkFreeModeRateLimit('user-2')
+ expect(result.limited).toBe(false)
+ })
+
+ it('retryAfterMs is never negative', () => {
+ for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) {
+ checkFreeModeRateLimit('user-1')
+ }
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(true)
+ if (result.limited) {
+ expect(result.retryAfterMs).toBeGreaterThanOrEqual(0)
+ }
+ })
+
+ it('tracks counts across all windows simultaneously', () => {
+ // Make some requests
+ makeRequests('user-1', 5)
+
+ // Advance past 1-minute window but within 30-minute window
+ advanceTime(1 * MINUTE_MS + 1)
+
+ // Make more requests — 1-min counter resets, but 30-min counter keeps accumulating
+ makeRequests('user-1', 5)
+
+ // Advance past 1-minute again
+ advanceTime(1 * MINUTE_MS + 1)
+
+ // The 30-min window should now have 10 requests counted
+ // and the 1-min window should be fresh
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(false)
+ })
+ })
+
+ describe('resetFreeModeRateLimits', () => {
+ it('clears all rate limit state', () => {
+ for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) {
+ checkFreeModeRateLimit('user-1')
+ }
+ expect(checkFreeModeRateLimit('user-1').limited).toBe(true)
+
+ resetFreeModeRateLimits()
+
+ const result = checkFreeModeRateLimit('user-1')
+ expect(result.limited).toBe(false)
+ })
+
+ it('clears state for all users', () => {
+ for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) {
+ checkFreeModeRateLimit('user-1')
+ checkFreeModeRateLimit('user-2')
+ }
+
+ expect(checkFreeModeRateLimit('user-1').limited).toBe(true)
+ expect(checkFreeModeRateLimit('user-2').limited).toBe(true)
+
+ resetFreeModeRateLimits()
+
+ expect(checkFreeModeRateLimit('user-1').limited).toBe(false)
+ expect(checkFreeModeRateLimit('user-2').limited).toBe(false)
+ })
+ })
+})
diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts
index bf36ae417f..8553aa69e3 100644
--- a/web/src/app/api/v1/chat/completions/_post.ts
+++ b/web/src/app/api/v1/chat/completions/_post.ts
@@ -65,6 +65,8 @@ import {
OpenRouterError,
} from '@/llm-api/openrouter'
import { extractApiKeyFromHeader } from '@/util/auth'
+import { withDefaultProperties } from '@codebuff/common/analytics'
+import { checkFreeModeRateLimit } from './free-mode-rate-limiter'
const FREE_MODE_ALLOWED_COUNTRIES = new Set([
'US', 'CA',
@@ -80,7 +82,13 @@ function extractClientIp(req: NextRequest): string | undefined {
return req.headers.get('x-real-ip') ?? undefined
}
-function getCountryFromIp(clientIp: string | undefined): string | null {
+function getCountryCode(req: NextRequest): string | null {
+ const cfCountry = req.headers.get('cf-ipcountry')
+ if (cfCountry && cfCountry !== 'XX' && cfCountry !== 'T1') {
+ return cfCountry.toUpperCase()
+ }
+
+ const clientIp = extractClientIp(req)
if (!clientIp) {
return null
}
@@ -141,7 +149,6 @@ export async function postChatCompletions(params: {
req,
getUserInfoFromApiKey,
loggerWithContext,
- trackEvent,
getUserUsageData,
getAgentRunFromId,
fetch,
@@ -150,6 +157,7 @@ export async function postChatCompletions(params: {
getUserPreferences,
} = params
let { logger } = params
+ let { trackEvent } = params
try {
// Parse request body
@@ -175,6 +183,12 @@ export async function postChatCompletions(params: {
const bodyStream = typedBody.stream ?? false
const runId = typedBody.codebuff_metadata?.run_id
+ // Check if the request is in FREE mode (costs 0 credits for allowed agent+model combos)
+ const costMode = typedBody.codebuff_metadata?.cost_mode
+ const isFreeModeRequest = isFreeMode(costMode)
+
+ trackEvent = withDefaultProperties(trackEvent, { freebuff: isFreeModeRequest })
+
// Extract and validate API key
const apiKey = extractApiKeyFromHeader(req)
if (!apiKey) {
@@ -242,14 +256,17 @@ export async function postChatCompletions(params: {
logger,
})
- // Check if the request is in FREE mode (costs 0 credits for allowed agent+model combos)
- const costMode = typedBody.codebuff_metadata?.cost_mode
- const isFreeModeRequest = isFreeMode(costMode)
-
// For free mode requests, check if user is in US or Canada
if (isFreeModeRequest) {
+ const countryCode = getCountryCode(req)
const clientIp = extractClientIp(req)
- const countryCode = getCountryFromIp(clientIp)
+
+ const cfHeader = req.headers.get('cf-ipcountry')
+ const geoipResult = clientIp ? geoip.lookup(clientIp)?.country ?? null : null
+ logger.info(
+ { cfHeader, geoipResult, resolvedCountry: countryCode, clientIp: clientIp ? '[redacted]' : undefined },
+ 'Free mode country detection',
+ )
// If we couldn't determine country (null), allow the request (fail open)
// This handles users behind VPNs, corporate proxies, or localhost
@@ -273,6 +290,7 @@ export async function postChatCompletions(params: {
{ status: 403 },
)
}
+
}
// Extract and validate agent run ID
@@ -333,6 +351,38 @@ export async function postChatCompletions(params: {
)
}
+ // Rate limit free mode requests (after validation so invalid requests don't consume quota)
+ if (isFreeModeRequest) {
+ const rateLimitResult = checkFreeModeRateLimit(userId)
+ if (rateLimitResult.limited) {
+ const retryAfterSeconds = Math.ceil(rateLimitResult.retryAfterMs / 1000)
+ const resetTime = new Date(Date.now() + rateLimitResult.retryAfterMs).toISOString()
+ const resetCountdown = formatQuotaResetCountdown(resetTime)
+
+ trackEvent({
+ event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR,
+ userId,
+ properties: {
+ error: 'free_mode_rate_limited',
+ windowName: rateLimitResult.windowName,
+ retryAfterSeconds,
+ },
+ logger,
+ })
+
+ return NextResponse.json(
+ {
+ error: 'free_mode_rate_limited',
+ message: `Free mode rate limit exceeded (${rateLimitResult.windowName} limit). Try again ${resetCountdown}.`,
+ },
+ {
+ status: 429,
+ headers: { 'Retry-After': String(retryAfterSeconds) },
+ },
+ )
+ }
+ }
+
// For subscribers, ensure a block grant exists before processing the request.
// This is done AFTER validation so malformed requests don't start a new 5-hour block.
// When the function is provided, always include subscription credits in the balance:
diff --git a/web/src/app/api/v1/chat/completions/free-mode-rate-limiter.ts b/web/src/app/api/v1/chat/completions/free-mode-rate-limiter.ts
new file mode 100644
index 0000000000..b299291cd4
--- /dev/null
+++ b/web/src/app/api/v1/chat/completions/free-mode-rate-limiter.ts
@@ -0,0 +1,167 @@
+/**
+ * In-memory rate limiter for FREE mode requests.
+ *
+ * Enforces multiple fixed-window limits per user to prevent abuse.
+ * Each window is anchored to the user's first request in that window
+ * and resets once the window duration elapses.
+ *
+ * Adjust the constants below to tune the limits.
+ */
+
+// ---------------------------------------------------------------------------
+// Configurable rate-limit constants
+// ---------------------------------------------------------------------------
+
+/**
+ * Per-user request caps for FREE mode, one per fixed window.
+ * Every cap must hold simultaneously for a request to be allowed
+ * (see `checkFreeModeRateLimit`).
+ */
+export const FREE_MODE_RATE_LIMITS = {
+  /** Max requests per 1-second window */
+  PER_SECOND: 2,
+  /** Max requests per 1-minute window */
+  PER_MINUTE: 20,
+  /** Max requests per 30-minute window */
+  PER_30_MINUTES: 200,
+  /** Max requests per 5-hour window */
+  PER_5_HOURS: 1_000,
+  /** Max requests per 7-day window */
+  PER_7_DAYS: 10_000,
+} as const
+
+// ---------------------------------------------------------------------------
+// Internal types
+// ---------------------------------------------------------------------------
+
+// One fixed-window limit definition. `name` is a human-readable label that
+// doubles as the per-user tracker key and appears in rate-limit responses.
+interface RateWindow {
+  name: string
+  windowMs: number
+  maxRequests: number
+}
+
+// Per-user, per-window counter. `windowStart` (epoch ms) anchors the fixed
+// window to the user's first request in it; the tracker is dropped once the
+// window duration has elapsed.
+interface WindowTracker {
+  count: number
+  windowStart: number
+}
+
+// Discriminated union on `limited`: retry metadata exists only when limited.
+export type RateLimitResult = {
+  limited: false
+} | {
+  limited: true
+  windowName: string
+  retryAfterMs: number
+}
+
+// ---------------------------------------------------------------------------
+// Window definitions (derived from the constants above)
+// ---------------------------------------------------------------------------
+
+// Millisecond durations used to express the window sizes below.
+const SECOND_MS = 1000
+const MINUTE_MS = 60 * SECOND_MS
+const HOUR_MS = 60 * MINUTE_MS
+const DAY_MS = 24 * HOUR_MS
+
+// All windows are enforced together on every request. Each `name` must be
+// unique: it is the key into a user's tracker map.
+const RATE_WINDOWS: RateWindow[] = [
+  { name: '1 second', windowMs: 1 * SECOND_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_SECOND },
+  { name: '1 minute', windowMs: 1 * MINUTE_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_MINUTE },
+  { name: '30 minutes', windowMs: 30 * MINUTE_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_30_MINUTES },
+  { name: '5 hours', windowMs: 5 * HOUR_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_5_HOURS },
+  { name: '7 days', windowMs: 7 * DAY_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_7_DAYS },
+]
+
+// ---------------------------------------------------------------------------
+// In-memory state
+// ---------------------------------------------------------------------------
+
+// userId -> (windowName -> tracker)
+// The generic arguments were missing here, which is a syntax error in TS.
+const userWindows = new Map<string, Map<string, WindowTracker>>()
+
+// Timestamp (epoch ms) of the last sweep of expired entries. Sweeps run
+// lazily from checkFreeModeRateLimit, at most once per CLEANUP_INTERVAL_MS.
+let lastCleanupTime = 0
+const CLEANUP_INTERVAL_MS = 5 * MINUTE_MS
+
+// ---------------------------------------------------------------------------
+// Cleanup
+// ---------------------------------------------------------------------------
+
+/**
+ * Sweep all per-user state: drop any tracker whose window has fully elapsed
+ * (or whose name no longer matches a RATE_WINDOWS entry), then drop users
+ * that are left with no trackers at all.
+ */
+function cleanupExpiredEntries(): void {
+  const now = Date.now()
+  for (const [uid, trackers] of userWindows) {
+    for (const [name, tracker] of trackers) {
+      const def = RATE_WINDOWS.find((w) => w.name === name)
+      // Unknown window names are stale config leftovers; treat as expired.
+      const expired = !def || now - tracker.windowStart >= def.windowMs
+      if (expired) {
+        trackers.delete(name)
+      }
+    }
+    if (trackers.size === 0) {
+      userWindows.delete(uid)
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Check whether a free-mode request from `userId` should be rate-limited.
+ *
+ * If the request is allowed, each window's counter is incremented.
+ * If any window is exceeded, the request is rejected and no counters are
+ * incremented (though expired trackers may still be pruned).
+ */
+export function checkFreeModeRateLimit(userId: string): RateLimitResult {
+  const now = Date.now()
+
+  // Lazy periodic sweep of stale per-user state to prevent memory leaks.
+  if (now - lastCleanupTime > CLEANUP_INTERVAL_MS) {
+    cleanupExpiredEntries()
+    lastCleanupTime = now
+  }
+
+  let windows = userWindows.get(userId)
+  if (!windows) {
+    windows = new Map()
+    userWindows.set(userId, windows)
+  }
+
+  // First pass: check every window before incrementing anything, so a
+  // rejected request consumes no quota. (This pass does prune expired
+  // trackers — it only avoids touching the counters.)
+  for (const rateWindow of RATE_WINDOWS) {
+    let tracker = windows.get(rateWindow.name)
+
+    // Reset the window if it has expired
+    if (tracker && now - tracker.windowStart >= rateWindow.windowMs) {
+      windows.delete(rateWindow.name)
+      tracker = undefined
+    }
+
+    // A count at or above the cap implies the tracker exists (all caps are
+    // > 0), but guard explicitly rather than using a non-null assertion.
+    if (tracker && tracker.count >= rateWindow.maxRequests) {
+      const retryAfterMs = rateWindow.windowMs - (now - tracker.windowStart)
+      return {
+        limited: true,
+        windowName: rateWindow.name,
+        retryAfterMs: Math.max(0, retryAfterMs),
+      }
+    }
+  }
+
+  // Second pass: the request is allowed — increment every window's counter.
+  for (const rateWindow of RATE_WINDOWS) {
+    let tracker = windows.get(rateWindow.name)
+    if (!tracker) {
+      tracker = { count: 0, windowStart: now }
+      windows.set(rateWindow.name, tracker)
+    }
+    tracker.count++
+  }
+
+  return { limited: false }
+}
+
+/**
+ * Wipe every per-user tracker and re-arm the lazy cleanup timer.
+ * Intended for use by tests only.
+ */
+export function resetFreeModeRateLimits(): void {
+  lastCleanupTime = 0
+  userWindows.clear()
+}
diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
index df8f356d17..2108d408a2 100644
--- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts
+++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
@@ -13,7 +13,7 @@ import {
import type { Logger } from '@codebuff/common/types/contracts/logger'
const STANDARD_MODEL_ID = 'accounts/fireworks/models/minimax-m2p5'
-const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/qne3jo8v'
+const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/lnfid5h9'
function createMockLogger(): Logger {
return {
diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 8582645944..52fe1885c3 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -39,7 +39,7 @@ function getCanopyWaveModelId(openrouterModel: string): string {
return CANOPYWAVE_MODEL_MAP[openrouterModel] ?? openrouterModel
}
-type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean }
+type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean }
type LineResult = {
state: StreamState
@@ -170,6 +170,7 @@ export async function handleCanopyWaveNonStream({
byok: false,
logger,
costMode,
+ ttftMs: null, // Non-stream - no TTFT to report
})
// Overwrite cost so SDK calculates exact credits we charged
@@ -218,7 +219,7 @@ export async function handleCanopyWaveStream({
}
let heartbeatInterval: NodeJS.Timeout
- let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false }
+ let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null, billedAlready: false }
let clientDisconnected = false
const stream = new ReadableStream({
@@ -439,7 +440,7 @@ async function handleResponse({
logger: Logger
insertMessage: InsertMessageBigqueryFn
}): Promise<{ state: StreamState; billedCredits?: number }> {
- state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel })
+ state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel })
// Some providers send cumulative usage on EVERY chunk (not just the final one),
// so we must only bill once on the final chunk to avoid charging N times.
@@ -486,6 +487,7 @@ async function handleResponse({
byok: false,
logger,
costMode,
+ ttftMs: state.ttftMs,
})
return { state, billedCredits }
@@ -494,6 +496,7 @@ async function handleResponse({
function handleStreamChunk({
data,
state,
+ startTime,
logger,
userId,
agentId,
@@ -501,6 +504,7 @@ function handleStreamChunk({
}: {
data: Record
state: StreamState
+ startTime: Date
logger: Logger
userId: string
agentId: string
@@ -544,6 +548,13 @@ function handleStreamChunk({
const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content
: typeof delta?.reasoning === 'string' ? delta.reasoning
: ''
+
+ // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls)
+ const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0
+ if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) {
+ state.ttftMs = Date.now() - startTime.getTime()
+ }
+
if (state.reasoningText.length < MAX_BUFFER_SIZE) {
state.reasoningText += reasoningDelta
if (state.reasoningText.length >= MAX_BUFFER_SIZE) {
diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts
index 2b28937415..fccfd7892e 100644
--- a/web/src/llm-api/fireworks.ts
+++ b/web/src/llm-api/fireworks.ts
@@ -36,7 +36,7 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = false
/** Custom deployment IDs for models with dedicated Fireworks deployments */
const FIREWORKS_DEPLOYMENT_MAP: Record = {
- 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/qne3jo8v',
+ 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9',
}
/** Check if current time is within deployment hours (10am–8pm ET) */
@@ -79,7 +79,7 @@ function getFireworksModelId(openrouterModel: string): string {
return FIREWORKS_MODEL_MAP[openrouterModel] ?? openrouterModel
}
-type StreamState = { responseText: string; reasoningText: string }
+type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null }
type LineResult = {
state: StreamState
@@ -210,6 +210,7 @@ export async function handleFireworksNonStream({
byok: false,
logger,
costMode,
+ ttftMs: null, // Non-stream - no TTFT to report
})
// Overwrite cost so SDK calculates exact credits we charged
@@ -258,7 +259,7 @@ export async function handleFireworksStream({
}
let heartbeatInterval: NodeJS.Timeout
- let state: StreamState = { responseText: '', reasoningText: '' }
+ let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null }
let clientDisconnected = false
const stream = new ReadableStream({
@@ -473,7 +474,7 @@ async function handleResponse({
logger: Logger
insertMessage: InsertMessageBigqueryFn
}): Promise<{ state: StreamState; billedCredits?: number }> {
- state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel })
+ state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel })
if ('error' in data || !data.usage) {
return { state }
@@ -511,6 +512,7 @@ async function handleResponse({
byok: false,
logger,
costMode,
+ ttftMs: state.ttftMs,
})
return { state, billedCredits }
@@ -519,6 +521,7 @@ async function handleResponse({
function handleStreamChunk({
data,
state,
+ startTime,
logger,
userId,
agentId,
@@ -526,6 +529,7 @@ function handleStreamChunk({
}: {
data: Record
state: StreamState
+ startTime: Date
logger: Logger
userId: string
agentId: string
@@ -569,6 +573,13 @@ function handleStreamChunk({
const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content
: typeof delta?.reasoning === 'string' ? delta.reasoning
: ''
+
+ // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls)
+ const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0
+ if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) {
+ state.ttftMs = Date.now() - startTime.getTime()
+ }
+
if (state.reasoningText.length < MAX_BUFFER_SIZE) {
state.reasoningText += reasoningDelta
if (state.reasoningText.length >= MAX_BUFFER_SIZE) {
diff --git a/web/src/llm-api/helpers.ts b/web/src/llm-api/helpers.ts
index 1ba912cf57..14e578fa9b 100644
--- a/web/src/llm-api/helpers.ts
+++ b/web/src/llm-api/helpers.ts
@@ -114,6 +114,7 @@ export async function consumeCreditsForMessage(params: {
byok: boolean
logger: Logger
costMode?: string
+ ttftMs?: number | null
}): Promise {
const {
messageId,
@@ -130,6 +131,7 @@ export async function consumeCreditsForMessage(params: {
byok,
logger,
costMode,
+ ttftMs,
} = params
// Calculate initial credits based on cost
@@ -172,6 +174,7 @@ export async function consumeCreditsForMessage(params: {
outputTokens: usageData.outputTokens,
byok,
logger,
+ ttftMs: ttftMs ?? null,
})
return credits
diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts
index 7ac2f1afeb..8f619e8357 100644
--- a/web/src/llm-api/openai.ts
+++ b/web/src/llm-api/openai.ts
@@ -304,6 +304,7 @@ export async function handleOpenAINonStream({
byok: false,
logger,
costMode,
+ ttftMs: null, // Non-stream - no TTFT to report
})
return {
@@ -359,6 +360,7 @@ export async function handleOpenAINonStream({
byok: false,
logger,
costMode,
+ ttftMs: null, // Non-stream - no TTFT to report
})
if (data.usage) {
@@ -424,6 +426,7 @@ export async function handleOpenAIStream({
let heartbeatInterval: NodeJS.Timeout
let responseText = ''
let reasoningText = ''
+ let ttftMs: number | null = null
let clientDisconnected = false
const MAX_BUFFER_SIZE = 1 * 1024 * 1024 // 1MB
@@ -477,6 +480,14 @@ export async function handleOpenAIStream({
const obj = JSON.parse(raw)
const delta = obj.choices?.[0]?.delta
+ // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls)
+ const hasContentDelta = delta?.content && responseText.length === 0
+ const hasReasoningDelta = delta?.reasoning && reasoningText.length === 0
+ const hasToolCallsDelta = delta?.tool_calls && delta.tool_calls.length > 0
+ if (ttftMs === null && (hasContentDelta || hasReasoningDelta || hasToolCallsDelta)) {
+ ttftMs = Date.now() - startTime.getTime()
+ }
+
if (delta?.content && responseText.length < MAX_BUFFER_SIZE) {
responseText += delta.content
if (responseText.length >= MAX_BUFFER_SIZE) {
@@ -544,6 +555,7 @@ export async function handleOpenAIStream({
byok: false,
logger,
costMode,
+ ttftMs,
})
}
} catch {
@@ -631,6 +643,7 @@ export async function handleOpenAIStream({
byok: false,
logger,
costMode,
+ ttftMs,
})
}
} catch {
diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts
index c99200f1b0..08b7a31ef5 100644
--- a/web/src/llm-api/openrouter.ts
+++ b/web/src/llm-api/openrouter.ts
@@ -23,7 +23,7 @@ import type {
OpenRouterErrorMetadata,
} from './types'
-type StreamState = { responseText: string; reasoningText: string }
+type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null }
// Extended timeout for deep-thinking models (e.g., gpt-5) that can take
// a long time to start streaming.
@@ -186,6 +186,7 @@ export async function handleOpenRouterNonStream({
byok,
logger,
costMode,
+ ttftMs: null, // Non-stream - no TTFT to report
})
// Return the first response with aggregated data
@@ -257,6 +258,7 @@ export async function handleOpenRouterNonStream({
byok,
logger,
costMode,
+ ttftMs: null, // Non-stream - no TTFT to report
})
// Overwrite cost so SDK calculates exact credits we charged
@@ -313,7 +315,7 @@ export async function handleOpenRouterStream({
}
let heartbeatInterval: NodeJS.Timeout
- let state: StreamState = { responseText: '', reasoningText: '' }
+ let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null }
let clientDisconnected = false
// Create a ReadableStream that Next.js can handle
@@ -540,6 +542,7 @@ async function handleResponse({
state = await handleStreamChunk({
data,
state,
+ startTime,
logger,
userId,
agentId,
@@ -584,6 +587,7 @@ async function handleResponse({
byok,
logger,
costMode,
+ ttftMs: state.ttftMs,
})
return { state, billedCredits }
@@ -592,6 +596,7 @@ async function handleResponse({
async function handleStreamChunk({
data,
state,
+ startTime,
logger,
userId,
agentId,
@@ -599,6 +604,7 @@ async function handleStreamChunk({
}: {
data: OpenRouterStreamChatCompletionChunk
state: StreamState
+ startTime: Date
logger: Logger
userId: string
agentId: string
@@ -641,6 +647,14 @@ async function handleStreamChunk({
}
const choice = data.choices[0]
+ // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls)
+ const hasContentDelta = choice?.delta?.content != null && choice?.delta?.content !== ''
+ const hasReasoningDelta = choice?.delta?.reasoning != null && choice?.delta?.reasoning !== ''
+ const hasToolCallsDelta = choice?.delta?.tool_calls != null && (choice?.delta?.tool_calls as unknown[])?.length > 0
+ if (state.ttftMs === null && (hasContentDelta || hasReasoningDelta || hasToolCallsDelta)) {
+ state.ttftMs = Date.now() - startTime.getTime()
+ }
+
// Append content and reasoning, but only up to the buffer limit.
const contentDelta = choice.delta?.content ?? ''
if (state.responseText.length < MAX_BUFFER_SIZE) {
diff --git a/web/src/llm-api/siliconflow.ts b/web/src/llm-api/siliconflow.ts
index 1146bbe3df..6398fe184f 100644
--- a/web/src/llm-api/siliconflow.ts
+++ b/web/src/llm-api/siliconflow.ts
@@ -39,7 +39,7 @@ function getSiliconFlowModelId(openrouterModel: string): string {
return SILICONFLOW_MODEL_MAP[openrouterModel] ?? openrouterModel
}
-type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean }
+type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean }
type LineResult = {
state: StreamState
@@ -171,6 +171,7 @@ export async function handleSiliconFlowNonStream({
byok: false,
logger,
costMode,
+ ttftMs: null, // Non-stream - no TTFT to report
})
// Overwrite cost so SDK calculates exact credits we charged
@@ -219,7 +220,7 @@ export async function handleSiliconFlowStream({
}
let heartbeatInterval: NodeJS.Timeout
- let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false }
+ let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null, billedAlready: false }
let clientDisconnected = false
const stream = new ReadableStream({
@@ -440,7 +441,7 @@ async function handleResponse({
logger: Logger
insertMessage: InsertMessageBigqueryFn
}): Promise<{ state: StreamState; billedCredits?: number }> {
- state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel })
+ state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel })
// Some providers send cumulative usage on EVERY chunk (not just the final one),
// so we must only bill once on the final chunk to avoid charging N times.
@@ -487,6 +488,7 @@ async function handleResponse({
byok: false,
logger,
costMode,
+ ttftMs: state.ttftMs,
})
return { state, billedCredits }
@@ -495,6 +497,7 @@ async function handleResponse({
function handleStreamChunk({
data,
state,
+ startTime,
logger,
userId,
agentId,
@@ -502,6 +505,7 @@ function handleStreamChunk({
}: {
data: Record
state: StreamState
+ startTime: Date
logger: Logger
userId: string
agentId: string
@@ -545,6 +549,13 @@ function handleStreamChunk({
const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content
: typeof delta?.reasoning === 'string' ? delta.reasoning
: ''
+
+ // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls)
+ const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0
+ if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) {
+ state.ttftMs = Date.now() - startTime.getTime()
+ }
+
if (state.reasoningText.length < MAX_BUFFER_SIZE) {
state.reasoningText += reasoningDelta
if (state.reasoningText.length >= MAX_BUFFER_SIZE) {