diff --git a/agents/__tests__/context-pruner.test.ts b/agents/__tests__/context-pruner.test.ts index 45c61b4b9f..b691f33a9f 100644 --- a/agents/__tests__/context-pruner.test.ts +++ b/agents/__tests__/context-pruner.test.ts @@ -219,6 +219,7 @@ describe('context-pruner handleSteps', () => { messages: Message[], contextTokenCount?: number, maxContextLength?: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, ) => { mockAgentState.messageHistory = messages // If contextTokenCount not provided, estimate from messages @@ -233,7 +234,10 @@ describe('context-pruner handleSteps', () => { const generator = contextPruner.handleSteps!({ agentState: mockAgentState, logger: mockLogger, - params: maxContextLength ? { maxContextLength } : {}, + params: { + ...(maxContextLength ? { maxContextLength } : {}), + ...budgets, + }, }) const results: any[] = [] let result = generator.next() @@ -381,36 +385,6 @@ describe('context-pruner handleSteps', () => { expect(content).toContain('[USER] [with image(s)]') }) - test('truncates summary when it exceeds target size', () => { - // Create many messages to generate a large summary - const messages: Message[] = [] - for (let i = 0; i < 100; i++) { - messages.push( - createMessage( - 'user', - `User message number ${i} with some additional content to make it longer`, - ), - ) - messages.push( - createMessage( - 'assistant', - `Assistant response number ${i} with detailed explanation`, - ), - ) - } - - // Use a very small max context to force truncation - const results = runHandleSteps(messages, 500000, 5000) - const content = results[0].input.messages[0].content[0].text - - // Should contain truncation notice - expect(content).toContain('[CONVERSATION TRUNCATED') - - // Should still have the wrapper tags - expect(content).toContain('') - expect(content).toContain('') - }) - test('removes only INSTRUCTIONS_PROMPT and SUBAGENT_SPAWN when under context limit', () => { const messages: Message[] = [ createMessage('user', 
'Hello'), @@ -700,6 +674,7 @@ describe('context-pruner long message truncation', () => { messages: Message[], contextTokenCount: number, maxContextLength: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, ) => { mockAgentState.messageHistory = messages mockAgentState.contextTokenCount = contextTokenCount @@ -712,7 +687,7 @@ describe('context-pruner long message truncation', () => { const generator = contextPruner.handleSteps!({ agentState: mockAgentState, logger: mockLogger, - params: { maxContextLength }, + params: { maxContextLength, ...budgets }, }) const results: any[] = [] let result = generator.next() @@ -726,8 +701,8 @@ describe('context-pruner long message truncation', () => { } test('truncates very long user messages with 80-20 ratio', () => { - // Create a message that exceeds 20k chars - const longText = 'A'.repeat(25000) + // Create a message that exceeds the user message token limit (~13k tokens = ~39k chars) + const longText = 'A'.repeat(45000) const messages = [ createMessage('user', longText), createMessage('assistant', 'Got it'), @@ -1118,6 +1093,7 @@ describe('context-pruner repeated compaction', () => { messages: Message[], contextTokenCount: number, maxContextLength: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, ) => { mockAgentState.messageHistory = messages mockAgentState.contextTokenCount = contextTokenCount @@ -1130,7 +1106,7 @@ describe('context-pruner repeated compaction', () => { const generator = contextPruner.handleSteps!({ agentState: mockAgentState, logger: mockLogger, - params: { maxContextLength }, + params: { maxContextLength, ...budgets }, }) const results: any[] = [] let result = generator.next() @@ -1208,6 +1184,135 @@ First assistant response expect(summaryTagCount).toBe(1) }) + test('drops old entries each cycle when budgets are tight', () => { + const simulateCompaction = ( + inputMessages: Message[], + budgets: { assistantToolBudget: number; userBudget: number }, + ): 
Message => { + const result = runHandleSteps(inputMessages, 250000, 200000, budgets) + return result[0].input.messages[0] + } + + const tightBudgets = { assistantToolBudget: 25, userBudget: 25 } + + // === CYCLE 1: 3 pairs of messages, tight budgets drop the oldest === + const cycle1Messages = [ + createMessage('user', 'Cycle1-Request-A'), + createMessage('assistant', 'Cycle1-Response-A'), + createMessage('user', 'Cycle1-Request-B'), + createMessage('assistant', 'Cycle1-Response-B'), + createMessage('user', 'Cycle1-Request-C'), + createMessage('assistant', 'Cycle1-Response-C'), + ] + const summary1 = simulateCompaction(cycle1Messages, tightBudgets) + const summary1Text = (summary1.content[0] as { type: 'text'; text: string }) + .text + + // Most recent entries should survive + expect(summary1Text).toContain('Cycle1-Request-C') + expect(summary1Text).toContain('Cycle1-Response-C') + // Oldest entries should be dropped + expect(summary1Text).not.toContain('Cycle1-Request-A') + expect(summary1Text).not.toContain('Cycle1-Response-A') + + // === CYCLE 2: Add new messages, compact again === + const cycle2Messages = [ + summary1, + createMessage('user', 'Cycle2-Request-D'), + createMessage('assistant', 'Cycle2-Response-D'), + ] + const summary2 = simulateCompaction(cycle2Messages, tightBudgets) + const summary2Text = (summary2.content[0] as { type: 'text'; text: string }) + .text + + // Newest entries from cycle 2 should survive + expect(summary2Text).toContain('Cycle2-Request-D') + expect(summary2Text).toContain('Cycle2-Response-D') + // Cycle 1's oldest survivors should now be dropped + expect(summary2Text).not.toContain('Cycle1-Request-A') + expect(summary2Text).not.toContain('Cycle1-Response-A') + + // === CYCLE 3: Add more, compact again === + const cycle3Messages = [ + summary2, + createMessage('user', 'Cycle3-Request-E'), + createMessage('assistant', 'Cycle3-Response-E'), + ] + const summary3 = simulateCompaction(cycle3Messages, tightBudgets) + const summary3Text = 
(summary3.content[0] as { type: 'text'; text: string }) + .text + + // Newest entries from cycle 3 should survive + expect(summary3Text).toContain('Cycle3-Request-E') + expect(summary3Text).toContain('Cycle3-Response-E') + // Very old entries should definitely be gone + expect(summary3Text).not.toContain('Cycle1-Request-A') + expect(summary3Text).not.toContain('Cycle1-Response-A') + + // Verify only one conversation_summary tag (no nesting) + const summaryTagCount = ( + summary3Text.match(/<conversation_summary>/g) || [] + ).length + expect(summaryTagCount).toBe(1) + }) + + test('keeps multi-part tool entries grouped across compaction cycles', () => { + const simulateCompaction = ( + inputMessages: Message[], + ): Message => { + const result = runHandleSteps(inputMessages, 250000, 200000) + return result[0].input.messages[0] + } + + // Create a tool result that produces multiple entryParts: + // both an error AND a non-zero exit code + const cycle1Messages: Message[] = [ + createMessage('user', 'Run tests'), + createToolCallMessage('call-1', 'run_terminal_command', { + command: 'npm test', + }), + createToolResultMessage('call-1', 'run_terminal_command', { + errorMessage: 'Test suite failed', + exitCode: 1, + }), + createMessage('user', 'Fix the tests'), + createMessage('assistant', 'I will fix them'), + ] + + // Cycle 1: compact + const summary1 = simulateCompaction(cycle1Messages) + const summary1Text = (summary1.content[0] as { type: 'text'; text: string }) + .text + + // Both parts should be present in cycle 1 + expect(summary1Text).toContain('[TOOL ERROR: run_terminal_command] Test suite failed') + expect(summary1Text).toContain('[COMMAND FAILED] Exit code: 1') + + // Cycle 2: re-compact — the multi-part entry should stay as one entry + const cycle2Messages: Message[] = [ + summary1, + createMessage('user', 'Try again'), + createMessage('assistant', 'Running tests again'), + ] + const summary2 = simulateCompaction(cycle2Messages) + const summary2Text = (summary2.content[0] as { 
type: 'text'; text: string }) + .text + + // Both parts should still be present together after re-compaction + expect(summary2Text).toContain('[TOOL ERROR: run_terminal_command] Test suite failed') + expect(summary2Text).toContain('[COMMAND FAILED] Exit code: 1') + + // They should be within the same --- delimited chunk (not split apart) + const separator = '\n\n---\n\n' + const chunks = summary2Text + .replace(/[\s\S]*?\n\n/, '') + .replace(/<\/conversation_summary>[\s\S]*/, '') + .split(separator) + const errorChunk = chunks.find((c) => c.includes('[TOOL ERROR:')) + expect(errorChunk).toBeDefined() + expect(errorChunk).toContain('[COMMAND FAILED] Exit code: 1') + }) + test('handles 3+ compaction cycles without nested PREVIOUS SUMMARY markers', () => { // Helper to simulate running the context pruner and getting the output const simulateCompaction = (inputMessages: Message[]): Message => { @@ -1355,6 +1460,7 @@ describe('context-pruner threshold behavior', () => { messages: Message[], contextTokenCount: number, maxContextLength: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, ) => { mockAgentState.messageHistory = messages mockAgentState.contextTokenCount = contextTokenCount @@ -1367,7 +1473,7 @@ describe('context-pruner threshold behavior', () => { const generator = contextPruner.handleSteps!({ agentState: mockAgentState, logger: mockLogger, - params: { maxContextLength }, + params: { maxContextLength, ...budgets }, }) const results: any[] = [] let result = generator.next() @@ -1446,7 +1552,7 @@ describe('context-pruner str_replace and write_file tool results', () => { return results } - test('includes str_replace diff in summary', () => { + test('includes str_replace result in summary', () => { const messages = [ createMessage('user', 'Edit this file'), createToolCallMessage('call-1', 'str_replace', { @@ -1454,19 +1560,22 @@ describe('context-pruner str_replace and write_file tool results', () => { replacements: [{ old: 'foo', new: 
'bar' }], }), createToolResultMessage('call-1', 'str_replace', { - diff: '--- a/src/utils.ts\n+++ b/src/utils.ts\n@@ -1,1 +1,1 @@\n-foo\n+bar', + file: 'src/utils.ts', + message: 'Updated file', + unifiedDiff: '--- a/src/utils.ts\n+++ b/src/utils.ts\n@@ -1,1 +1,1 @@\n-foo\n+bar', }), ] const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('[EDIT RESULT]') + expect(content).toContain('[EDIT RESULT: str_replace]') + expect(content).toContain('unifiedDiff') expect(content).toContain('-foo') expect(content).toContain('+bar') }) - test('includes write_file diff in summary', () => { + test('includes write_file result in summary', () => { const messages = [ createMessage('user', 'Create a new file'), createToolCallMessage('call-1', 'write_file', { @@ -1474,18 +1583,20 @@ describe('context-pruner str_replace and write_file tool results', () => { content: 'export const hello = "world"', }), createToolResultMessage('call-1', 'write_file', { - diff: '--- /dev/null\n+++ b/src/new-file.ts\n@@ -0,0 +1 @@\n+export const hello = "world"', + file: 'src/new-file.ts', + message: 'Created file', + unifiedDiff: '--- /dev/null\n+++ b/src/new-file.ts\n@@ -0,0 +1 @@\n+export const hello = "world"', }), ] const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('[WRITE RESULT]') - expect(content).toContain('+export const hello = "world"') + expect(content).toContain('[EDIT RESULT: write_file]') + expect(content).toContain('export const hello') }) - test('truncates very long str_replace diffs', () => { + test('truncates very long str_replace results', () => { const longDiff = 'X'.repeat(3000) const messages = [ createMessage('user', 'Make big changes'), @@ -1494,20 +1605,65 @@ describe('context-pruner str_replace and write_file tool results', () => { replacements: [], }), createToolResultMessage('call-1', 'str_replace', { - diff: longDiff, + 
file: 'src/big-file.ts', + message: 'Updated file', + unifiedDiff: longDiff, }), ] const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('[EDIT RESULT]') + expect(content).toContain('[EDIT RESULT: str_replace]') expect(content).toContain('...') // Should not contain the full diff expect(content).not.toContain(longDiff) }) - test('does not include edit result when no diff is present', () => { + test('truncates very large tool entries to 5k token limit', () => { + // spawn_agents with multiple non-blacklisted agents producing large outputs + // Each agent output is capped at ~3,900 chars, but 5 agents × 3,900 = ~19,500 chars + // which exceeds the 5k token (15k char) TOOL_ENTRY_LIMIT + const largeAgentResults = Array.from({ length: 5 }, (_, i) => ({ + agentType: `editor`, + value: { + type: 'string', + value: `AGENT_${i}_START_` + 'X'.repeat(4000) + `_AGENT_${i}_END`, + }, + })) + + const messages: Message[] = [ + createMessage('user', 'Spawn many agents'), + createToolCallMessage('call-1', 'spawn_agents', { + agents: [ + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + ], + }), + { + role: 'tool', + toolCallId: 'call-1', + toolName: 'spawn_agents', + content: [{ type: 'json', value: largeAgentResults }], + } as ToolMessage, + ] + + const results = runHandleSteps(messages) + const content = results[0].input.messages[0].content[0].text + + // Should contain truncation notice from the TOOL_ENTRY_LIMIT cap + expect(content).toContain('[...truncated') + // The last agent's start marker should be cut by the overall entry cap + // (per-agent truncation only cuts within each agent's output, not across agents) + expect(content).not.toContain('AGENT_4_START_') + // The first agent's start should survive (80% prefix) + expect(content).toContain('AGENT_0_START_') + }) + + test('includes all result properties even 
without unifiedDiff', () => { const messages = [ createMessage('user', 'Edit file'), createToolCallMessage('call-1', 'str_replace', { @@ -1515,16 +1671,19 @@ describe('context-pruner str_replace and write_file tool results', () => { replacements: [], }), createToolResultMessage('call-1', 'str_replace', { - success: true, + file: 'src/file.ts', + errorMessage: 'No match found for old string', }), ] const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - // Should have the tool call summary but not the result + // Should have both the tool call summary and the full result expect(content).toContain('Edited file: src/file.ts') - expect(content).not.toContain('[EDIT RESULT]') + expect(content).toContain('[EDIT RESULT: str_replace]') + expect(content).toContain('errorMessage') + expect(content).toContain('No match found for old string') }) }) @@ -1560,11 +1719,11 @@ describe('context-pruner glob and list_directory tools', () => { return results } - test('summarizes glob tool with patterns', () => { + test('summarizes glob tool with pattern', () => { const messages = [ createMessage('user', 'Find files'), createToolCallMessage('call-1', 'glob', { - patterns: [{ pattern: '*.ts' }, { pattern: '*.js' }], + pattern: '**/*.ts', }), createToolResultMessage('call-1', 'glob', { files: [] }), ] @@ -1572,14 +1731,14 @@ describe('context-pruner glob and list_directory tools', () => { const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('Glob: *.ts, *.js') + expect(content).toContain('Glob: **/*.ts') }) - test('summarizes list_directory tool with paths', () => { + test('summarizes list_directory tool with path', () => { const messages = [ createMessage('user', 'List directories'), createToolCallMessage('call-1', 'list_directory', { - directories: [{ path: 'src' }, { path: 'lib' }], + path: 'src', }), createToolResultMessage('call-1', 'list_directory', { 
entries: [] }), ] @@ -1587,7 +1746,7 @@ describe('context-pruner glob and list_directory tools', () => { const results = runHandleSteps(messages) const content = results[0].input.messages[0].content[0].text - expect(content).toContain('Listed dirs: src, lib') + expect(content).toContain('Listed dir: src') }) test('summarizes read_subtree tool with paths', () => { @@ -1605,3 +1764,597 @@ describe('context-pruner glob and list_directory tools', () => { expect(content).toContain('Read subtree: src/components, src/utils') }) }) + +describe('context-pruner dual-budget behavior', () => { + let mockAgentState: AgentState + + beforeEach(() => { + mockAgentState = createMockAgentState([], 0) + }) + + const runHandleSteps = ( + messages: Message[], + contextTokenCount: number, + maxContextLength: number, + budgets?: { assistantToolBudget?: number; userBudget?: number }, + ) => { + mockAgentState.messageHistory = messages + mockAgentState.contextTokenCount = contextTokenCount + const mockLogger = { + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + } + const generator = contextPruner.handleSteps!({ + agentState: mockAgentState, + logger: mockLogger, + params: { maxContextLength, ...budgets }, + }) + const results: any[] = [] + let result = generator.next() + while (!result.done) { + if (typeof result.value === 'object') { + results.push(result.value) + } + result = generator.next() + } + return results + } + + test('includes recent messages in summary and drops older ones', () => { + const messages = [ + createMessage('user', 'Old user message 1'), + createMessage('assistant', 'Old assistant response 1'), + createMessage('user', 'Old user message 2'), + createMessage('assistant', 'Old assistant response 2'), + createMessage('user', 'Recent user message'), + createMessage('assistant', 'Recent assistant response'), + ] + + // Small budgets on summarized sizes: only the most recent entries fit + const results = runHandleSteps(messages, 250000, 200000, { 
+ assistantToolBudget: 15, + userBudget: 15, + }) + + const resultMessages = results[0].input.messages + + // Should be a single summary message (no verbatim messages) + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('<conversation_summary>') + + // Recent messages should be in the summary + expect(content).toContain('Recent user message') + expect(content).toContain('Recent assistant response') + + // Older messages should be dropped entirely (not in summary) + expect(content).not.toContain('Old user message 1') + expect(content).not.toContain('Old assistant response 1') + expect(content).not.toContain('Old user message 2') + expect(content).not.toContain('Old assistant response 2') + }) + + test('summarizes all messages when they fit within budgets', () => { + const messages = [ + createMessage('user', 'Hello'), + createMessage('assistant', 'Hi there!'), + createMessage('user', 'How are you?'), + createMessage('assistant', 'I am fine!'), + ] + + // Large budgets: all messages fit in summary + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 20000, + userBudget: 50000, + }) + + const resultMessages = results[0].input.messages + + // All messages summarized into one + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('Hello') + expect(content).toContain('Hi there!') + expect(content).toContain('How are you?') + expect(content).toContain('I am fine!') + }) + + test('respects user budget separately from assistant+tool budget', () => { + const largeUserText = 'U'.repeat(600) // ~200 tokens + const messages = [ + createMessage('user', largeUserText), + createMessage('assistant', 'Short response'), + createMessage('user', 'Recent short question'), + createMessage('assistant', 'Recent short answer'), + ] + + // User budget small enough to exclude the large user message + // 
Assistant budget large enough to include all assistant messages + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 5000, + userBudget: 100, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('<conversation_summary>') + // The large user message should be dropped (not in summary) + expect(content).not.toContain(largeUserText) + // Recent messages should be in the summary + expect(content).toContain('Recent short question') + expect(content).toContain('Recent short answer') + }) + + test('drops tool entries beyond budget at the cutoff boundary', () => { + const messages = [ + createMessage('user', 'Old message'), + createToolCallMessage('call-1', 'read_files', { paths: ['old.ts'] }), + createToolResultMessage('call-1', 'read_files', { content: 'old file' }), + createMessage('user', 'Recent message'), + createMessage('assistant', 'Recent response'), + ] + + // Budget that excludes the older tool call entry + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 15, + userBudget: 15, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + + // Recent messages should be in the summary + expect(content).toContain('Recent message') + expect(content).toContain('Recent response') + + // Tool call summary should be dropped (beyond budget) + expect(content).not.toContain('old.ts') + }) + + test('counts tool result summaries against assistant+tool budget', () => { + // Use str_replace with a large result — this produces a summarized [EDIT RESULT] entry + const largeDiff = 'LARGE_DIFF_CONTENT_' + 'X'.repeat(900) + const messages = [ + createMessage('user', 'Do something'), + createToolCallMessage('call-1', 'str_replace', { path: 'big.ts', replacements: [] }), + 
createToolResultMessage('call-1', 'str_replace', { file: 'big.ts', message: 'Updated', unifiedDiff: largeDiff }), + createMessage('user', 'Recent question'), + createMessage('assistant', 'Recent answer'), + ] + + // Assistant budget too small for the large [EDIT RESULT] summary entry + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 100, + userBudget: 5000, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('<conversation_summary>') + // Recent messages should be in the summary + expect(content).toContain('Recent question') + expect(content).toContain('Recent answer') + // Large edit result entry should be dropped (exceeds assistant+tool budget) + expect(content).not.toContain('LARGE_DIFF_CONTENT_') + }) + + test('drops older messages and includes recent ones in summary', () => { + const messages = [ + createMessage('user', 'First request about feature A'), + createMessage('assistant', 'Working on feature A'), + createMessage('user', 'Second request about feature B'), + createMessage('assistant', 'Working on feature B'), + ] + + // Budget only fits the last pair of summarized entries + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 15, + userBudget: 15, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('<conversation_summary>') + + // Recent messages should be in the summary + expect(content).toContain('Second request about feature B') + expect(content).toContain('Working on feature B') + + // Older messages should be dropped + expect(content).not.toContain('First request about feature A') + expect(content).not.toContain('Working on feature A') + }) + + test('excludes STEP_PROMPT tagged messages from budget calculation', () => { + const 
largeStepPrompt = 'S'.repeat(900) // ~300 tokens + const messages: Message[] = [ + createMessage('user', 'User request'), + createMessage('assistant', 'Assistant response'), + { + role: 'user', + content: [{ type: 'text', text: largeStepPrompt }], + tags: ['STEP_PROMPT'], + }, + createMessage('user', 'Recent question'), + createMessage('assistant', 'Recent answer'), + ] + + // Budget is small but the STEP_PROMPT should NOT count against it, + // so both real user messages and both assistant messages should fit + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 200, + userBudget: 200, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + // Both real messages should be in the summary + expect(content).toContain('User request') + expect(content).toContain('Assistant response') + expect(content).toContain('Recent question') + expect(content).toContain('Recent answer') + // STEP_PROMPT content should NOT be in the summary + expect(content).not.toContain(largeStepPrompt) + }) + + test('excludes SUBAGENT_SPAWN tagged messages from budget calculation', () => { + const messages: Message[] = [ + createMessage('user', 'User request'), + createMessage('assistant', 'First response'), + { + role: 'assistant', + content: [{ type: 'text', text: 'A'.repeat(900) }], + tags: ['SUBAGENT_SPAWN'], + }, + createMessage('user', 'Follow up'), + createMessage('assistant', 'Second response'), + ] + + // Budget is small but SUBAGENT_SPAWN should NOT count against it + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 200, + userBudget: 200, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + expect(content).toContain('User request') + expect(content).toContain('First response') + 
expect(content).toContain('Follow up') + expect(content).toContain('Second response') + }) + + test('charges old summary entries against their correct budgets', () => { + // Previous summary with a large [USER] entry that exceeds user budget + const largeUserContent = 'X'.repeat(900) + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `<conversation_summary>\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\n${largeUserContent}\n\n---\n\n[ASSISTANT]\nOld assistant response\n</conversation_summary>`, + }, + ], + } + + const messages: Message[] = [ + previousSummary, + createMessage('user', 'After summary request'), + createMessage('assistant', 'After summary response'), + ] + + // User budget is small — the large [USER] entry from the old summary + // should be dropped because it exceeds the user budget. + // The [ASSISTANT] entry from the old summary charges against assistant budget. + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 5000, + userBudget: 50, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + // Recent messages should be in the summary + expect(content).toContain('After summary request') + expect(content).toContain('After summary response') + // The old [ASSISTANT] entry fits the assistant budget and is after the cutoff + expect(content).toContain('Old assistant response') + // The large old [USER] entry should be dropped (exceeded user budget) + expect(content).not.toContain(largeUserContent) + }) + + test('drops old summary entries individually based on budget walk', () => { + // Previous summary with identifiable oldest and middle entries + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `\nThis is a summary of the conversation so far. 
The original messages have been condensed to save context space.\n\n[USER]\nOLDEST_USER_ENTRY\n\n---\n\n[ASSISTANT]\nOLDEST_ASSISTANT_ENTRY\n\n---\n\n[USER]\nMIDDLE_USER_ENTRY\n\n---\n\n[ASSISTANT]\nMIDDLE_ASSISTANT_ENTRY\n`, + }, + ], + } + + const messages: Message[] = [ + previousSummary, + createMessage('user', 'Recent request'), + createMessage('assistant', 'Recent response'), + ] + + // Budget large enough for middle + recent entries but not oldest + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 25, + userBudget: 25, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + // Middle and recent entries should survive + expect(content).toContain('MIDDLE_USER_ENTRY') + expect(content).toContain('MIDDLE_ASSISTANT_ENTRY') + expect(content).toContain('Recent request') + expect(content).toContain('Recent response') + // Oldest entries should be dropped + expect(content).not.toContain('OLDEST_USER_ENTRY') + expect(content).not.toContain('OLDEST_ASSISTANT_ENTRY') + }) + + test('handles complex scenario with long messages of all types and previous summary', () => { + // Previous summary with 4 identifiable entries + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `\nThis is a summary of the conversation so far. 
The original messages have been condensed to save context space.\n\n[USER]\nOLD_USER_REQUEST_1: The user asked about setting up authentication with OAuth2 and JWT tokens for the API.\n\n---\n\n[ASSISTANT]\nOLD_ASSISTANT_RESPONSE_1: Explained OAuth2 flow and implemented JWT token generation.\nTools: Read files: src/auth.ts, src/middleware.ts; Edited file: src/auth.ts\n\n---\n\n[USER]\nOLD_USER_REQUEST_2: Asked for unit tests for the auth module.\n\n---\n\n[ASSISTANT]\nOLD_ASSISTANT_RESPONSE_2: Created comprehensive test suite for authentication.\nTools: Wrote file: src/__tests__/auth.test.ts\n`, + }, + ], + } + + // Long user message (~45k chars, exceeds USER_MESSAGE_LIMIT of 13k tokens = 39k chars) + // Middle marker placed ~85% through so it falls in the truncated gap + // (past the 80% prefix but before the 20% suffix) + const longUserMessage = 'LONG_USER_START_' + 'Here is a detailed specification for the new feature. '.repeat(650) + '_LONG_USER_MIDDLE_MARKER_' + 'Here is a detailed specification for the new feature. '.repeat(150) + + // Long assistant message with text (~8k chars, exceeds ASSISTANT_MESSAGE_LIMIT of 1.3k tokens = 3.9k chars) + // plus multiple tool calls. Middle marker placed ~60% through so it falls in the truncated gap. + const longAssistantText = 'LONG_ASSISTANT_START_' + 'I will implement this step by step, starting with the data model changes. '.repeat(60) + '_LONG_ASST_MIDDLE_MARKER_' + 'I will implement this step by step, starting with the data model changes. 
'.repeat(40) + const assistantWithToolCalls: Message = { + role: 'assistant', + content: [ + { type: 'text', text: longAssistantText }, + { + type: 'tool-call', + toolCallId: 'call-1', + toolName: 'read_files', + input: { paths: ['src/model.ts', 'src/service.ts'] }, + }, + { + type: 'tool-call', + toolCallId: 'call-2', + toolName: 'str_replace', + input: { path: 'src/model.ts', replacements: [] }, + }, + { + type: 'tool-call', + toolCallId: 'call-3', + toolName: 'spawn_agents', + input: { + agents: [ + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + { agent_type: 'editor' }, + ], + }, + }, + ], + } + + // str_replace result with a large diff (~3k chars, exceeds 2k truncation limit) + const largeDiff = 'DIFF_START_MARKER_' + '+added line\n'.repeat(250) + '_DIFF_END_MARKER' + + // spawn_agents result with 5 non-blacklisted agents producing large outputs + // Each ~4k chars, total ~20k, exceeds TOOL_ENTRY_LIMIT of 5k tokens = 15k chars + const largeAgentResults = Array.from({ length: 5 }, (_, i) => ({ + agentType: 'editor', + value: { + type: 'string', + value: `AGENT_${i}_OUTPUT_START_` + 'Implementation details. 
'.repeat(160) + `_AGENT_${i}_OUTPUT_END`, + }, + })) + + const messages: Message[] = [ + previousSummary, + createMessage('user', longUserMessage), + assistantWithToolCalls, + createToolResultMessage('call-1', 'read_files', { content: 'file data' } as JSONValue), + createToolResultMessage('call-2', 'str_replace', { file: 'src/model.ts', message: 'Updated', unifiedDiff: largeDiff }), + { + role: 'tool', + toolCallId: 'call-3', + toolName: 'spawn_agents', + content: [{ type: 'json', value: largeAgentResults }], + } as ToolMessage, + createMessage('user', 'FINAL_USER_REQUEST: Now run the tests'), + createMessage('assistant', 'FINAL_ASSISTANT_RESPONSE: Running tests now'), + ] + + // Use default budgets — everything should fit + const results = runHandleSteps(messages, 250000, 200000) + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + + // === Structure checks === + expect(content).toContain('') + expect(content).toContain('') + const summaryTagCount = (content.match(//g) || []).length + expect(summaryTagCount).toBe(1) + + // === Previous summary entries preserved === + expect(content).toContain('OLD_USER_REQUEST_1') + expect(content).toContain('OLD_ASSISTANT_RESPONSE_1') + expect(content).toContain('OLD_USER_REQUEST_2') + expect(content).toContain('OLD_ASSISTANT_RESPONSE_2') + + // === Long user message: truncated with 80/20 split === + expect(content).toContain('LONG_USER_START_') + expect(content).not.toContain('_LONG_USER_MIDDLE_MARKER_') // Middle marker falls in truncated gap + expect(content).toContain('[...truncated') + + // === Long assistant text: truncated === + expect(content).toContain('LONG_ASSISTANT_START_') + expect(content).not.toContain('_LONG_ASST_MIDDLE_MARKER_') // Middle marker falls in truncated gap + + // === Tool call summaries present === + expect(content).toContain('Read files: src/model.ts, src/service.ts') + 
expect(content).toContain('Edited file: src/model.ts') + expect(content).toContain('Spawned agents:') + + // === str_replace result: present but truncated at 2k chars === + expect(content).toContain('[EDIT RESULT: str_replace]') + expect(content).toContain('DIFF_START_MARKER_') + expect(content).not.toContain('_DIFF_END_MARKER') // Truncated by 2k result limit + + // === spawn_agents tool entry: truncated by TOOL_ENTRY_LIMIT === + expect(content).toContain('AGENT_0_OUTPUT_START_') // First agent's start in 80% prefix + expect(content).not.toContain('AGENT_4_OUTPUT_START_') // Last agent's start falls in truncated gap + + // === Final messages present === + expect(content).toContain('FINAL_USER_REQUEST') + expect(content).toContain('FINAL_ASSISTANT_RESPONSE') + + // === Entries are separated by --- === + expect(content).toContain('---') + }) + + test('with tight budgets, drops old summary entries while keeping truncated new entries', () => { + // Same setup but with tight budgets: old summary entries get dropped, + // new entries survive (individually truncated) + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOLD_DROPPED_USER: ${'X'.repeat(600)}\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT: ${'Y'.repeat(600)}\n\n---\n\n[USER]\nOLD_DROPPED_USER_2: Asked about deployment\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT_2: Explained deployment process\n`, + }, + ], + } + + // Long user message (~12k chars, under truncation limit but uses significant budget) + const longUserMessage = 'SURVIVED_USER_START_' + 'Feature request details. 
'.repeat(400) + '_SURVIVED_USER_END' + + // Assistant with tool calls + const assistantMsg: Message = { + role: 'assistant', + content: [ + { type: 'text', text: 'SURVIVED_ASSISTANT: Working on it' }, + { + type: 'tool-call', + toolCallId: 'call-1', + toolName: 'str_replace', + input: { path: 'src/app.ts', replacements: [] }, + }, + ], + } + + // Tool result with a diff + const toolResult = createToolResultMessage('call-1', 'str_replace', { + file: 'src/app.ts', + message: 'Updated file', + unifiedDiff: '--- a/src/app.ts\n+++ b/src/app.ts\n@@ -1 +1 @@\n-old\n+SURVIVED_DIFF_CONTENT', + }) + + const messages: Message[] = [ + previousSummary, + createMessage('user', longUserMessage), + assistantMsg, + toolResult, + createMessage('user', 'SURVIVED_FINAL_USER'), + createMessage('assistant', 'SURVIVED_FINAL_ASSISTANT'), + ] + + // Tight budgets: enough for new entries but not old summary entries + // New assistant entries: ~25 (assistant text+tool) + ~56 (edit result JSON) + ~13 (final) = ~94 tokens + // Old assistant entries: ~20 for OLD_DROPPED_ASSISTANT_2 would push over budget of 100 + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 100, + userBudget: 4200, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + + // === New entries survived === + expect(content).toContain('SURVIVED_USER_START_') + expect(content).toContain('SURVIVED_ASSISTANT') + expect(content).toContain('SURVIVED_DIFF_CONTENT') + expect(content).toContain('SURVIVED_FINAL_USER') + expect(content).toContain('SURVIVED_FINAL_ASSISTANT') + + // === Old summary entries dropped by budget walk === + expect(content).not.toContain('OLD_DROPPED_USER:') + expect(content).not.toContain('OLD_DROPPED_ASSISTANT:') + expect(content).not.toContain('OLD_DROPPED_USER_2:') + expect(content).not.toContain('OLD_DROPPED_ASSISTANT_2:') + }) + + test('fully includes 
conversation summary when it fits within user budget', () => { + const previousSummary: Message = { + role: 'user', + content: [ + { + type: 'text', + text: `\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOld request about feature A\n\n---\n\n[ASSISTANT]\nWorked on feature A\n`, + }, + ], + } + + const messages: Message[] = [ + previousSummary, + createMessage('user', 'New request about feature B'), + createMessage('assistant', 'Working on feature B'), + ] + + // Large budget — everything fits + const results = runHandleSteps(messages, 250000, 200000, { + assistantToolBudget: 20000, + userBudget: 50000, + }) + + const resultMessages = results[0].input.messages + expect(resultMessages).toHaveLength(1) + + const content = (resultMessages[0].content[0] as { text: string }).text + // Previous summary content should be fully included + expect(content).toContain('Old request about feature A') + expect(content).toContain('Worked on feature A') + // New messages should also be included + expect(content).toContain('New request about feature B') + expect(content).toContain('Working on feature B') + }) +}) diff --git a/agents/browser-use/browser-use.ts b/agents/browser-use/browser-use.ts index 7b11db0f89..1536e3e361 100644 --- a/agents/browser-use/browser-use.ts +++ b/agents/browser-use/browser-use.ts @@ -127,7 +127,7 @@ const definition: AgentDefinition = { mcpServers: { 'chrome-devtools': { command: 'npx', - args: ['-y', 'chrome-devtools-mcp@latest', '--headless'], + args: ['-y', 'chrome-devtools-mcp@latest', '--headless', '--isolated'], }, }, diff --git a/agents/context-pruner.ts b/agents/context-pruner.ts index bbf495baa1..55b1dd6bf7 100644 --- a/agents/context-pruner.ts +++ b/agents/context-pruner.ts @@ -10,259 +10,6 @@ import type { UserMessage, } from './types/util-types' -// ============================================================================= -// Helper Functions (exported for 
testing) -// ============================================================================= - -/** - * Truncates long text with 80% from the beginning and 20% from the end. - * Preserves context from both ends of the text while indicating what was removed. - * - * @param text - The text to truncate - * @param limit - Maximum character length - * @returns Truncated text with notice of how many chars were removed - */ -export function truncateLongText(text: string, limit: number): string { - if (text.length <= limit) { - return text - } - const availableChars = limit - 50 // 50 chars for the truncation notice - const prefixLength = Math.floor(availableChars * 0.8) - const suffixLength = availableChars - prefixLength - const prefix = text.slice(0, prefixLength) - const suffix = text.slice(-suffixLength) - const truncatedChars = text.length - prefixLength - suffixLength - return `${prefix}\n\n[...truncated ${truncatedChars} chars...]\n\n${suffix}` -} - -/** - * Estimates token count from a JSON-serializable object. - * Uses a simple heuristic of ~3 characters per token. - * - * @param obj - The object to estimate tokens for - * @returns Estimated token count - */ -export function estimateTokens(obj: unknown): number { - return Math.ceil(JSON.stringify(obj).length / 3) -} - -/** - * Extracts text content from a message, handling both string and array formats. - * - * @param message - The message to extract text from - * @returns Combined text content from the message - */ -export function getTextContent(message: Message): string { - if (typeof message.content === 'string') { - return message.content - } - if (Array.isArray(message.content)) { - return message.content - .filter( - (part: Record) => - part.type === 'text' && typeof part.text === 'string', - ) - .map((part: Record) => part.text as string) - .join('\n') - } - return '' -} - -/** - * Summarizes a tool call into a human-readable description. - * Handles various tool types with appropriate formatting. 
- * - * @param toolName - The name of the tool - * @param input - The tool's input parameters - * @returns A concise summary of the tool call - */ -export function summarizeToolCall( - toolName: string, - input: Record, -): string { - switch (toolName) { - case 'read_files': { - const paths = input.paths as string[] | undefined - if (paths && paths.length > 0) { - return `Read files: ${paths.join(', ')}` - } - return 'Read files' - } - case 'write_file': { - const path = input.path as string | undefined - return path ? `Wrote file: ${path}` : 'Wrote file' - } - case 'str_replace': { - const path = input.path as string | undefined - return path ? `Edited file: ${path}` : 'Edited file' - } - case 'propose_write_file': { - const path = input.path as string | undefined - return path ? `Proposed write to: ${path}` : 'Proposed file write' - } - case 'propose_str_replace': { - const path = input.path as string | undefined - return path ? `Proposed edit to: ${path}` : 'Proposed file edit' - } - case 'read_subtree': { - const paths = input.paths as string[] | undefined - if (paths && paths.length > 0) { - return `Read subtree: ${paths.join(', ')}` - } - return 'Read subtree' - } - case 'code_search': { - const pattern = input.pattern as string | undefined - const flags = input.flags as string | undefined - if (pattern && flags) { - return `Code search: "${pattern}" (${flags})` - } - return pattern ? 
`Code search: "${pattern}"` : 'Code search' - } - case 'glob': { - const patterns = input.patterns as - | Array<{ pattern: string }> - | undefined - if (patterns && patterns.length > 0) { - return `Glob: ${patterns.map((p) => p.pattern).join(', ')}` - } - return 'Glob search' - } - case 'list_directory': { - const directories = input.directories as - | Array<{ path: string }> - | undefined - if (directories && directories.length > 0) { - return `Listed dirs: ${directories.map((d) => d.path).join(', ')}` - } - return 'Listed directory' - } - case 'find_files': { - const pattern = input.pattern as string | undefined - return pattern ? `Find files: "${pattern}"` : 'Find files' - } - case 'run_terminal_command': { - const command = input.command as string | undefined - if (command) { - const shortCmd = - command.length > 50 ? command.slice(0, 50) + '...' : command - return `Ran command: ${shortCmd}` - } - return 'Ran terminal command' - } - case 'spawn_agents': - case 'spawn_agent_inline': { - const agents = input.agents as - | Array<{ - agent_type: string - prompt?: string - params?: Record - }> - | undefined - const agentType = input.agent_type as string | undefined - const prompt = input.prompt as string | undefined - const agentParams = input.params as - | Record - | undefined - - if (agents && agents.length > 0) { - const agentDetails = agents.map((a) => { - let detail = a.agent_type - const extras: string[] = [] - if (a.prompt) { - const truncatedPrompt = - a.prompt.length > 1000 - ? a.prompt.slice(0, 1000) + '...' - : a.prompt - extras.push(`prompt: "${truncatedPrompt}"`) - } - if (a.params && Object.keys(a.params).length > 0) { - const paramsStr = JSON.stringify(a.params) - const truncatedParams = - paramsStr.length > 1000 - ? paramsStr.slice(0, 1000) + '...' 
- : paramsStr - extras.push(`params: ${truncatedParams}`) - } - if (extras.length > 0) { - detail += ` (${extras.join(', ')})` - } - return detail - }) - return `Spawned agents:\n${agentDetails.map((d) => `- ${d}`).join('\n')}` - } - if (agentType) { - const extras: string[] = [] - if (prompt) { - const truncatedPrompt = - prompt.length > 1000 ? prompt.slice(0, 1000) + '...' : prompt - extras.push(`prompt: "${truncatedPrompt}"`) - } - if (agentParams && Object.keys(agentParams).length > 0) { - const paramsStr = JSON.stringify(agentParams) - const truncatedParams = - paramsStr.length > 1000 - ? paramsStr.slice(0, 1000) + '...' - : paramsStr - extras.push(`params: ${truncatedParams}`) - } - if (extras.length > 0) { - return `Spawned agent: ${agentType} (${extras.join(', ')})` - } - return `Spawned agent: ${agentType}` - } - return 'Spawned agent(s)' - } - case 'write_todos': { - const todos = input.todos as - | Array<{ task: string; completed: boolean }> - | undefined - if (todos) { - const completed = todos.filter((t) => t.completed).length - const incomplete = todos.filter((t) => !t.completed) - if (incomplete.length === 0) { - return `Todos: ${completed}/${todos.length} complete (all done!)` - } - const remainingTasks = incomplete - .map((t) => `- ${t.task}`) - .join('\n') - return `Todos: ${completed}/${todos.length} complete. Remaining:\n${remainingTasks}` - } - return 'Updated todos' - } - case 'ask_user': { - const questions = input.questions as - | Array<{ question: string }> - | undefined - if (questions && questions.length > 0) { - const questionTexts = questions.map((q) => q.question).join('; ') - const truncated = - questionTexts.length > 200 - ? questionTexts.slice(0, 200) + '...' - : questionTexts - return `Asked user: ${truncated}` - } - return 'Asked user question' - } - case 'suggest_followups': - return 'Suggested followups' - case 'web_search': { - const query = input.query as string | undefined - return query ? 
`Web search: "${query}"` : 'Web search' - } - case 'read_docs': { - const query = input.query as string | undefined - return query ? `Read docs: "${query}"` : 'Read docs' - } - case 'set_output': - return 'Set output' - case 'set_messages': - return 'Set messages' - default: - return `Used tool: ${toolName}` - } -} - const definition: AgentDefinition = { id: 'context-pruner', publisher, @@ -278,6 +25,12 @@ const definition: AgentDefinition = { maxContextLength: { type: 'number', }, + assistantToolBudget: { + type: 'number', + }, + userBudget: { + type: 'number', + }, }, required: [], }, @@ -291,9 +44,6 @@ const definition: AgentDefinition = { // Constants (must be inside handleSteps since it's serialized to a string) // ============================================================================= - /** Target: summarized messages should be at most 10% of max context */ - const TARGET_SUMMARY_FACTOR = 0.1 - /** Agent IDs whose output should be excluded from spawn_agents results */ const SPAWN_AGENTS_OUTPUT_BLACKLIST = [ 'file-picker', @@ -302,11 +52,27 @@ const definition: AgentDefinition = { 'basher', 'code-reviewer', 'code-reviewer-multi-prompt', + 'librarian', + 'tmux-cli', + 'browser-use', ] - /** Limits for truncating long messages (chars) */ - const USER_MESSAGE_LIMIT = 15000 - const ASSISTANT_MESSAGE_LIMIT = 4000 + /** Limits for truncating long messages in the summary (estimated tokens) */ + const USER_MESSAGE_LIMIT = 13_000 + const ASSISTANT_MESSAGE_LIMIT = 1_300 + const TOOL_ENTRY_LIMIT = 5_000 + + /** Approximate characters per token (matches estimateTokens heuristic) */ + const CHARS_PER_TOKEN = 3 + + /** Token budget for assistant + tool content in the conversation summary */ + const ASSISTANT_TOOL_BUDGET = 20_000 + + /** Token budget for user content in the conversation summary */ + const USER_BUDGET = 50_000 + + /** Fudge factor for token count threshold to trigger pruning earlier */ + const TOKEN_COUNT_FUDGE_FACTOR = 1_000 /** Prompt cache expiry 
time (Anthropic caches for 5 minutes) */ const CACHE_EXPIRY_MS = 5 * 60 * 1000 @@ -315,8 +81,6 @@ const definition: AgentDefinition = { const SUMMARY_HEADER = 'This is a summary of the conversation so far. The original messages have been condensed to save context space.' - /** Fudge factor for token count threshold to trigger pruning earlier */ - const TOKEN_COUNT_FUDGE_FACTOR = 1000 // ============================================================================= // Helper Functions (must be inside handleSteps since it's serialized to a string) @@ -338,13 +102,6 @@ const definition: AgentDefinition = { return `${prefix}\n\n[...truncated ${truncatedChars} chars...]\n\n${suffix}` } - /** - * Estimates token count from a JSON-serializable object. - */ - function estimateTokens(obj: unknown): number { - return Math.ceil(JSON.stringify(obj).length / 3) - } - /** * Extracts text content from a message. */ @@ -411,22 +168,12 @@ const definition: AgentDefinition = { return pattern ? `Code search: "${pattern}"` : 'Code search' } case 'glob': { - const patterns = input.patterns as - | Array<{ pattern: string }> - | undefined - if (patterns && patterns.length > 0) { - return `Glob: ${patterns.map((p) => p.pattern).join(', ')}` - } - return 'Glob search' + const pattern = input.pattern as string | undefined + return pattern ? `Glob: ${pattern}` : 'Glob search' } case 'list_directory': { - const directories = input.directories as - | Array<{ path: string }> - | undefined - if (directories && directories.length > 0) { - return `Listed dirs: ${directories.map((d) => d.path).join(', ')}` - } - return 'Listed directory' + const path = input.path as string | undefined + return path ? 
`Listed dir: ${path}` : 'Listed directory' } case 'find_files': { const pattern = input.pattern as string | undefined @@ -627,69 +374,80 @@ const definition: AgentDefinition = { } // === SUMMARIZATION STRATEGY === - // Convert entire conversation to a single summarized user message - // If there's already a summary from a previous compaction, extract and preserve it + // 1. Summarize ALL messages (apply transformations: truncation, tool summaries, etc.) + // 2. Walk backwards through summarized parts to apply token budgets + // 3. Older summarized parts beyond the budgets are dropped - // Check for existing conversation summary and extract its content - let previousSummary = '' - for (const message of currentMessages) { - if (message.role === 'user' && Array.isArray(message.content)) { - for (const part of message.content) { - if (part.type === 'text' && typeof part.text === 'string') { - const text = part.text as string - const summaryMatch = text.match( - /([\s\S]*?)<\/conversation_summary>/, - ) - if (summaryMatch) { - let summaryContent = summaryMatch[1].trim() - // Remove the standard header if present - if (summaryContent.startsWith(SUMMARY_HEADER)) { - summaryContent = summaryContent - .slice(SUMMARY_HEADER.length) - .trim() - } - // Remove [PREVIOUS SUMMARY] prefix if present (from earlier compaction) - // to avoid nested markers - if (summaryContent.startsWith('[PREVIOUS SUMMARY]')) { - summaryContent = summaryContent - .slice('[PREVIOUS SUMMARY]'.length) - .trim() - } - previousSummary = summaryContent - } - } - } + const assistantToolBudget: number = params?.assistantToolBudget ?? ASSISTANT_TOOL_BUDGET + const userBudget: number = params?.userBudget ?? 
USER_BUDGET + + function shouldExcludeMessage(message: Message): boolean { + if (message.tags?.includes('INSTRUCTIONS_PROMPT')) return true + if (message.tags?.includes('STEP_PROMPT')) return true + if (message.tags?.includes('SUBAGENT_SPAWN')) return true + return false + } + + function isConversationSummary(message: Message): boolean { + if (message.role !== 'user') return false + return getTextContent(message).includes('') + } + + function extractSummaryContent(message: Message): string { + const text = getTextContent(message) + const match = text.match( + /([\s\S]*?)<\/conversation_summary>/, + ) + if (!match) return '' + let content = match[1].trim() + if (content.startsWith(SUMMARY_HEADER)) { + content = content.slice(SUMMARY_HEADER.length).trim() } + return content } - // Filter out messages that are previous summaries or have special tags to exclude - const messagesWithoutOldSummaries = currentMessages.filter((message) => { - // Exclude messages with special tags that shouldn't be in the summary - if (message.tags?.includes('INSTRUCTIONS_PROMPT')) return false - if (message.tags?.includes('STEP_PROMPT')) return false - if (message.tags?.includes('SUBAGENT_SPAWN')) return false - - // Exclude previous conversation summaries - if (message.role === 'user' && Array.isArray(message.content)) { - for (const part of message.content) { - if (part.type === 'text' && typeof part.text === 'string') { - if ((part.text as string).includes('')) { - return false - } - } + /** + * Parses a previous summary text blob into role-tagged entries. + * Splits on the --- separator and determines each chunk's role + * based on its prefix marker. 
+ */ + function parseSummaryIntoEntries( + summaryText: string, + ): Array<{ role: 'user' | 'assistant_tool'; parts: string[] }> { + if (!summaryText.trim()) return [] + + const separator = '\n\n---\n\n' + const chunks = summaryText.split(separator).filter((c) => c.trim()) + + return chunks.map((chunk) => { + const trimmed = chunk.trim() + const isUser = + trimmed.startsWith('[USER]\n') || + trimmed.startsWith('[USER] [with image') + return { + role: isUser ? ('user' as const) : ('assistant_tool' as const), + parts: [trimmed], } + }) + } + + // Extract previous summary content from all messages + let previousSummaryContent = '' + for (const message of currentMessages) { + if (isConversationSummary(message)) { + previousSummaryContent = extractSummaryContent(message) } - return true - }) + } - // Build the summary - const summaryParts: string[] = [] + // Filter out excluded and conversation summary messages for summarization + const messagesToSummarize = currentMessages.filter( + (message) => !shouldExcludeMessage(message) && !isConversationSummary(message), + ) // Find the last user message with images to preserve in the final output - // We preserve the most recent user's images since they're likely the most relevant let lastUserImageParts: Array> = [] - for (let i = messagesWithoutOldSummaries.length - 1; i >= 0; i--) { - const msg = messagesWithoutOldSummaries[i] + for (let i = messagesToSummarize.length - 1; i >= 0; i--) { + const msg = messagesToSummarize[i] if (msg.role === 'user' && Array.isArray(msg.content)) { const imageParts = msg.content.filter( (part: Record) => @@ -702,18 +460,14 @@ const definition: AgentDefinition = { } } - // If there was a previous summary, include it first (no marker needed, already chronological) - if (previousSummary) { - summaryParts.push(previousSummary) - } + // Phase 1: Summarize ALL messages into tagged entries + const summarizedEntries: Array<{ role: 'user' | 'assistant_tool'; parts: string[] }> = [] - for (const message 
of messagesWithoutOldSummaries) { + for (const message of messagesToSummarize) { if (message.role === 'user') { let text = getTextContent(message).trim() if (text) { - // Truncate very long user messages (80% prefix, 20% suffix) - text = truncateLongText(text, USER_MESSAGE_LIMIT) - // Check for images in the message + text = truncateLongText(text, USER_MESSAGE_LIMIT * CHARS_PER_TOKEN) let hasImages = false if (Array.isArray(message.content)) { hasImages = message.content.some( @@ -722,7 +476,10 @@ const definition: AgentDefinition = { ) } const imageNote = hasImages ? ' [with image(s)]' : '' - summaryParts.push(`[USER]${imageNote}\n${text}`) + summarizedEntries.push({ + role: 'user', + parts: [`[USER]${imageNote}\n${text}`], + }) } } else if (message.role === 'assistant') { const textParts: string[] = [] @@ -731,7 +488,6 @@ const definition: AgentDefinition = { if (Array.isArray(message.content)) { for (const part of message.content) { if (part.type === 'text' && typeof part.text === 'string') { - // Remove tags and their contents before summarizing const textWithoutThinkTags = (part.text as string) .replace(/[\s\S]*?<\/think>/g, '') .trim() @@ -748,9 +504,8 @@ const definition: AgentDefinition = { const parts: string[] = [] if (textParts.length > 0) { - // Truncate very long assistant text (80% prefix, 20% suffix) let combinedText = textParts.join('\n') - combinedText = truncateLongText(combinedText, ASSISTANT_MESSAGE_LIMIT) + combinedText = truncateLongText(combinedText, ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) parts.push(combinedText) } if (toolSummaries.length > 0) { @@ -758,44 +513,43 @@ const definition: AgentDefinition = { } if (parts.length > 0) { - summaryParts.push(`[ASSISTANT]\n${parts.join('\n')}`) + summarizedEntries.push({ + role: 'assistant_tool', + parts: [`[ASSISTANT]\n${parts.join('\n')}`], + }) } } else if (message.role === 'tool') { - // Tool results are already captured via the tool-call summaries - // But we capture errors, terminal exit 
codes, and ask_user answers const toolMessage = message as ToolMessage + const entryParts: string[] = [] + if (Array.isArray(toolMessage.content)) { for (const part of toolMessage.content) { if (part.type === 'json' && part.value) { const value = part.value as Record - // Capture errors if (value.errorMessage || value.error) { let errorText = String(value.errorMessage || value.error) - // Truncate long error messages to 100 chars if (errorText.length > 100) { errorText = errorText.slice(0, 100) + '...' } - summaryParts.push( + entryParts.push( `[TOOL ERROR: ${toolMessage.toolName}] ${errorText}`, ) } - // Capture terminal command exit codes (non-zero = failure) if ( toolMessage.toolName === 'run_terminal_command' && 'exitCode' in value ) { const exitCode = value.exitCode as number if (exitCode !== 0) { - summaryParts.push(`[COMMAND FAILED] Exit code: ${exitCode}`) + entryParts.push(`[COMMAND FAILED] Exit code: ${exitCode}`) } } - // Capture ask_user answers or skipped if (toolMessage.toolName === 'ask_user') { if (value.skipped) { - summaryParts.push('[USER SKIPPED QUESTION]') + entryParts.push('[USER SKIPPED QUESTION]') } else if ('answers' in value) { const answers = value.answers as | Array<{ @@ -814,43 +568,34 @@ const definition: AgentDefinition = { return '(no answer)' }) .join('; ') - // Truncate long answers to 10,000 chars const truncated = answerTexts.length > 10_000 ? answerTexts.slice(0, 10_000) + '...' : answerTexts - summaryParts.push(`[USER ANSWERED] ${truncated}`) + entryParts.push(`[USER ANSWERED] ${truncated}`) } } } - // Capture str_replace results (diff of changes made) - if (toolMessage.toolName === 'str_replace') { - const diff = value.diff as string | undefined - if (diff) { - // Truncate long diffs to 2000 chars - const truncatedDiff = - diff.length > 2000 ? diff.slice(0, 2000) + '...' 
: diff - summaryParts.push(`[EDIT RESULT]\n${truncatedDiff}`) - } - } - - // Capture write_file results (diff of changes made) - if (toolMessage.toolName === 'write_file') { - const diff = value.diff as string | undefined - if (diff) { - // Truncate long diffs to 2000 chars - const truncatedDiff = - diff.length > 2000 ? diff.slice(0, 2000) + '...' : diff - summaryParts.push(`[WRITE RESULT]\n${truncatedDiff}`) - } + if ( + toolMessage.toolName === 'str_replace' || + toolMessage.toolName === 'propose_str_replace' || + toolMessage.toolName === 'write_file' || + toolMessage.toolName === 'propose_write_file' + ) { + const resultStr = JSON.stringify(value) + const truncatedResult = + resultStr.length > 2000 + ? resultStr.slice(0, 2000) + '...' + : resultStr + entryParts.push( + `[EDIT RESULT: ${toolMessage.toolName}]\n${truncatedResult}`, + ) } } } } - // Capture spawn_agents results (excluding blacklisted agents) - // The tool result value is an array of agent results at the top level if ( toolMessage.toolName === 'spawn_agents' && Array.isArray(toolMessage.content) @@ -873,72 +618,88 @@ const definition: AgentDefinition = { if (includedResults.length > 0) { const resultSummaries = includedResults.map((r) => { let outputStr = '' - // Extract the actual output from value.value (e.g., lastMessage content) if (r.value?.value !== undefined && r.value?.value !== null) { if (typeof r.value.value === 'string') { outputStr = r.value.value } else { outputStr = JSON.stringify(r.value.value) } - // Remove tags and their contents to save context tokens outputStr = outputStr .replace(/[\s\S]*?<\/think>/g, '') .trim() - // Truncate long outputs to ASSISTANT_MESSAGE_LIMIT chars - if (outputStr.length > ASSISTANT_MESSAGE_LIMIT) { + if (outputStr.length > ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) { outputStr = - outputStr.slice(0, ASSISTANT_MESSAGE_LIMIT) + '...' + outputStr.slice(0, ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) + '...' 
} } return `- ${r.agentType}: ${outputStr || '(no output)'}` }) - summaryParts.push( + entryParts.push( `[AGENT RESULTS]\n${resultSummaries.join('\n')}`, ) } } } } + + if (entryParts.length > 0) { + const joinedToolEntry = truncateLongText( + entryParts.join('\n\n'), + TOOL_ENTRY_LIMIT * CHARS_PER_TOKEN, + ) + summarizedEntries.push({ + role: 'assistant_tool', + parts: [joinedToolEntry], + }) + } } } - let summaryText = summaryParts.join('\n\n---\n\n') - - // Calculate target size (10% of max context, for messages only) - const targetTokens = maxContextLength * TARGET_SUMMARY_FACTOR - let summaryTokens = estimateTokens(summaryText) + // Parse previous summary into role-tagged entries and combine with new entries + const allEntries = [ + ...parseSummaryIntoEntries(previousSummaryContent), + ...summarizedEntries, + ] - // If summary is too big, truncate from the beginning - if (summaryTokens > targetTokens) { - const truncationMessage = - '[CONVERSATION TRUNCATED - Earlier messages omitted due to length]\n\n' - const truncationTokens = estimateTokens(truncationMessage) - const availableTokens = targetTokens - truncationTokens + // Phase 2: Walk backwards through all entries to apply token budgets + let assistantToolTokens = 0 + let userTokens = 0 + let cutoffIndex = 0 - // Estimate characters to keep (rough: 3 chars per token) - const charsToKeep = Math.floor(availableTokens * 3) + for (let i = allEntries.length - 1; i >= 0; i--) { + const entry = allEntries[i] + const entryText = entry.parts.join('\n\n---\n\n') + const entryTokens = Math.ceil(entryText.length / CHARS_PER_TOKEN) - if (charsToKeep > 0 && charsToKeep < summaryText.length) { - // Truncate from the beginning, try to find a clean break point - const truncatedText = summaryText.slice(-charsToKeep) - // Find the first separator to make a clean cut - const separatorIndex = truncatedText.indexOf('\n\n---\n\n') - if ( - separatorIndex !== -1 && - separatorIndex < truncatedText.length / 2 - ) { - summaryText = 
- truncationMessage + - truncatedText.slice(separatorIndex + '\n\n---\n\n'.length) - } else { - summaryText = truncationMessage + truncatedText + if (entry.role === 'user') { + if (userTokens + entryTokens > userBudget) { + cutoffIndex = i + 1 + break + } + userTokens += entryTokens + } else { + if (assistantToolTokens + entryTokens > assistantToolBudget) { + cutoffIndex = i + 1 + break } - } else if (charsToKeep <= 0) { - summaryText = - truncationMessage + '[Summary too large - content omitted]' + assistantToolTokens += entryTokens } } + // Phase 3: Build final summary from included entries + const summaryParts: string[] = [] + + for (let i = cutoffIndex; i < allEntries.length; i++) { + summaryParts.push(...allEntries[i].parts) + } + + // Fallback: if nothing fit within budgets, always include at least the newest entry + if (summaryParts.length === 0 && allEntries.length > 0) { + summaryParts.push(...allEntries[allEntries.length - 1].parts) + } + + const summaryText = summaryParts.join('\n\n---\n\n') + // Create the summarized message with fresh sentAt timestamp // Include any images from the last user message that had images const now = Date.now() diff --git a/cli/release/package.json b/cli/release/package.json index f51779ae8b..e737956880 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.631", + "version": "1.0.633", "description": "AI coding agent", "license": "MIT", "bin": { diff --git a/cli/src/commands/__tests__/router-input.test.ts b/cli/src/commands/__tests__/router-input.test.ts index ac1310a795..653063abbc 100644 --- a/cli/src/commands/__tests__/router-input.test.ts +++ b/cli/src/commands/__tests__/router-input.test.ts @@ -372,22 +372,16 @@ describe('command-registry', () => { } }) - test('connect slash command presence matches feature flag', () => { - const { CHATGPT_OAUTH_ENABLED } = require('@codebuff/common/constants/chatgpt-oauth') + test('connect command is not available in 
codebuff (freebuff-only)', () => { const hasConnectSlashCommand = SLASH_COMMANDS.some( (cmd) => cmd.id === 'connect', ) - expect(hasConnectSlashCommand).toBe(CHATGPT_OAUTH_ENABLED) + expect(hasConnectSlashCommand).toBe(false) }) - test('connect:chatgpt command registry availability matches feature flag', () => { - const { CHATGPT_OAUTH_ENABLED } = require('@codebuff/common/constants/chatgpt-oauth') + test('connect:chatgpt command is not available in codebuff (freebuff-only)', () => { const command = findCommand('connect:chatgpt') - if (CHATGPT_OAUTH_ENABLED) { - expect(command).toBeDefined() - } else { - expect(command).toBeUndefined() - } + expect(command).toBeUndefined() }) }) }) diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index b5b81d5800..69b8857b2e 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -179,6 +179,7 @@ const FREEBUFF_REMOVED_COMMANDS = new Set([ ]) const FREEBUFF_ONLY_COMMANDS = new Set([ + 'connect', 'plan', ]) diff --git a/cli/src/components/help-banner.tsx b/cli/src/components/help-banner.tsx index 0e0ee17007..ccf39bdf82 100644 --- a/cli/src/components/help-banner.tsx +++ b/cli/src/components/help-banner.tsx @@ -38,6 +38,7 @@ export const HelpBanner = () => { const theme = useTheme() const { data: subscriptionData } = useSubscriptionQuery() const hasSubscription = subscriptionData?.hasSubscription ?? 
false + const chatGptOAuth = getChatGptOAuthStatus() // Auto-hide after timeout React.useEffect(() => { @@ -79,11 +80,16 @@ export const HelpBanner = () => { Tips - {IS_FREEBUFF && !getChatGptOAuthStatus().connected && ( + {IS_FREEBUFF && !chatGptOAuth.connected && ( Connect via /connect to unlock /plan & /review )} + {IS_FREEBUFF && chatGptOAuth.connected && ( + + Try workflow: /interview → /plan → implement → /review + + )} Use @ to reference agents to spawn or files to read diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts index 50dd90f0d2..6893640516 100644 --- a/cli/src/data/slash-commands.ts +++ b/cli/src/data/slash-commands.ts @@ -47,6 +47,7 @@ const FREEBUFF_REMOVED_COMMAND_IDS = new Set([ ]) const FREEBUFF_ONLY_COMMAND_IDS = new Set([ + 'connect', 'plan', ]) diff --git a/cli/src/hooks/use-auth-state.ts b/cli/src/hooks/use-auth-state.ts index e800b3355f..5f5ef29d01 100644 --- a/cli/src/hooks/use-auth-state.ts +++ b/cli/src/hooks/use-auth-state.ts @@ -6,6 +6,7 @@ import { useLoginStore } from '../state/login-store' import { identifyUser, trackEvent } from '../utils/analytics' import { getUserCredentials } from '../utils/auth' import { resetCodebuffClient } from '../utils/codebuff-client' +import { IS_FREEBUFF } from '../utils/constants' import { loggerContext } from '../utils/logger' import type { MultilineInputHandle } from '../components/multiline-input' @@ -14,7 +15,7 @@ import type { User } from '../utils/auth' const setAuthLoggerContext = (params: { userId: string; email: string }) => { loggerContext.userId = params.userId loggerContext.userEmail = params.email - identifyUser(params.userId, { email: params.email }) + identifyUser(params.userId, { email: params.email, freebuff: IS_FREEBUFF }) } const clearAuthLoggerContext = () => { diff --git a/cli/src/index.tsx b/cli/src/index.tsx index 62579dba34..7f2e3de77c 100644 --- a/cli/src/index.tsx +++ b/cli/src/index.tsx @@ -23,7 +23,7 @@ import { handlePublish } from 
'./commands/publish' import { runPlainLogin } from './login/plain-login' import { initializeApp } from './init/init-app' import { getProjectRoot, setProjectRoot } from './project-files' -import { initAnalytics, trackEvent } from './utils/analytics' +import { trackEvent } from './utils/analytics' import { getAuthToken, getAuthTokenDetails } from './utils/auth' import { resetCodebuffClient } from './utils/codebuff-client' import { setApiClientAuthToken } from './utils/codebuff-api' @@ -66,7 +66,7 @@ function loadPackageVersion(): string { // Without this, refetchInterval won't work because TanStack Query thinks the app is "unfocused" focusManager.setEventListener(() => { // No-op: no event listeners in CLI environment (no window focus/visibility events) - return () => {} + return () => { } }) focusManager.setFocused(true) @@ -222,26 +222,17 @@ async function main(): Promise { const startCwd = process.cwd() const showProjectPicker = shouldShowProjectPicker(startCwd, homeDir) - // Initialize analytics early, before anything that might use the logger - // (the logger calls trackEvent, which throws if analytics isn't initialized) - try { - initAnalytics() - - // Track app launch event - trackEvent(AnalyticsEvent.APP_LAUNCHED, { - version: loadPackageVersion(), - platform: process.platform, - arch: process.arch, - hasInitialPrompt: Boolean(initialPrompt), - hasAgentOverride: hasAgentOverride, - continueChat, - initialMode: initialMode ?? 'DEFAULT', - isFreeBuff: IS_FREEBUFF, - }) - } catch (error) { - // Analytics initialization is optional - don't fail the app if it errors - logger.debug(error, 'Failed to initialize analytics') - } + // Requires analytics to be initialized, which is done in initializeApp + trackEvent(AnalyticsEvent.APP_LAUNCHED, { + version: loadPackageVersion(), + platform: process.platform, + arch: process.arch, + hasInitialPrompt: Boolean(initialPrompt), + hasAgentOverride: hasAgentOverride, + continueChat, + initialMode: initialMode ?? 
'DEFAULT', + isFreeBuff: IS_FREEBUFF, + }) // Initialize agent registry (loads user agents via SDK). // When --agent is provided, skip local .agents to avoid overrides. diff --git a/cli/src/init/init-app.ts b/cli/src/init/init-app.ts index 133c3ca181..1b8ae41efa 100644 --- a/cli/src/init/init-app.ts +++ b/cli/src/init/init-app.ts @@ -12,6 +12,7 @@ import { initializeThemeStore } from '../hooks/use-theme' import { setProjectRoot } from '../project-files' import { initTimestampFormatter } from '../utils/helpers' import { enableManualThemeRefresh } from '../utils/theme-system' +import { initAnalytics } from '../utils/analytics' import { initializeDirenv } from './init-direnv' export async function initializeApp(params: { cwd?: string }): Promise { @@ -21,6 +22,14 @@ export async function initializeApp(params: { cwd?: string }): Promise { const baseCwd = process.cwd() setProjectRoot(baseCwd) + // Initialize analytics before direnv, because direnv uses the logger + // which calls trackEvent — analytics must be ready first. 
+ try { + initAnalytics() + } catch (error) { + console.debug('Failed to initialize analytics:', error) + } + // Initialize direnv environment before anything else initializeDirenv() diff --git a/common/src/analytics.ts b/common/src/analytics.ts index 46965bd17d..ea88cf7e59 100644 --- a/common/src/analytics.ts +++ b/common/src/analytics.ts @@ -3,6 +3,7 @@ import { env, DEBUG_ANALYTICS } from '@codebuff/common/env' import { createPostHogClient, type AnalyticsClient } from './analytics-core' import { AnalyticsEvent } from './constants/analytics-events' +import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' import type { Logger } from '@codebuff/common/types/contracts/logger' let client: AnalyticsClient | undefined @@ -32,6 +33,18 @@ export async function flushAnalytics(logger?: Logger) { } } +export function withDefaultProperties( + trackEventFn: TrackEventFn, + defaultProperties: Record, +): TrackEventFn { + return (params) => { + trackEventFn({ + ...params, + properties: { ...defaultProperties, ...params.properties }, + }) + } +} + export function trackEvent({ event, userId, diff --git a/common/src/tools/params/tool/set-output.ts b/common/src/tools/params/tool/set-output.ts index d9a69ea5da..1171f63dc3 100644 --- a/common/src/tools/params/tool/set-output.ts +++ b/common/src/tools/params/tool/set-output.ts @@ -6,6 +6,21 @@ import type { $ToolParams } from '../../constants' const toolName = 'set_output' const endsAgentStep = false + +// WHY `data` EXISTS IN THE INPUT SCHEMA: +// Subagents inherit their parent's tool definitions, and because of prompt caching +// we cannot modify or add tools mid-conversation. OpenAI models enforce the tool's +// input schema strictly, so we need a permissive shape that any model can call. +// An empty schema or `z.object({}).passthrough()` would be rejected by OpenAI's +// strict schema enforcement. 
The `data: z.record(...)` field is a deliberately +// vague shape that satisfies OpenAI while allowing us to inject the real +// outputSchema later in the conversation (in the instructions prompt). +// +// At runtime, the handler (`packages/agent-runtime/src/tools/handlers/tool/set-output.ts`) +// tries parsing against the real outputSchema in two ways: +// 1. Parse the raw output (agent passed fields at top level) +// 2. Fallback: parse `output.data` (agent wrapped fields in `data`) +// This means both `{ results: [...] }` and `{ data: { results: [...] } }` are accepted. const inputSchema = z .looseObject({ data: z.record(z.string(), z.any()).optional(), diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 25a1e24696..d29c729fc8 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.20", + "version": "0.0.21", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { diff --git a/packages/agent-runtime/src/templates/strings.ts b/packages/agent-runtime/src/templates/strings.ts index 313c20b220..6ac005a151 100644 --- a/packages/agent-runtime/src/templates/strings.ts +++ b/packages/agent-runtime/src/templates/strings.ts @@ -226,7 +226,7 @@ export async function getAgentPrompt( if (outputSchema) { addendum += '\n\n## Output Schema\n\n' addendum += - 'When using the set_output tool, your output must conform to this schema:\n\n' + 'When using the set_output tool, your output must conform to this schema. 
You may pass the fields either directly as top-level parameters or inside a `data` field — both are accepted.\n\n' addendum += '```json\n' try { // Convert Zod schema to JSON schema for display diff --git a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts index 2def7b1d51..8dec297118 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts @@ -52,13 +52,24 @@ export const handleSetOutput = (async (params: { agentTemplate.outputSchema.parse(data) finalOutput = data } catch (error2) { - const errorMessage = `Output validation error: Output failed to match the output schema and was ignored. You might want to try again! Issues: ${error}` + // Show whichever error has fewer issues — that represents the "closer" parse + // attempt and gives the agent more actionable feedback for retrying. + const issues1 = getZodIssueCount(error) + const issues2 = getZodIssueCount(error2) + const usedData = issues2 < issues1 + const bestError = usedData ? error2 : error + const prefix = usedData + ? 'Output validation error: Your output was found inside the `data` field but still failed validation. Please fix the issues and try again without wrapping in `data`. Issues: ' + : 'Output validation error: Output failed to match the output schema and was ignored. You might want to try again! 
Issues: ' + const errorMessage = `${prefix}${bestError}` logger.error( { output, agentType: agentState.agentType, agentId: agentState.agentId, - error, + topLevelError: error, + dataFieldError: error2, + usedDataFieldError: usedData, }, 'set_output validation error', ) @@ -78,3 +89,15 @@ export const handleSetOutput = (async (params: { return { output: jsonToolResult({ message: 'Output set' }) } }) satisfies CodebuffToolHandlerFunction + +function getZodIssueCount(error: unknown): number { + if ( + error != null && + typeof error === 'object' && + 'issues' in error && + Array.isArray((error as { issues: unknown }).issues) + ) { + return (error as { issues: unknown[] }).issues.length + } + return Infinity +} diff --git a/packages/billing/src/balance-calculator.ts b/packages/billing/src/balance-calculator.ts index 7a96617128..1a2439f66a 100644 --- a/packages/billing/src/balance-calculator.ts +++ b/packages/billing/src/balance-calculator.ts @@ -536,6 +536,7 @@ export async function consumeCreditsAndAddAgentStep(params: { cacheReadInputTokens: number reasoningTokens: number | null outputTokens: number + ttftMs: number | null logger: Logger }): Promise> { @@ -561,6 +562,7 @@ export async function consumeCreditsAndAddAgentStep(params: { cacheReadInputTokens, reasoningTokens, outputTokens, + ttftMs, logger, } = params @@ -650,6 +652,7 @@ export async function consumeCreditsAndAddAgentStep(params: { credits, byok, latency_ms: latencyMs, + ttft_ms: ttftMs, user_id: userId, }) } catch (error) { diff --git a/packages/internal/src/db/migrations/0042_needy_jack_murdock.sql b/packages/internal/src/db/migrations/0042_needy_jack_murdock.sql new file mode 100644 index 0000000000..77648859f6 --- /dev/null +++ b/packages/internal/src/db/migrations/0042_needy_jack_murdock.sql @@ -0,0 +1 @@ +ALTER TABLE "message" ADD COLUMN "ttft_ms" integer; \ No newline at end of file diff --git a/packages/internal/src/db/migrations/meta/0042_snapshot.json 
b/packages/internal/src/db/migrations/meta/0042_snapshot.json new file mode 100644 index 0000000000..abb7dceabe --- /dev/null +++ b/packages/internal/src/db/migrations/meta/0042_snapshot.json @@ -0,0 +1,3078 @@ +{ + "id": "c7772899-6ae6-4a07-890e-a1ca64dc6e61", + "prevId": "db3b93eb-3ed2-4468-80d1-0d082f4cecbd", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.account": { + "name": "account", + "schema": "", + "columns": { + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "providerAccountId": { + "name": "providerAccountId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "refresh_token": { + "name": "refresh_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_token": { + "name": "access_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "token_type": { + "name": "token_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "id_token": { + "name": "id_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "session_state": { + "name": "session_state", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "account_userId_user_id_fk": { + "name": "account_userId_user_id_fk", + "tableFrom": "account", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "account_provider_providerAccountId_pk": { + "name": 
"account_provider_providerAccountId_pk", + "columns": [ + "provider", + "providerAccountId" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ad_impression": { + "name": "ad_impression", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ad_text": { + "name": "ad_text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "cta": { + "name": "cta", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "favicon": { + "name": "favicon", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "click_url": { + "name": "click_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "imp_url": { + "name": "imp_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "payout": { + "name": "payout", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true + }, + "credits_granted": { + "name": "credits_granted", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "grant_operation_id": { + "name": "grant_operation_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "served_at": { + "name": "served_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "impression_fired_at": { + "name": "impression_fired_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "clicked_at": { + "name": "clicked_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_ad_impression_user": { 
+ "name": "idx_ad_impression_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "served_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_ad_impression_imp_url": { + "name": "idx_ad_impression_imp_url", + "columns": [ + { + "expression": "imp_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "ad_impression_user_id_user_id_fk": { + "name": "ad_impression_user_id_user_id_fk", + "tableFrom": "ad_impression", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "ad_impression_imp_url_unique": { + "name": "ad_impression_imp_url_unique", + "nullsNotDistinct": false, + "columns": [ + "imp_url" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_config": { + "name": "agent_config", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "version": { + "name": "version", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "major": { + "name": "major", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 1) AS INTEGER)", + "type": "stored" + } + }, + "minor": { + "name": "minor", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 2) AS INTEGER)", + "type": "stored" + } + }, + "patch": { + 
"name": "patch", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CAST(SPLIT_PART(\"agent_config\".\"version\", '.', 3) AS INTEGER)", + "type": "stored" + } + }, + "data": { + "name": "data", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_agent_config_publisher": { + "name": "idx_agent_config_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_config_publisher_id_publisher_id_fk": { + "name": "agent_config_publisher_id_publisher_id_fk", + "tableFrom": "agent_config", + "tableTo": "publisher", + "columnsFrom": [ + "publisher_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "agent_config_publisher_id_id_version_pk": { + "name": "agent_config_publisher_id_id_version_pk", + "columns": [ + "publisher_id", + "id", + "version" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_run": { + "name": "agent_run", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "publisher_id": { + "name": "publisher_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + 
"as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '/', 1)\n ELSE NULL\n END", + "type": "stored" + } + }, + "agent_name": { + "name": "agent_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(split_part(agent_id, '/', 2), '@', 1)\n ELSE agent_id\n END", + "type": "stored" + } + }, + "agent_version": { + "name": "agent_version", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE\n WHEN agent_id ~ '^[^/@]+/[^/@]+@[^/@]+$'\n THEN split_part(agent_id, '@', 2)\n ELSE NULL\n END", + "type": "stored" + } + }, + "ancestor_run_ids": { + "name": "ancestor_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "root_run_id": { + "name": "root_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[1] ELSE id END", + "type": "stored" + } + }, + "parent_run_id": { + "name": "parent_run_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN array_length(ancestor_run_ids, 1) >= 1 THEN ancestor_run_ids[array_length(ancestor_run_ids, 1)] ELSE NULL END", + "type": "stored" + } + }, + "depth": { + "name": "depth", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "COALESCE(array_length(ancestor_run_ids, 1), 1)", + "type": "stored" + } + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "total_steps": { + "name": "total_steps", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "direct_credits": { + "name": 
"direct_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "total_credits": { + "name": "total_credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "status": { + "name": "status", + "type": "agent_run_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'running'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_agent_run_user_id": { + "name": "idx_agent_run_user_id", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_parent": { + "name": "idx_agent_run_parent", + "columns": [ + { + "expression": "parent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_root": { + "name": "idx_agent_run_root", + "columns": [ + { + "expression": "root_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_agent_id": { + "name": "idx_agent_run_agent_id", + "columns": [ + { + "expression": "agent_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + 
"isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_publisher": { + "name": "idx_agent_run_publisher", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_status": { + "name": "idx_agent_run_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'running'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_ancestors_gin": { + "name": "idx_agent_run_ancestors_gin", + "columns": [ + { + "expression": "ancestor_run_ids", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + }, + "idx_agent_run_completed_publisher_agent": { + "name": "idx_agent_run_completed_publisher_agent", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_recent": { + "name": "idx_agent_run_completed_recent", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": 
"btree", + "with": {} + }, + "idx_agent_run_completed_version": { + "name": "idx_agent_run_completed_version", + "columns": [ + { + "expression": "publisher_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "agent_version", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_run_completed_user": { + "name": "idx_agent_run_completed_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"agent_run\".\"status\" = 'completed'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "agent_run_user_id_user_id_fk": { + "name": "agent_run_user_id_user_id_fk", + "tableFrom": "agent_run", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.agent_step": { + "name": "agent_step", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "agent_run_id": { + "name": "agent_run_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "step_number": { + "name": "step_number", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "CASE WHEN completed_at IS NOT NULL THEN EXTRACT(EPOCH FROM (completed_at - 
created_at)) * 1000 ELSE NULL END::integer", + "type": "stored" + } + }, + "credits": { + "name": "credits", + "type": "numeric(10, 6)", + "primaryKey": false, + "notNull": true, + "default": "'0'" + }, + "child_run_ids": { + "name": "child_run_ids", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "spawned_count": { + "name": "spawned_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "array_length(child_run_ids, 1)", + "type": "stored" + } + }, + "message_id": { + "name": "message_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "agent_step_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'completed'" + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "unique_step_number_per_run": { + "name": "unique_step_number_per_run", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "step_number", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_run_id": { + "name": "idx_agent_step_run_id", + "columns": [ + { + "expression": "agent_run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_agent_step_children_gin": { + "name": "idx_agent_step_children_gin", + "columns": [ + { + "expression": "child_run_ids", + 
"isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": { + "agent_step_agent_run_id_agent_run_id_fk": { + "name": "agent_step_agent_run_id_agent_run_id_fk", + "tableFrom": "agent_step", + "tableTo": "agent_run", + "columnsFrom": [ + "agent_run_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.credit_ledger": { + "name": "credit_ledger", + "schema": "", + "columns": { + "operation_id": { + "name": "operation_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "principal": { + "name": "principal", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "balance": { + "name": "balance", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "grant_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_credit_ledger_active_balance": { + 
"name": "idx_credit_ledger_active_balance", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "balance", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "priority", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"credit_ledger\".\"balance\" != 0 AND \"credit_ledger\".\"expires_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_org": { + "name": "idx_credit_ledger_org", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_credit_ledger_subscription": { + "name": "idx_credit_ledger_subscription", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "credit_ledger_user_id_user_id_fk": { + "name": "credit_ledger_user_id_user_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "credit_ledger_org_id_org_id_fk": { + "name": "credit_ledger_org_id_org_id_fk", + "tableFrom": "credit_ledger", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + 
"compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.encrypted_api_keys": { + "name": "encrypted_api_keys", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "api_key_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "api_key": { + "name": "api_key", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "encrypted_api_keys_user_id_user_id_fk": { + "name": "encrypted_api_keys_user_id_user_id_fk", + "tableFrom": "encrypted_api_keys", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "encrypted_api_keys_user_id_type_pk": { + "name": "encrypted_api_keys_user_id_type_pk", + "columns": [ + "user_id", + "type" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.fingerprint": { + "name": "fingerprint", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "sig_hash": { + "name": "sig_hash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.git_eval_results": { + "name": "git_eval_results", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "cost_mode": { + "name": "cost_mode", + "type": "text", + "primaryKey": false, + "notNull": false 
+ }, + "reasoner_model": { + "name": "reasoner_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent_model": { + "name": "agent_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "cost": { + "name": "cost", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "is_public": { + "name": "is_public", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.limit_override": { + "name": "limit_override", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "credits_per_block": { + "name": "credits_per_block", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "block_duration_hours": { + "name": "block_duration_hours", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "weekly_credit_limit": { + "name": "weekly_credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "limit_override_user_id_user_id_fk": { + "name": "limit_override_user_id_user_id_fk", + "tableFrom": "limit_override", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + 
], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.message": { + "name": "message", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "client_id": { + "name": "client_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "client_request_id": { + "name": "client_request_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "agent_id": { + "name": "agent_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "request": { + "name": "request", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "last_message": { + "name": "last_message", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "generated": { + "as": "\"message\".\"request\" -> -1", + "type": "stored" + } + }, + "reasoning_text": { + "name": "reasoning_text", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "response": { + "name": "response", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "input_tokens": { + "name": "input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "cache_creation_input_tokens": { + "name": "cache_creation_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "cache_read_input_tokens": { + "name": "cache_read_input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "reasoning_tokens": { + "name": "reasoning_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "output_tokens": { + "name": 
"output_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cost": { + "name": "cost", + "type": "numeric(100, 20)", + "primaryKey": false, + "notNull": true + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "byok": { + "name": "byok", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "latency_ms": { + "name": "latency_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "ttft_ms": { + "name": "ttft_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "message_user_id_idx": { + "name": "message_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_finished_at_user_id_idx": { + "name": "message_finished_at_user_id_idx", + "columns": [ + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_idx": { + "name": "message_org_id_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "message_org_id_finished_at_idx": { + "name": "message_org_id_finished_at_idx", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": 
true, + "nulls": "last" + }, + { + "expression": "finished_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "message_user_id_user_id_fk": { + "name": "message_user_id_user_id_fk", + "tableFrom": "message", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "message_org_id_org_id_fk": { + "name": "message_org_id_org_id_fk", + "tableFrom": "message", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org": { + "name": "org", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "owner_id": { + "name": "owner_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "current_period_start": { + "name": "current_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "current_period_end": { + "name": "current_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": 
"auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "credit_limit": { + "name": "credit_limit", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "billing_alerts": { + "name": "billing_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "usage_alerts": { + "name": "usage_alerts", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "weekly_reports": { + "name": "weekly_reports", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_owner_id_user_id_fk": { + "name": "org_owner_id_user_id_fk", + "tableFrom": "org", + "tableTo": "user", + "columnsFrom": [ + "owner_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_slug_unique": { + "name": "org_slug_unique", + "nullsNotDistinct": false, + "columns": [ + "slug" + ] + }, + "org_stripe_customer_id_unique": { + "name": "org_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_feature": { + "name": "org_feature", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + 
"primaryKey": false, + "notNull": true + }, + "feature": { + "name": "feature", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "config": { + "name": "config", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_org_feature_active": { + "name": "idx_org_feature_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_feature_org_id_org_id_fk": { + "name": "org_feature_org_id_org_id_fk", + "tableFrom": "org_feature", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_feature_org_id_feature_pk": { + "name": "org_feature_org_id_feature_pk", + "columns": [ + "org_id", + "feature" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_invite": { + "name": "org_invite", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + 
"typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "invited_by": { + "name": "invited_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "accepted_at": { + "name": "accepted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "accepted_by": { + "name": "accepted_by", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_org_invite_token": { + "name": "idx_org_invite_token", + "columns": [ + { + "expression": "token", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_email": { + "name": "idx_org_invite_email", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_invite_expires": { + "name": "idx_org_invite_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_invite_org_id_org_id_fk": { + "name": "org_invite_org_id_org_id_fk", + "tableFrom": "org_invite", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_invite_invited_by_user_id_fk": { + "name": 
"org_invite_invited_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "invited_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "org_invite_accepted_by_user_id_fk": { + "name": "org_invite_accepted_by_user_id_fk", + "tableFrom": "org_invite", + "tableTo": "user", + "columnsFrom": [ + "accepted_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "org_invite_token_unique": { + "name": "org_invite_token_unique", + "nullsNotDistinct": false, + "columns": [ + "token" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_member": { + "name": "org_member", + "schema": "", + "columns": { + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "org_role", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "joined_at": { + "name": "joined_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "org_member_org_id_org_id_fk": { + "name": "org_member_org_id_org_id_fk", + "tableFrom": "org_member", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "org_member_user_id_user_id_fk": { + "name": "org_member_user_id_user_id_fk", + "tableFrom": "org_member", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "org_member_org_id_user_id_pk": { + "name": "org_member_org_id_user_id_pk", + "columns": [ + "org_id", + "user_id" + ] + } + }, + 
"uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.org_repo": { + "name": "org_repo", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_url": { + "name": "repo_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_name": { + "name": "repo_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "repo_owner": { + "name": "repo_owner", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "approved_by": { + "name": "approved_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "approved_at": { + "name": "approved_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "is_active": { + "name": "is_active", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + } + }, + "indexes": { + "idx_org_repo_active": { + "name": "idx_org_repo_active", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_active", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_org_repo_unique": { + "name": "idx_org_repo_unique", + "columns": [ + { + "expression": "org_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "repo_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "org_repo_org_id_org_id_fk": { + "name": "org_repo_org_id_org_id_fk", + "tableFrom": "org_repo", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + 
"onDelete": "cascade", + "onUpdate": "no action" + }, + "org_repo_approved_by_user_id_fk": { + "name": "org_repo_approved_by_user_id_fk", + "tableFrom": "org_repo", + "tableTo": "user", + "columnsFrom": [ + "approved_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.publisher": { + "name": "publisher", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "verified": { + "name": "verified", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "bio": { + "name": "bio", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "avatar_url": { + "name": "avatar_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "org_id": { + "name": "org_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_by": { + "name": "created_by", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "publisher_user_id_user_id_fk": { + "name": "publisher_user_id_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + 
"onUpdate": "no action" + }, + "publisher_org_id_org_id_fk": { + "name": "publisher_org_id_org_id_fk", + "tableFrom": "publisher", + "tableTo": "org", + "columnsFrom": [ + "org_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "publisher_created_by_user_id_fk": { + "name": "publisher_created_by_user_id_fk", + "tableFrom": "publisher", + "tableTo": "user", + "columnsFrom": [ + "created_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": { + "publisher_single_owner": { + "name": "publisher_single_owner", + "value": "(\"publisher\".\"user_id\" IS NOT NULL AND \"publisher\".\"org_id\" IS NULL) OR\n (\"publisher\".\"user_id\" IS NULL AND \"publisher\".\"org_id\" IS NOT NULL)" + } + }, + "isRLSEnabled": false + }, + "public.referral": { + "name": "referral", + "schema": "", + "columns": { + "referrer_id": { + "name": "referrer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "referred_id": { + "name": "referred_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "referral_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + }, + "credits": { + "name": "credits", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_legacy": { + "name": "is_legacy", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "referral_referrer_id_user_id_fk": { + "name": "referral_referrer_id_user_id_fk", + "tableFrom": "referral", + 
"tableTo": "user", + "columnsFrom": [ + "referrer_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "referral_referred_id_user_id_fk": { + "name": "referral_referred_id_user_id_fk", + "tableFrom": "referral", + "tableTo": "user", + "columnsFrom": [ + "referred_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "referral_referrer_id_referred_id_pk": { + "name": "referral_referrer_id_referred_id_pk", + "columns": [ + "referrer_id", + "referred_id" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.session": { + "name": "session", + "schema": "", + "columns": { + "sessionToken": { + "name": "sessionToken", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + }, + "fingerprint_id": { + "name": "fingerprint_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "session_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'web'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "session_userId_user_id_fk": { + "name": "session_userId_user_id_fk", + "tableFrom": "session", + "tableTo": "user", + "columnsFrom": [ + "userId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "session_fingerprint_id_fingerprint_id_fk": { + "name": "session_fingerprint_id_fingerprint_id_fk", + "tableFrom": "session", + "tableTo": "fingerprint", + "columnsFrom": [ + "fingerprint_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no 
action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.subscription": { + "name": "subscription", + "schema": "", + "columns": { + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_price_id": { + "name": "stripe_price_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "tier": { + "name": "tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "scheduled_tier": { + "name": "scheduled_tier", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "subscription_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'active'" + }, + "billing_period_start": { + "name": "billing_period_start", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "billing_period_end": { + "name": "billing_period_end", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "cancel_at_period_end": { + "name": "cancel_at_period_end", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "canceled_at": { + "name": "canceled_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + 
"idx_subscription_customer": { + "name": "idx_subscription_customer", + "columns": [ + { + "expression": "stripe_customer_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_user": { + "name": "idx_subscription_user", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_subscription_status": { + "name": "idx_subscription_status", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"subscription\".\"status\" = 'active'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "subscription_user_id_user_id_fk": { + "name": "subscription_user_id_user_id_fk", + "tableFrom": "subscription", + "tableTo": "user", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sync_failure": { + "name": "sync_failure", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_attempt_at": { + "name": "last_attempt_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "retry_count": { + "name": "retry_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "last_error": { 
+ "name": "last_error", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "idx_sync_failure_retry": { + "name": "idx_sync_failure_retry", + "columns": [ + { + "expression": "retry_count", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "last_attempt_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"sync_failure\".\"retry_count\" < 5", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.user": { + "name": "user", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "password": { + "name": "password", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "emailVerified": { + "name": "emailVerified", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "image": { + "name": "image", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "next_quota_reset": { + "name": "next_quota_reset", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "default": "now() + INTERVAL '1 month'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "referral_code": { + "name": "referral_code", + "type": "text", + "primaryKey": false, + "notNull": false, + "default": "'ref-' || gen_random_uuid()" + }, + "referral_limit": { + "name": "referral_limit", + "type": "integer", 
+ "primaryKey": false, + "notNull": true, + "default": 5 + }, + "discord_id": { + "name": "discord_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "handle": { + "name": "handle", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "auto_topup_enabled": { + "name": "auto_topup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "auto_topup_threshold": { + "name": "auto_topup_threshold", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "auto_topup_amount": { + "name": "auto_topup_amount", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "banned": { + "name": "banned", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "fallback_to_a_la_carte": { + "name": "fallback_to_a_la_carte", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "user_email_unique": { + "name": "user_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + }, + "user_stripe_customer_id_unique": { + "name": "user_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + }, + "user_referral_code_unique": { + "name": "user_referral_code_unique", + "nullsNotDistinct": false, + "columns": [ + "referral_code" + ] + }, + "user_discord_id_unique": { + "name": "user_discord_id_unique", + "nullsNotDistinct": false, + "columns": [ + "discord_id" + ] + }, + "user_handle_unique": { + "name": "user_handle_unique", + "nullsNotDistinct": false, + "columns": [ + "handle" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.verificationToken": { + "name": "verificationToken", + "schema": "", + "columns": { + "identifier": { + "name": "identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + 
"token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires": { + "name": "expires", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "verificationToken_identifier_token_pk": { + "name": "verificationToken_identifier_token_pk", + "columns": [ + "identifier", + "token" + ] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.referral_status": { + "name": "referral_status", + "schema": "public", + "values": [ + "pending", + "completed" + ] + }, + "public.agent_run_status": { + "name": "agent_run_status", + "schema": "public", + "values": [ + "running", + "completed", + "failed", + "cancelled" + ] + }, + "public.agent_step_status": { + "name": "agent_step_status", + "schema": "public", + "values": [ + "running", + "completed", + "skipped" + ] + }, + "public.api_key_type": { + "name": "api_key_type", + "schema": "public", + "values": [ + "anthropic", + "gemini", + "openai" + ] + }, + "public.grant_type": { + "name": "grant_type", + "schema": "public", + "values": [ + "free", + "referral", + "referral_legacy", + "subscription", + "purchase", + "admin", + "organization", + "ad" + ] + }, + "public.org_role": { + "name": "org_role", + "schema": "public", + "values": [ + "owner", + "admin", + "member" + ] + }, + "public.session_type": { + "name": "session_type", + "schema": "public", + "values": [ + "web", + "pat", + "cli" + ] + }, + "public.subscription_status": { + "name": "subscription_status", + "schema": "public", + "values": [ + "incomplete", + "incomplete_expired", + "trialing", + "active", + "past_due", + "canceled", + "unpaid", + "paused" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git 
a/packages/internal/src/db/migrations/meta/_journal.json b/packages/internal/src/db/migrations/meta/_journal.json index bce61005a2..8952549c98 100644 --- a/packages/internal/src/db/migrations/meta/_journal.json +++ b/packages/internal/src/db/migrations/meta/_journal.json @@ -295,6 +295,13 @@ "when": 1770334047429, "tag": "0041_nappy_nebula", "breakpoints": true + }, + { + "idx": 42, + "version": "7", + "when": 1773878149145, + "tag": "0042_needy_jack_murdock", + "breakpoints": true } ] } \ No newline at end of file diff --git a/packages/internal/src/db/schema.ts b/packages/internal/src/db/schema.ts index 1fa381c5df..0033314f00 100644 --- a/packages/internal/src/db/schema.ts +++ b/packages/internal/src/db/schema.ts @@ -232,6 +232,7 @@ export const message = pgTable( credits: integer('credits').notNull(), byok: boolean('byok').notNull().default(false), latency_ms: integer('latency_ms'), + ttft_ms: integer('ttft_ms'), user_id: text('user_id').references(() => user.id, { onDelete: 'cascade' }), org_id: text('org_id').references(() => org.id, { onDelete: 'cascade' }), diff --git a/scripts/query-minimax-cache-stats.ts b/scripts/query-minimax-cache-stats.ts new file mode 100644 index 0000000000..7c742c2ccc --- /dev/null +++ b/scripts/query-minimax-cache-stats.ts @@ -0,0 +1,138 @@ +import { db } from '@codebuff/internal/db' +import { sql } from 'drizzle-orm' + +async function queryMinimaxCacheStats() { + console.log('Querying minimax/minimax-m2.5 usage (last 19 hours)...\n') + + // 1. 
Overall stats + const overallResult = await db.execute(sql` + SELECT + COUNT(*) AS total_requests, + ROUND(AVG(input_tokens)) AS avg_input_tokens, + ROUND(AVG(output_tokens)) AS avg_output_tokens, + ROUND( + CASE + WHEN SUM(input_tokens) > 0 + THEN SUM(cache_read_input_tokens)::numeric / SUM(input_tokens) * 100 + ELSE 0 + END, 1 + ) AS overall_cache_rate_pct, + COUNT(DISTINCT client_id) AS unique_clients + FROM message + WHERE finished_at >= NOW() - INTERVAL '19 hours' + AND model = 'minimax/minimax-m2.5' + `) + + const overall = overallResult[0] + if (!overall || Number(overall.total_requests) === 0) { + console.log('No data found for minimax/minimax-m2.5 in the last 19 hours.') + return + } + + console.log('Overall Stats') + console.log('═══════════════════════════════════════════') + console.log(`Total requests: ${overall.total_requests}`) + console.log(`Unique clients: ${overall.unique_clients}`) + console.log(`Avg input tokens: ${overall.avg_input_tokens}`) + console.log(`Avg output tokens: ${overall.avg_output_tokens}`) + console.log(`Overall cache rate: ${overall.overall_cache_rate_pct}%`) + + // 2. 
Per-client stats, ordered by lowest cache rate + const clientResult = await db.execute(sql` + SELECT + client_id, + COUNT(*) AS request_count, + MIN(finished_at) AS first_seen, + MAX(finished_at) AS last_seen, + ROUND(AVG(input_tokens)) AS avg_input, + ROUND( + CASE + WHEN SUM(input_tokens) > 0 + THEN SUM(cache_read_input_tokens)::numeric / SUM(input_tokens) * 100 + ELSE 0 + END, 1 + ) AS cache_rate_pct, + SUM(cache_read_input_tokens) AS total_cache_read, + SUM(input_tokens) AS total_input + FROM message + WHERE finished_at >= NOW() - INTERVAL '19 hours' + AND model = 'minimax/minimax-m2.5' + AND client_id IS NOT NULL + GROUP BY client_id + ORDER BY cache_rate_pct ASC, request_count DESC + `) + + console.log('\n\nPer-Client Cache Rates (lowest first)') + console.log('═══════════════════════════════════════════') + + if (clientResult.length === 0) { + console.log('No client-level data found.') + return + } + + for (const row of clientResult) { + const clientId = String(row.client_id).slice(0, 12) + const reqs = String(row.request_count).padStart(4) + const cacheRate = String(row.cache_rate_pct).padStart(6) + const avgInput = String(row.avg_input).padStart(8) + const firstSeen = row.first_seen + ? new Date(String(row.first_seen)).toISOString().slice(0, 16) + : 'N/A' + const lastSeen = row.last_seen + ? new Date(String(row.last_seen)).toISOString().slice(0, 16) + : 'N/A' + console.log( + ` ${clientId}… reqs: ${reqs} cache: ${cacheRate}% avg_input: ${avgInput} range: ${firstSeen} → ${lastSeen}`, + ) + } + + // 3. 
Recent requests in time order + const recentResult = await db.execute(sql` + SELECT + client_id, + finished_at, + input_tokens, + cache_read_input_tokens, + COALESCE(cache_creation_input_tokens, 0) AS cache_creation_input_tokens, + output_tokens, + ROUND( + CASE + WHEN input_tokens > 0 + THEN cache_read_input_tokens::numeric / input_tokens * 100 + ELSE 0 + END, 1 + ) AS cache_rate_pct + FROM message + WHERE finished_at >= NOW() - INTERVAL '19 hours' + AND model = 'minimax/minimax-m2.5' + ORDER BY client_id, finished_at DESC + LIMIT 100 + `) + + console.log('\n\nRecent Requests (newest first, last 100)') + console.log('═══════════════════════════════════════════') + + for (const row of recentResult) { + const clientId = row.client_id + ? String(row.client_id).slice(0, 12) + : 'unknown ' + const time = row.finished_at + ? new Date(String(row.finished_at)).toISOString().slice(0, 19) + : 'N/A' + const cacheRate = String(row.cache_rate_pct).padStart(6) + const input = String(row.input_tokens).padStart(7) + const cached = String(row.cache_read_input_tokens).padStart(7) + const creation = String(row.cache_creation_input_tokens).padStart(7) + const output = String(row.output_tokens).padStart(6) + console.log( + ` ${time} ${clientId}… cache: ${cacheRate}% input: ${input} cached: ${cached} creation: ${creation} output: ${output}`, + ) + } +} + +queryMinimaxCacheStats() + .then(() => process.exit(0)) + .catch((err) => { + console.error(err) + process.exit(1) + }) diff --git a/scripts/query-usage-stats.ts b/scripts/query-usage-stats.ts index 371701902d..15a35703b8 100644 --- a/scripts/query-usage-stats.ts +++ b/scripts/query-usage-stats.ts @@ -22,14 +22,13 @@ async function queryUsageStats() { token_stats AS ( SELECT - ROUND(AVG(input_tokens + cache_read_input_tokens + cache_creation_input_tokens)) + ROUND(AVG(input_tokens)) AS avg_total_input_tokens, ROUND( AVG( CASE - WHEN (input_tokens + cache_read_input_tokens + cache_creation_input_tokens) > 0 - THEN 
cache_read_input_tokens::numeric - / (input_tokens + cache_read_input_tokens + cache_creation_input_tokens) + WHEN input_tokens > 0 + THEN cache_read_input_tokens::numeric / input_tokens ELSE 0 END ) * 100, 1 @@ -42,7 +41,9 @@ async function queryUsageStats() { client_stats AS ( SELECT - ROUND(AVG(cnt)) AS avg_requests_per_client + ROUND(AVG(cnt)) AS avg_requests_per_client, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cnt) AS median_requests_per_client, + MAX(cnt) AS max_requests_per_client FROM ( SELECT client_id, COUNT(*) AS cnt FROM recent @@ -70,6 +71,8 @@ async function queryUsageStats() { t.avg_cache_rate_pct, t.avg_output_tokens, c.avg_requests_per_client, + c.median_requests_per_client, + c.max_requests_per_client, r.median_rps, r.peak_rps, t.total_requests @@ -90,6 +93,8 @@ async function queryUsageStats() { console.log(`Median RPS: ${row.median_rps}`) console.log(`Peak RPS: ${row.peak_rps}`) console.log(`Avg requests/client: ${row.avg_requests_per_client}`) + console.log(`Median requests/client: ${row.median_requests_per_client}`) + console.log(`Max requests/client: ${row.max_requests_per_client}`) console.log(`Total requests (7d): ${row.total_requests}`) } diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts index f28eb55a6e..9fb5ebc8bd 100644 --- a/scripts/test-fireworks-long.ts +++ b/scripts/test-fireworks-long.ts @@ -13,7 +13,7 @@ export { } const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' -// const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/qne3jo8v' +// const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9' const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5' // Pricing constants — https://fireworks.ai/pricing @@ -23,6 +23,9 @@ const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 const MAX_TOKENS = 100 +// Stable session ID so all turns route to the same machine for prompt caching +const SESSION_ID = `bench-${Math.random().toString(36).slice(2, 10)}` + function 
computeCost(usage: Record): { cost: number; breakdown: string } { const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0 const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0 @@ -175,6 +178,7 @@ async function makeConversationStreamRequest( headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json', + 'x-session-affinity': SESSION_ID, }, body: JSON.stringify({ model: FIREWORKS_MODEL, @@ -220,16 +224,13 @@ async function makeConversationStreamRequest( const chunk = JSON.parse(raw) chunkCount++ const delta = chunk.choices?.[0]?.delta + if (delta && firstContentChunkTime === undefined) { + firstContentChunkTime = Date.now() + ttftMs = firstContentChunkTime - startTime + } if (delta?.content) { - if (firstContentChunkTime === undefined) { - firstContentChunkTime = Date.now() - ttftMs = firstContentChunkTime - startTime - } streamContent += delta.content } - if (delta?.reasoning_content) { - // Skip reasoning content for this test - } if (chunk.usage) streamUsage = chunk.usage } catch { // skip non-JSON lines @@ -242,12 +243,9 @@ async function makeConversationStreamRequest( ? streamUsage.completion_tokens : 0 - const generationTimeMs = firstContentChunkTime !== undefined - ? Date.now() - firstContentChunkTime - : elapsedMs - const outputTokensPerSec = generationTimeMs > 0 - ? (outputTokens / (generationTimeMs / 1000)) - : 0 + const outputTokensPerSec = firstContentChunkTime !== undefined + ? (outputTokens / ((Date.now() - firstContentChunkTime) / 1000)) + : undefined // Print compact per-turn stats const inputTokens = streamUsage && typeof streamUsage.prompt_tokens === 'number' ? streamUsage.prompt_tokens : 0 @@ -256,7 +254,7 @@ async function makeConversationStreamRequest( const cacheRate = inputTokens > 0 ? ((cachedTokens / inputTokens) * 100).toFixed(1) : '0.0' const cost = streamUsage ? 
`$${computeCost(streamUsage).cost.toFixed(6)}` : 'err' - console.log(` ✅ ${(elapsedMs / 1000).toFixed(2)}s | TTFT ${ttftMs !== undefined ? (ttftMs / 1000).toFixed(2) + 's' : 'n/a'} | ${inputTokens} in (${cachedTokens} cached, ${cacheRate}%) | ${outputTokens} out @ ${outputTokensPerSec.toFixed(1)} tok/s | ${cost}`) + console.log(` ✅ ${(elapsedMs / 1000).toFixed(2)}s | TTFT ${ttftMs !== undefined ? (ttftMs / 1000).toFixed(2) + 's' : 'n/a'} | ${inputTokens} in (${cachedTokens} cached, ${cacheRate}%) | ${outputTokens} out @ ${outputTokensPerSec !== undefined ? outputTokensPerSec.toFixed(1) + ' tok/s' : 'n/a'} | ${cost}`) console.log(` Response: ${streamContent.slice(0, 150)}${streamContent.length > 150 ? '...' : ''}`) console.log() @@ -277,6 +275,7 @@ async function main() { console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`) console.log(`Turns: ${TURN_PROMPTS.length}`) console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`) + console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`) console.log('='.repeat(60)) console.log() diff --git a/web/src/app/api/v1/chat/completions/__tests__/free-mode-rate-limiter.test.ts b/web/src/app/api/v1/chat/completions/__tests__/free-mode-rate-limiter.test.ts new file mode 100644 index 0000000000..0d9802b58b --- /dev/null +++ b/web/src/app/api/v1/chat/completions/__tests__/free-mode-rate-limiter.test.ts @@ -0,0 +1,317 @@ +import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test' + +import { + checkFreeModeRateLimit, + FREE_MODE_RATE_LIMITS, + resetFreeModeRateLimits, +} from '../free-mode-rate-limiter' + +const SECOND_MS = 1000 +const MINUTE_MS = 60 * SECOND_MS +const HOUR_MS = 60 * MINUTE_MS + +describe('free-mode-rate-limiter', () => { + let nowSpy: ReturnType + let fakeNow: number + + beforeEach(() => { + resetFreeModeRateLimits() + fakeNow = 1_000_000_000_000 + nowSpy = spyOn(Date, 'now').mockImplementation(() => fakeNow) + }) + + afterEach(() => { + nowSpy.mockRestore() + }) + 
+ function advanceTime(ms: number) { + fakeNow += ms + } + + function makeRequests(userId: string, count: number) { + for (let i = 0; i < count; i++) { + if (i > 0) { + advanceTime(1 * SECOND_MS + 1) + } + const result = checkFreeModeRateLimit(userId) + if (result.limited) { + throw new Error(`Unexpectedly rate limited on request ${i + 1}`) + } + } + } + + describe('checkFreeModeRateLimit', () => { + it('allows the first request', () => { + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('limits when per-second limit is exceeded', () => { + // Make all requests within the same second (no time advancement) + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + expect(checkFreeModeRateLimit('user-1').limited).toBe(false) + } + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('1 second') + } + }) + + it('resets per-second window after expiry', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + checkFreeModeRateLimit('user-1') + } + expect(checkFreeModeRateLimit('user-1').limited).toBe(true) + + advanceTime(1 * SECOND_MS + 1) + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('allows requests up to the per-minute limit', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_MINUTE; i++) { + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + if (i < FREE_MODE_RATE_LIMITS.PER_MINUTE - 1) { + advanceTime(1 * SECOND_MS + 1) + } + } + }) + + it('limits when per-minute limit is exceeded', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + // Advance past the 1-second window so the per-minute window is the one that triggers + advanceTime(1 * SECOND_MS + 1) + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + 
expect(result.windowName).toBe('1 minute') + } + }) + + it('limits when per-30-minute limit is exceeded', () => { + const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE + const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES + + // Spread requests across multiple 1-minute windows to avoid hitting the per-minute limit + let sent = 0 + while (sent < per30Min) { + const batch = Math.min(perMinute, per30Min - sent) + makeRequests('user-1', batch) + sent += batch + if (sent < per30Min) { + // Advance past the 1-minute window so it resets + advanceTime(1 * MINUTE_MS + 1) + } + } + + // Advance past the 1-minute window so the per-30-minute window is the one that triggers + advanceTime(1 * MINUTE_MS + 1) + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('30 minutes') + } + }) + + it('limits when per-5-hour limit is exceeded', () => { + const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE + const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES + const per5Hours = FREE_MODE_RATE_LIMITS.PER_5_HOURS + + // Spread requests across multiple 30-minute windows + let sent = 0 + while (sent < per5Hours) { + const batchFor30Min = Math.min(per30Min, per5Hours - sent) + // Within each 30-min window, spread across 1-min windows + let sentInWindow = 0 + while (sentInWindow < batchFor30Min) { + const batch = Math.min(perMinute, batchFor30Min - sentInWindow) + makeRequests('user-1', batch) + sentInWindow += batch + if (sentInWindow < batchFor30Min) { + advanceTime(1 * MINUTE_MS + 1) + } + } + sent += sentInWindow + // Always advance past 30-min window to reset it for the next batch + // (stays well within the 5-hour window) + advanceTime(30 * MINUTE_MS + 1) + } + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('5 hours') + } + }) + + it('limits when per-7-day limit is exceeded', () => { + const perMinute = 
FREE_MODE_RATE_LIMITS.PER_MINUTE + const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES + const per5Hours = FREE_MODE_RATE_LIMITS.PER_5_HOURS + const per7Days = FREE_MODE_RATE_LIMITS.PER_7_DAYS + + // Spread requests across multiple 5-hour windows + let sent = 0 + while (sent < per7Days) { + const batchFor5Hours = Math.min(per5Hours, per7Days - sent) + let sentIn5Hr = 0 + while (sentIn5Hr < batchFor5Hours) { + const batchFor30Min = Math.min(per30Min, batchFor5Hours - sentIn5Hr) + let sentIn30Min = 0 + while (sentIn30Min < batchFor30Min) { + const batch = Math.min(perMinute, batchFor30Min - sentIn30Min) + makeRequests('user-1', batch) + sentIn30Min += batch + if (sentIn30Min < batchFor30Min) { + advanceTime(1 * MINUTE_MS + 1) + } + } + sentIn5Hr += sentIn30Min + advanceTime(30 * MINUTE_MS + 1) + } + sent += sentIn5Hr + // Advance past the 5-hour window (stays within 7-day window) + advanceTime(5 * HOUR_MS + 1) + } + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('7 days') + } + }) + + it('does not increment counters when rate limited', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + // Advance past the 1-second window so the per-minute window blocks + advanceTime(1 * SECOND_MS + 1) + + // These should all be rejected without changing state + for (let i = 0; i < 5; i++) { + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + } + + // After the 1-minute window expires, the user should only have used PER_MINUTE requests + // against the 30-minute window, not PER_MINUTE + 5 + advanceTime(1 * MINUTE_MS + 1) + + // Should be allowed again (1-min window reset) + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('returns correct retryAfterMs for the violated window', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + // makeRequests advanced time by (PER_MINUTE 
- 1) * (SECOND_MS + 1) + const elapsedInMakeRequests = (FREE_MODE_RATE_LIMITS.PER_MINUTE - 1) * (1 * SECOND_MS + 1) + + // Advance past the 1-second window, then a bit more + const additionalAdvance = 2 * SECOND_MS + advanceTime(additionalAdvance) + + const totalElapsed = elapsedInMakeRequests + additionalAdvance + const expectedRetryAfterMs = 1 * MINUTE_MS - totalElapsed + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.windowName).toBe('1 minute') + expect(result.retryAfterMs).toBe(expectedRetryAfterMs) + } + }) + + it('resets per-minute window after expiry', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + advanceTime(1 * SECOND_MS + 1) + + const limited = checkFreeModeRateLimit('user-1') + expect(limited.limited).toBe(true) + + // Advance past the 1-minute window + advanceTime(1 * MINUTE_MS + 1) + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('isolates different users', () => { + makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE) + advanceTime(1 * SECOND_MS + 1) + + // user-1 is rate limited + expect(checkFreeModeRateLimit('user-1').limited).toBe(true) + + // user-2 should not be affected + const result = checkFreeModeRateLimit('user-2') + expect(result.limited).toBe(false) + }) + + it('retryAfterMs is never negative', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + checkFreeModeRateLimit('user-1') + } + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(true) + if (result.limited) { + expect(result.retryAfterMs).toBeGreaterThanOrEqual(0) + } + }) + + it('tracks counts across all windows simultaneously', () => { + // Make some requests + makeRequests('user-1', 5) + + // Advance past 1-minute window but within 30-minute window + advanceTime(1 * MINUTE_MS + 1) + + // Make more requests — 1-min counter resets, but 30-min counter keeps accumulating + 
makeRequests('user-1', 5) + + // Advance past 1-minute again + advanceTime(1 * MINUTE_MS + 1) + + // The 30-min window should now have 10 requests counted + // and the 1-min window should be fresh + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + }) + + describe('resetFreeModeRateLimits', () => { + it('clears all rate limit state', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + checkFreeModeRateLimit('user-1') + } + expect(checkFreeModeRateLimit('user-1').limited).toBe(true) + + resetFreeModeRateLimits() + + const result = checkFreeModeRateLimit('user-1') + expect(result.limited).toBe(false) + }) + + it('clears state for all users', () => { + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + checkFreeModeRateLimit('user-1') + checkFreeModeRateLimit('user-2') + } + + expect(checkFreeModeRateLimit('user-1').limited).toBe(true) + expect(checkFreeModeRateLimit('user-2').limited).toBe(true) + + resetFreeModeRateLimits() + + expect(checkFreeModeRateLimit('user-1').limited).toBe(false) + expect(checkFreeModeRateLimit('user-2').limited).toBe(false) + }) + }) +}) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index bf36ae417f..8553aa69e3 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -65,6 +65,8 @@ import { OpenRouterError, } from '@/llm-api/openrouter' import { extractApiKeyFromHeader } from '@/util/auth' +import { withDefaultProperties } from '@codebuff/common/analytics' +import { checkFreeModeRateLimit } from './free-mode-rate-limiter' const FREE_MODE_ALLOWED_COUNTRIES = new Set([ 'US', 'CA', @@ -80,7 +82,13 @@ function extractClientIp(req: NextRequest): string | undefined { return req.headers.get('x-real-ip') ?? 
undefined } -function getCountryFromIp(clientIp: string | undefined): string | null { +function getCountryCode(req: NextRequest): string | null { + const cfCountry = req.headers.get('cf-ipcountry') + if (cfCountry && cfCountry !== 'XX' && cfCountry !== 'T1') { + return cfCountry.toUpperCase() + } + + const clientIp = extractClientIp(req) if (!clientIp) { return null } @@ -141,7 +149,6 @@ export async function postChatCompletions(params: { req, getUserInfoFromApiKey, loggerWithContext, - trackEvent, getUserUsageData, getAgentRunFromId, fetch, @@ -150,6 +157,7 @@ export async function postChatCompletions(params: { getUserPreferences, } = params let { logger } = params + let { trackEvent } = params try { // Parse request body @@ -175,6 +183,12 @@ export async function postChatCompletions(params: { const bodyStream = typedBody.stream ?? false const runId = typedBody.codebuff_metadata?.run_id + // Check if the request is in FREE mode (costs 0 credits for allowed agent+model combos) + const costMode = typedBody.codebuff_metadata?.cost_mode + const isFreeModeRequest = isFreeMode(costMode) + + trackEvent = withDefaultProperties(trackEvent, { freebuff: isFreeModeRequest }) + // Extract and validate API key const apiKey = extractApiKeyFromHeader(req) if (!apiKey) { @@ -242,14 +256,17 @@ export async function postChatCompletions(params: { logger, }) - // Check if the request is in FREE mode (costs 0 credits for allowed agent+model combos) - const costMode = typedBody.codebuff_metadata?.cost_mode - const isFreeModeRequest = isFreeMode(costMode) - // For free mode requests, check if user is in US or Canada if (isFreeModeRequest) { + const countryCode = getCountryCode(req) const clientIp = extractClientIp(req) - const countryCode = getCountryFromIp(clientIp) + + const cfHeader = req.headers.get('cf-ipcountry') + const geoipResult = clientIp ? geoip.lookup(clientIp)?.country ?? 
null : null + logger.info( + { cfHeader, geoipResult, resolvedCountry: countryCode, clientIp: clientIp ? '[redacted]' : undefined }, + 'Free mode country detection', + ) // If we couldn't determine country (null), allow the request (fail open) // This handles users behind VPNs, corporate proxies, or localhost @@ -273,6 +290,7 @@ export async function postChatCompletions(params: { { status: 403 }, ) } + } // Extract and validate agent run ID @@ -333,6 +351,38 @@ export async function postChatCompletions(params: { ) } + // Rate limit free mode requests (after validation so invalid requests don't consume quota) + if (isFreeModeRequest) { + const rateLimitResult = checkFreeModeRateLimit(userId) + if (rateLimitResult.limited) { + const retryAfterSeconds = Math.ceil(rateLimitResult.retryAfterMs / 1000) + const resetTime = new Date(Date.now() + rateLimitResult.retryAfterMs).toISOString() + const resetCountdown = formatQuotaResetCountdown(resetTime) + + trackEvent({ + event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, + userId, + properties: { + error: 'free_mode_rate_limited', + windowName: rateLimitResult.windowName, + retryAfterSeconds, + }, + logger, + }) + + return NextResponse.json( + { + error: 'free_mode_rate_limited', + message: `Free mode rate limit exceeded (${rateLimitResult.windowName} limit). Try again ${resetCountdown}.`, + }, + { + status: 429, + headers: { 'Retry-After': String(retryAfterSeconds) }, + }, + ) + } + } + // For subscribers, ensure a block grant exists before processing the request. // This is done AFTER validation so malformed requests don't start a new 5-hour block. 
// When the function is provided, always include subscription credits in the balance: diff --git a/web/src/app/api/v1/chat/completions/free-mode-rate-limiter.ts b/web/src/app/api/v1/chat/completions/free-mode-rate-limiter.ts new file mode 100644 index 0000000000..b299291cd4 --- /dev/null +++ b/web/src/app/api/v1/chat/completions/free-mode-rate-limiter.ts @@ -0,0 +1,167 @@ +/** + * In-memory rate limiter for FREE mode requests. + * + * Enforces multiple fixed-window limits per user to prevent abuse. + * Each window is anchored to the user's first request in that window + * and resets once the window duration elapses. + * + * Adjust the constants below to tune the limits. + */ + +// --------------------------------------------------------------------------- +// Configurable rate-limit constants +// --------------------------------------------------------------------------- + +export const FREE_MODE_RATE_LIMITS = { + /** Max requests per 1-second window */ + PER_SECOND: 2, + /** Max requests per 1-minute window */ + PER_MINUTE: 20, + /** Max requests per 30-minute window */ + PER_30_MINUTES: 200, + /** Max requests per 5-hour window */ + PER_5_HOURS: 1_000, + /** Max requests per 7-day window */ + PER_7_DAYS: 10_000, +} as const + +// --------------------------------------------------------------------------- +// Internal types +// --------------------------------------------------------------------------- + +interface RateWindow { + name: string + windowMs: number + maxRequests: number +} + +interface WindowTracker { + count: number + windowStart: number +} + +export type RateLimitResult = { + limited: false +} | { + limited: true + windowName: string + retryAfterMs: number +} + +// --------------------------------------------------------------------------- +// Window definitions (derived from the constants above) +// --------------------------------------------------------------------------- + +const SECOND_MS = 1000 +const MINUTE_MS = 60 * SECOND_MS +const HOUR_MS 
= 60 * MINUTE_MS +const DAY_MS = 24 * HOUR_MS + +const RATE_WINDOWS: RateWindow[] = [ + { name: '1 second', windowMs: 1 * SECOND_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_SECOND }, + { name: '1 minute', windowMs: 1 * MINUTE_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_MINUTE }, + { name: '30 minutes', windowMs: 30 * MINUTE_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_30_MINUTES }, + { name: '5 hours', windowMs: 5 * HOUR_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_5_HOURS }, + { name: '7 days', windowMs: 7 * DAY_MS, maxRequests: FREE_MODE_RATE_LIMITS.PER_7_DAYS }, +] + +// --------------------------------------------------------------------------- +// In-memory state +// --------------------------------------------------------------------------- + +// userId -> (windowName -> tracker) +const userWindows = new Map<string, Map<string, WindowTracker>>() + +let lastCleanupTime = 0 +const CLEANUP_INTERVAL_MS = 5 * MINUTE_MS + +// --------------------------------------------------------------------------- +// Cleanup +// --------------------------------------------------------------------------- + +function cleanupExpiredEntries(): void { + const now = Date.now() + for (const [userId, windows] of userWindows) { + for (const [windowName, tracker] of windows) { + const matchingWindow = RATE_WINDOWS.find((w) => w.name === windowName) + if (!matchingWindow) { + windows.delete(windowName) + continue + } + if (now - tracker.windowStart >= matchingWindow.windowMs) { + windows.delete(windowName) + } + } + if (windows.size === 0) { + userWindows.delete(userId) + } + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/** + * Check whether a free-mode request from `userId` should be rate-limited. + * + * If the request is allowed, each window's counter is incremented. + * If any window is exceeded, the request is rejected and no counters change.
+ */ +export function checkFreeModeRateLimit(userId: string): RateLimitResult { + const now = Date.now() + + // Periodic cleanup to prevent memory leaks + if (now - lastCleanupTime > CLEANUP_INTERVAL_MS) { + cleanupExpiredEntries() + lastCleanupTime = now + } + + let windows = userWindows.get(userId) + if (!windows) { + windows = new Map() + userWindows.set(userId, windows) + } + + // First pass: check all windows without mutating + for (const rateWindow of RATE_WINDOWS) { + let tracker = windows.get(rateWindow.name) + + // Reset the window if it has expired + if (tracker && now - tracker.windowStart >= rateWindow.windowMs) { + windows.delete(rateWindow.name) + tracker = undefined + } + + const currentCount = tracker?.count ?? 0 + if (currentCount >= rateWindow.maxRequests) { + const windowStart = tracker!.windowStart + const retryAfterMs = rateWindow.windowMs - (now - windowStart) + return { + limited: true, + windowName: rateWindow.name, + retryAfterMs: Math.max(0, retryAfterMs), + } + } + } + + // Second pass: increment all window counters (request is allowed) + for (const rateWindow of RATE_WINDOWS) { + let tracker = windows.get(rateWindow.name) + if (!tracker) { + tracker = { count: 0, windowStart: now } + windows.set(rateWindow.name, tracker) + } + tracker.count++ + } + + return { limited: false } +} + +/** + * Reset all rate-limit state. Exposed for testing. 
+ */ +export function resetFreeModeRateLimits(): void { + userWindows.clear() + lastCleanupTime = 0 +} diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index df8f356d17..2108d408a2 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -13,7 +13,7 @@ import { import type { Logger } from '@codebuff/common/types/contracts/logger' const STANDARD_MODEL_ID = 'accounts/fireworks/models/minimax-m2p5' -const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/qne3jo8v' +const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/lnfid5h9' function createMockLogger(): Logger { return { diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts index 8582645944..52fe1885c3 100644 --- a/web/src/llm-api/canopywave.ts +++ b/web/src/llm-api/canopywave.ts @@ -39,7 +39,7 @@ function getCanopyWaveModelId(openrouterModel: string): string { return CANOPYWAVE_MODEL_MAP[openrouterModel] ?? 
openrouterModel } -type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean } +type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean } type LineResult = { state: StreamState @@ -170,6 +170,7 @@ export async function handleCanopyWaveNonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Overwrite cost so SDK calculates exact credits we charged @@ -218,7 +219,7 @@ export async function handleCanopyWaveStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false } + let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null, billedAlready: false } let clientDisconnected = false const stream = new ReadableStream({ @@ -439,7 +440,7 @@ async function handleResponse({ logger: Logger insertMessage: InsertMessageBigqueryFn }): Promise<{ state: StreamState; billedCredits?: number }> { - state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel }) // Some providers send cumulative usage on EVERY chunk (not just the final one), // so we must only bill once on the final chunk to avoid charging N times. @@ -486,6 +487,7 @@ async function handleResponse({ byok: false, logger, costMode, + ttftMs: state.ttftMs, }) return { state, billedCredits } @@ -494,6 +496,7 @@ async function handleResponse({ function handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -501,6 +504,7 @@ function handleStreamChunk({ }: { data: Record state: StreamState + startTime: Date logger: Logger userId: string agentId: string @@ -544,6 +548,13 @@ function handleStreamChunk({ const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content : typeof delta?.reasoning === 'string' ? 
delta.reasoning : '' + + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0 + if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) { + state.ttftMs = Date.now() - startTime.getTime() + } + if (state.reasoningText.length < MAX_BUFFER_SIZE) { state.reasoningText += reasoningDelta if (state.reasoningText.length >= MAX_BUFFER_SIZE) { diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 2b28937415..fccfd7892e 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -36,7 +36,7 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = false /** Custom deployment IDs for models with dedicated Fireworks deployments */ const FIREWORKS_DEPLOYMENT_MAP: Record = { - 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/qne3jo8v', + 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', } /** Check if current time is within deployment hours (10am–8pm ET) */ @@ -79,7 +79,7 @@ function getFireworksModelId(openrouterModel: string): string { return FIREWORKS_MODEL_MAP[openrouterModel] ?? 
openrouterModel } -type StreamState = { responseText: string; reasoningText: string } +type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null } type LineResult = { state: StreamState @@ -210,6 +210,7 @@ export async function handleFireworksNonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Overwrite cost so SDK calculates exact credits we charged @@ -258,7 +259,7 @@ export async function handleFireworksStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '' } + let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null } let clientDisconnected = false const stream = new ReadableStream({ @@ -473,7 +474,7 @@ async function handleResponse({ logger: Logger insertMessage: InsertMessageBigqueryFn }): Promise<{ state: StreamState; billedCredits?: number }> { - state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel }) if ('error' in data || !data.usage) { return { state } @@ -511,6 +512,7 @@ async function handleResponse({ byok: false, logger, costMode, + ttftMs: state.ttftMs, }) return { state, billedCredits } @@ -519,6 +521,7 @@ async function handleResponse({ function handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -526,6 +529,7 @@ function handleStreamChunk({ }: { data: Record state: StreamState + startTime: Date logger: Logger userId: string agentId: string @@ -569,6 +573,13 @@ function handleStreamChunk({ const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content : typeof delta?.reasoning === 'string' ? 
delta.reasoning : '' + + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0 + if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) { + state.ttftMs = Date.now() - startTime.getTime() + } + if (state.reasoningText.length < MAX_BUFFER_SIZE) { state.reasoningText += reasoningDelta if (state.reasoningText.length >= MAX_BUFFER_SIZE) { diff --git a/web/src/llm-api/helpers.ts b/web/src/llm-api/helpers.ts index 1ba912cf57..14e578fa9b 100644 --- a/web/src/llm-api/helpers.ts +++ b/web/src/llm-api/helpers.ts @@ -114,6 +114,7 @@ export async function consumeCreditsForMessage(params: { byok: boolean logger: Logger costMode?: string + ttftMs?: number | null }): Promise { const { messageId, @@ -130,6 +131,7 @@ export async function consumeCreditsForMessage(params: { byok, logger, costMode, + ttftMs, } = params // Calculate initial credits based on cost @@ -172,6 +174,7 @@ export async function consumeCreditsForMessage(params: { outputTokens: usageData.outputTokens, byok, logger, + ttftMs: ttftMs ?? 
null, }) return credits diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts index 7ac2f1afeb..8f619e8357 100644 --- a/web/src/llm-api/openai.ts +++ b/web/src/llm-api/openai.ts @@ -304,6 +304,7 @@ export async function handleOpenAINonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) return { @@ -359,6 +360,7 @@ export async function handleOpenAINonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) if (data.usage) { @@ -424,6 +426,7 @@ export async function handleOpenAIStream({ let heartbeatInterval: NodeJS.Timeout let responseText = '' let reasoningText = '' + let ttftMs: number | null = null let clientDisconnected = false const MAX_BUFFER_SIZE = 1 * 1024 * 1024 // 1MB @@ -477,6 +480,14 @@ export async function handleOpenAIStream({ const obj = JSON.parse(raw) const delta = obj.choices?.[0]?.delta + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasContentDelta = delta?.content && responseText.length === 0 + const hasReasoningDelta = delta?.reasoning && reasoningText.length === 0 + const hasToolCallsDelta = delta?.tool_calls && delta.tool_calls.length > 0 + if (ttftMs === null && (hasContentDelta || hasReasoningDelta || hasToolCallsDelta)) { + ttftMs = Date.now() - startTime.getTime() + } + if (delta?.content && responseText.length < MAX_BUFFER_SIZE) { responseText += delta.content if (responseText.length >= MAX_BUFFER_SIZE) { @@ -544,6 +555,7 @@ export async function handleOpenAIStream({ byok: false, logger, costMode, + ttftMs, }) } } catch { @@ -631,6 +643,7 @@ export async function handleOpenAIStream({ byok: false, logger, costMode, + ttftMs, }) } } catch { diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts index c99200f1b0..08b7a31ef5 100644 --- a/web/src/llm-api/openrouter.ts +++ b/web/src/llm-api/openrouter.ts @@ -23,7 +23,7 @@ import type { OpenRouterErrorMetadata, } from 
'./types' -type StreamState = { responseText: string; reasoningText: string } +type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null } // Extended timeout for deep-thinking models (e.g., gpt-5) that can take // a long time to start streaming. @@ -186,6 +186,7 @@ export async function handleOpenRouterNonStream({ byok, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Return the first response with aggregated data @@ -257,6 +258,7 @@ export async function handleOpenRouterNonStream({ byok, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Overwrite cost so SDK calculates exact credits we charged @@ -313,7 +315,7 @@ export async function handleOpenRouterStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '' } + let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null } let clientDisconnected = false // Create a ReadableStream that Next.js can handle @@ -540,6 +542,7 @@ async function handleResponse({ state = await handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -584,6 +587,7 @@ async function handleResponse({ byok, logger, costMode, + ttftMs: state.ttftMs, }) return { state, billedCredits } @@ -592,6 +596,7 @@ async function handleResponse({ async function handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -599,6 +604,7 @@ async function handleStreamChunk({ }: { data: OpenRouterStreamChatCompletionChunk state: StreamState + startTime: Date logger: Logger userId: string agentId: string @@ -641,6 +647,14 @@ async function handleStreamChunk({ } const choice = data.choices[0] + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasContentDelta = choice?.delta?.content != null && choice?.delta?.content !== '' + const hasReasoningDelta = choice?.delta?.reasoning != null && choice?.delta?.reasoning !== '' + 
const hasToolCallsDelta = choice?.delta?.tool_calls != null && (choice?.delta?.tool_calls as unknown[])?.length > 0 + if (state.ttftMs === null && (hasContentDelta || hasReasoningDelta || hasToolCallsDelta)) { + state.ttftMs = Date.now() - startTime.getTime() + } + // Append content and reasoning, but only up to the buffer limit. const contentDelta = choice.delta?.content ?? '' if (state.responseText.length < MAX_BUFFER_SIZE) { diff --git a/web/src/llm-api/siliconflow.ts b/web/src/llm-api/siliconflow.ts index 1146bbe3df..6398fe184f 100644 --- a/web/src/llm-api/siliconflow.ts +++ b/web/src/llm-api/siliconflow.ts @@ -39,7 +39,7 @@ function getSiliconFlowModelId(openrouterModel: string): string { return SILICONFLOW_MODEL_MAP[openrouterModel] ?? openrouterModel } -type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean } +type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean } type LineResult = { state: StreamState @@ -171,6 +171,7 @@ export async function handleSiliconFlowNonStream({ byok: false, logger, costMode, + ttftMs: null, // Non-stream - no TTFT to report }) // Overwrite cost so SDK calculates exact credits we charged @@ -219,7 +220,7 @@ export async function handleSiliconFlowStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false } + let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null, billedAlready: false } let clientDisconnected = false const stream = new ReadableStream({ @@ -440,7 +441,7 @@ async function handleResponse({ logger: Logger insertMessage: InsertMessageBigqueryFn }): Promise<{ state: StreamState; billedCredits?: number }> { - state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel }) // Some providers send cumulative 
usage on EVERY chunk (not just the final one), // so we must only bill once on the final chunk to avoid charging N times. @@ -487,6 +488,7 @@ async function handleResponse({ byok: false, logger, costMode, + ttftMs: state.ttftMs, }) return { state, billedCredits } @@ -495,6 +497,7 @@ async function handleResponse({ function handleStreamChunk({ data, state, + startTime, logger, userId, agentId, @@ -502,6 +505,7 @@ function handleStreamChunk({ }: { data: Record state: StreamState + startTime: Date logger: Logger userId: string agentId: string @@ -545,6 +549,13 @@ function handleStreamChunk({ const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content : typeof delta?.reasoning === 'string' ? delta.reasoning : '' + + // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls) + const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0 + if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) { + state.ttftMs = Date.now() - startTime.getTime() + } + if (state.reasoningText.length < MAX_BUFFER_SIZE) { state.reasoningText += reasoningDelta if (state.reasoningText.length >= MAX_BUFFER_SIZE) {