diff --git a/common/src/util/__tests__/format-code-search.test.ts b/common/src/util/__tests__/format-code-search.test.ts new file mode 100644 index 0000000000..f52e65af17 --- /dev/null +++ b/common/src/util/__tests__/format-code-search.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, it } from 'bun:test' + +import { formatCodeSearchOutput } from '../format-code-search' + +describe('formatCodeSearchOutput', () => { + it('adds a match count and line labels', () => { + const output = formatCodeSearchOutput( + [ + 'src/a.ts:12:const alpha = true', + 'src/a.ts:18:return alpha', + 'src/b.ts:3:export const beta = false', + ].join('\n'), + { matchCount: 3 }, + ) + + expect(output).toBe( + [ + 'Found 3 matches', + 'src/a.ts:', + ' Line 12: const alpha = true', + ' Line 18: return alpha', + '', + 'src/b.ts:', + ' Line 3: export const beta = false', + ].join('\n'), + ) + }) + + it('uses the provided match count instead of counting context lines', () => { + const output = formatCodeSearchOutput( + [ + 'src/a.ts:10:const before = true', + 'src/a.ts:11:const match = true', + 'src/a.ts:12:const after = true', + ].join('\n'), + { matchCount: 1 }, + ) + + expect(output).toContain('Found 1 matches') + expect(output).toContain(' Line 10: const before = true') + expect(output).toContain(' Line 11: const match = true') + expect(output).toContain(' Line 12: const after = true') + }) + + it('does not count native ripgrep context lines as matches', () => { + const output = formatCodeSearchOutput( + [ + 'src/a.ts-10-const before = true', + 'src/a.ts:11:const match = true', + 'src/a.ts-12-const after = true', + ].join('\n'), + ) + + expect(output).toContain('Found 1 matches') + }) + + it('reports zero matches for empty output', () => { + expect(formatCodeSearchOutput('')).toBe('Found 0 matches') + }) +}) diff --git a/common/src/util/format-code-search.ts b/common/src/util/format-code-search.ts index 5b98edec31..8a89a7897e 100644 --- a/common/src/util/format-code-search.ts +++ b/common/src/util/format-code-search.ts @@ -1,24 +1,31 @@ /** * Formats code search output to group matches by file. * - * Input format: ./file.ts:line content + * Input format: ./file.ts:line:content * Output format: + * Found 3 matches * ./file.ts: - * line content - * another line content - * yet another line content + * Line 1: content + * Line 2: another line content + * Line 3: yet another line content * * (double newline between distinct files) * * @param stdout The raw stdout from ripgrep + * @param options.matchCount The number of actual matches, excluding context lines * @returns Formatted output with matches grouped by file */ -export function formatCodeSearchOutput(stdout: string): string { +export function formatCodeSearchOutput( + stdout: string, + options: { matchCount?: number } = {}, +): string { if (!stdout) { - return 'No results' + return 'Found 0 matches' } const lines = stdout.split('\n') - const formatted: string[] = [] + const formatted: string[] = [ + `Found ${options.matchCount ?? countFormattedMatches(lines)} matches`, + ] let currentFile: string | null = null for (const line of lines) { @@ -38,30 +45,13 @@ export function formatCodeSearchOutput(stdout: string): string { // Use regex to find the pattern: separator + digits + separator // This handles filenames with hyphens/colons by matching the line number pattern - let separatorIndex = -1 - let filePath = '' + const parsedLine = parseRipgrepLine(line) - // Try match line pattern: filename:digits:content - const matchLinePattern = /(.*?):(\d+):(.*)$/ - const matchLineMatch = line.match(matchLinePattern) - if (matchLineMatch) { - filePath = matchLineMatch[1] - separatorIndex = matchLineMatch[1].length - } else { - // Try context line pattern: filename-digits-content - const contextLinePattern = /(.*?)-(\d+)-(.*)$/ - const contextLineMatch = line.match(contextLinePattern) - if (contextLineMatch) { - filePath = contextLineMatch[1] - separatorIndex = contextLineMatch[1].length - } - } - - if (separatorIndex === -1) { + if (!parsedLine) { formatted.push(line) continue } - const content = line.substring(separatorIndex) + const { filePath, lineNumber, content } = parsedLine // Check if this is a new file (file paths don't start with whitespace) if (filePath && !filePath.startsWith(' ') && !filePath.startsWith('\t')) { @@ -73,11 +63,9 @@ export function formatCodeSearchOutput(stdout: string): string { currentFile = filePath // Show file path with colon on its own line formatted.push(filePath + ':') - // Show content without leading separator on next line - formatted.push(content.substring(1)) + formatted.push(` Line ${lineNumber}: ${content}`) } else { - // Same file - just show content without leading separator - formatted.push(content.substring(1)) + formatted.push(` Line ${lineNumber}: ${content}`) } } else { // Line doesn't match expected format, keep as-is @@ -87,3 +75,41 @@ export function formatCodeSearchOutput(stdout: string): string { return formatted.join('\n') } + +function parseRipgrepLine(line: string): { + filePath: string + lineNumber: string + content: string + isContext: boolean +} | null { + // Try match line pattern: filename:digits:content + const matchLineMatch = line.match(/(.*?):(\d+):(.*)$/) + if (matchLineMatch) { + return { + filePath: matchLineMatch[1], + lineNumber: matchLineMatch[2], + content: matchLineMatch[3], + isContext: false, + } + } + + // Try context line pattern: filename-digits-content + const contextLineMatch = line.match(/(.*?)-(\d+)-(.*)$/) + if (contextLineMatch) { + return { + filePath: contextLineMatch[1], + lineNumber: contextLineMatch[2], + content: contextLineMatch[3], + isContext: true, + } + } + + return null +} + +function countFormattedMatches(lines: string[]): number { + return lines.filter((line) => { + const parsedLine = parseRipgrepLine(line) + return parsedLine && !parsedLine.isContext + }).length +} diff --git a/packages/agent-runtime/src/__tests__/run-programmatic-step.test.ts b/packages/agent-runtime/src/__tests__/run-programmatic-step.test.ts index 954bdc73f1..5a06372e0b 100644 --- a/packages/agent-runtime/src/__tests__/run-programmatic-step.test.ts +++ b/packages/agent-runtime/src/__tests__/run-programmatic-step.test.ts @@ -212,6 +212,28 @@ describe('runProgrammaticStep', () => { }) describe('tool execution', () => { + it('assigns deterministic per-tool ids to handleSteps tool calls', async () => { + const mockGenerator = (function* () { + yield { toolName: 'read_files', input: { paths: ['first.txt'] } } + yield { toolName: 'read_files', input: { paths: ['second.txt'] } } + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + await runProgrammaticStep(mockParams) + + expect(executeToolCallSpy.mock.calls[0][0].toolCallId).toBe( + 'functions.read_files:0', + ) + expect(executeToolCallSpy.mock.calls[1][0].toolCallId).toBe( + 'functions.read_files:1', + ) + expect(executeToolCallSpy.mock.calls[2][0].toolCallId).toBe( + 'functions.end_turn:0', + ) + }) + it('should not add tool call message for add_message tool', async () => { const mockGenerator = (function* () { yield { diff --git a/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts b/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts index 9b834024ac..ff75aa44e6 100644 --- a/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts +++ b/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts @@ -401,6 +401,7 @@ describe('tool validation error handling', () => { ) expect(toolCallEvents.length).toBe(1) expect(toolCallEvents[0].toolName).toBe('read_files') + expect(toolCallEvents[0].toolCallId).toBe('functions.read_files:0') // Verify tool_result event was emitted const toolResultEvents = responseChunks.filter( @@ -408,6 +409,8 @@ describe('tool validation error handling', () => { typeof chunk !== 'string' && chunk.type === 'tool_result', ) expect(toolResultEvents.length).toBe(1) + expect(toolResultEvents[0].toolName).toBe('read_files') + expect(toolResultEvents[0].toolCallId).toBe('functions.read_files:0') // Verify NO error events const errorEvents = responseChunks.filter( diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts index 64addd4103..83bd943687 100644 --- a/packages/agent-runtime/src/run-programmatic-step.ts +++ b/packages/agent-runtime/src/run-programmatic-step.ts @@ -6,7 +6,7 @@ import { cloneDeep } from 'lodash' import { clearProposedContentForRun } from './tools/handlers/tool/proposed-content-store' import { executeToolCall } from './tools/tool-executor' import { parseTextWithToolCalls } from './util/parse-tool-calls-from-text' - +import { createToolCallIdGenerator } from './util/tool-call-id' import type { FileProcessingState } from './tools/handlers/tool/write-file' import type { ExecuteToolCallParams } from './tools/tool-executor' @@ -213,6 +213,7 @@ export async function runProgrammaticStep( let toolResult: ToolResultOutput[] | undefined = undefined let endTurn = false let generateN: number | undefined = undefined + const getToolCallId = createToolCallIdGenerator(agentState.messageHistory) let startTime = new Date() let creditsBefore = agentState.directCreditsUsed @@ -273,6 +274,7 @@ export async function runProgrammaticStep( previousToolCallFinished: Promise.resolve(), toolCalls, toolResults, + getToolCallId, onResponseChunk, }) } @@ -301,6 +303,7 @@ export async function runProgrammaticStep( previousToolCallFinished: Promise.resolve(), toolCalls, toolResults, + getToolCallId, onResponseChunk, }) @@ -432,6 +435,7 @@ type ExecuteToolCallsArrayParams = Omit< | 'toolResultsToAddToMessageHistory' > & { agentState: AgentState + getToolCallId: (toolName: string) => string onResponseChunk: (chunk: string | PrintModeEvent) => void } @@ -445,7 +449,7 @@ async function executeSingleToolCall( toolCallToExecute: ToolCallToExecute, params: ExecuteToolCallsArrayParams, ): Promise { - const { agentState, onResponseChunk, toolResults } = params + const { agentState, getToolCallId, onResponseChunk, toolResults } = params // Note: We don't check if the tool is available for the agent template anymore. // You can run any tool from handleSteps now! @@ -455,7 +459,7 @@ async function executeSingleToolCall( // ) // } - const toolCallId = crypto.randomUUID() + const toolCallId = getToolCallId(toolCallToExecute.toolName) const excludeToolFromMessageHistory = toolCallToExecute.includeToolCall === false diff --git a/packages/agent-runtime/src/tool-stream-parser.ts b/packages/agent-runtime/src/tool-stream-parser.ts index cd4ca58df7..1f4deed9d1 100644 --- a/packages/agent-runtime/src/tool-stream-parser.ts +++ b/packages/agent-runtime/src/tool-stream-parser.ts @@ -50,7 +50,6 @@ export async function* processStreamWithTools(params: { } trackEvent: TrackEventFn executeXmlToolCall: (params: { - toolCallId: string toolName: string input: Record }) => Promise @@ -150,12 +149,9 @@ export async function* processStreamWithTools(params: { // Then process and yield any XML tool calls found for (const toolCall of toolCalls) { - const toolCallId = `xml-${crypto.randomUUID().slice(0, 8)}` - // Execute the tool immediately if callback provided, pausing the stream // The callback handles emitting tool_call and tool_result events await executeXmlToolCall({ - toolCallId, toolName: toolCall.toolName, input: toolCall.input, }) diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts index 4cdb32117e..fd8f9ea0c4 100644 --- a/packages/agent-runtime/src/tools/stream-parser.ts +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -5,7 +5,6 @@ import { assistantMessage, userMessage, } from '@codebuff/common/util/messages' -import { generateCompactId } from '@codebuff/common/util/string' import { processStreamWithTools } from '../tool-stream-parser' import { INCLUDE_REASONING_IN_MESSAGE_HISTORY } from '../constants' @@ -14,6 +13,7 @@ import { executeToolCall, tryTransformAgentToolCall, } from './tool-executor' +import { createToolCallIdGenerator } from '../util/tool-call-id' import { withSystemTags } from '../util/messages' import type { CustomToolCall, ExecuteToolCallParams } from './tool-executor' @@ -91,6 +91,7 @@ export async function processStream( const toolCalls: (CodebuffToolCall | CustomToolCall)[] = [] const toolCallsToAddToMessageHistory: (CodebuffToolCall | CustomToolCall)[] = [] const assistantMessages: Message[] = [] + const getToolCallId = createToolCallIdGenerator(params.messages) let hadToolCallError = false const errorMessages: Message[] = [] const { promise: streamDonePromise, resolve: resolveStreamDonePromise } = @@ -137,7 +138,6 @@ export async function processStream( if (signal.aborted) { return } - const toolCallId = generateCompactId() const isNativeTool = toolNames.includes(toolName as ToolName) // Check if this is an agent tool call that should be transformed to spawn_agents @@ -160,19 +160,20 @@ export async function processStream( // Determine which executor to use and with what parameters let toolPromise: Promise if (isNativeTool || transformed) { + const effectiveToolName = transformed + ? transformed.toolName + : (toolName as ToolName) // Use executeToolCall for native tools or transformed agent calls toolPromise = executeToolCall({ ...params, - toolName: transformed - ? transformed.toolName - : (toolName as ToolName), + toolName: effectiveToolName, input: transformed ? transformed.input : input, fromHandleSteps: false, fileProcessingState, fullResponse: fullResponseChunks.join(''), previousToolCallFinished: previousPromise, - toolCallId, + toolCallId: getToolCallId(effectiveToolName), toolCalls, toolCallsToAddToMessageHistory, toolResults, @@ -191,7 +192,7 @@ export async function processStream( fileProcessingState, fullResponse: fullResponseChunks.join(''), previousToolCallFinished: previousPromise, - toolCallId, + toolCallId: getToolCallId(toolName), toolCalls, toolCallsToAddToMessageHistory, toolResults, diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts index 303765ea7d..60993a0223 100644 --- a/packages/agent-runtime/src/tools/tool-executor.ts +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -1,12 +1,12 @@ import { endsAgentStepParam, toolNames } from '@codebuff/common/tools/constants' import { toolParams } from '@codebuff/common/tools/list' -import { generateCompactId } from '@codebuff/common/util/string' import { cloneDeep } from 'lodash' import { getMCPToolData } from '../mcp' import { MCP_TOOL_SEPARATOR } from '../mcp-constants' import { getAgentShortName, getAgentToolName } from '../templates/prompts' import { formatValueForError } from '../util/format-value' +import { createToolCallIdGenerator } from '../util/tool-call-id' import { codebuffToolHandlers } from './handlers/list' import { getMatchingSpawn } from './handlers/tool/spawn-agent-utils' import { getAgentTemplate } from '../templates/agent-registry' @@ -308,7 +308,9 @@ export async function executeToolCall( onResponseChunk, requestToolCall, } = params - const toolCallId = params.toolCallId ?? generateCompactId() + const toolCallId = + params.toolCallId ?? + createToolCallIdGenerator(agentState.messageHistory, toolCalls)(toolName) const toolCall: CodebuffToolCall | ToolCallError = parseRawToolCall({ rawToolCall: { @@ -640,7 +642,11 @@ export async function executeCustomToolCall( }), rawToolCall: { toolName, - toolCallId: toolCallId ?? generateCompactId(), + toolCallId: + toolCallId ?? + createToolCallIdGenerator(agentState.messageHistory, toolCalls)( + toolName, + ), input, }, autoInsertEndStepParam, diff --git a/packages/agent-runtime/src/util/__tests__/tool-call-id.test.ts b/packages/agent-runtime/src/util/__tests__/tool-call-id.test.ts new file mode 100644 index 0000000000..21a150f639 --- /dev/null +++ b/packages/agent-runtime/src/util/__tests__/tool-call-id.test.ts @@ -0,0 +1,63 @@ +import { assistantMessage } from '@codebuff/common/util/messages' +import { describe, expect, it } from 'bun:test' + +import { + countToolCallsByName, + createToolCallIdGenerator, + formatToolCallId, +} from '../tool-call-id' + +describe('tool call ids', () => { + it('formats ids with the tool name and per-tool invocation index', () => { + expect(formatToolCallId('glob', 0)).toBe('functions.glob:0') + }) + + it('seeds per-tool counters from existing message history', () => { + const messages = [ + assistantMessage({ + type: 'tool-call', + toolName: 'glob', + toolCallId: 'functions.glob:0', + input: { pattern: '**/*.ts' }, + }), + assistantMessage({ + type: 'tool-call', + toolName: 'read_files', + toolCallId: 'functions.read_files:0', + input: { paths: ['src/index.ts'] }, + }), + assistantMessage({ + type: 'tool-call', + toolName: 'glob', + toolCallId: 'functions.glob:1', + input: { pattern: '**/*.tsx' }, + }), + ] + + expect(countToolCallsByName(messages)).toEqual( + new Map([ + ['glob', 2], + ['read_files', 1], + ]), + ) + + const getToolCallId = createToolCallIdGenerator(messages) + + expect(getToolCallId('glob')).toBe('functions.glob:2') + expect(getToolCallId('glob')).toBe('functions.glob:3') + expect(getToolCallId('read_files')).toBe('functions.read_files:1') + }) + + it('can seed counters from pending tool calls', () => { + const getToolCallId = createToolCallIdGenerator([], [ + { + toolName: 'glob', + }, + { + toolName: 'glob', + }, + ]) + + expect(getToolCallId('glob')).toBe('functions.glob:2') + }) +}) diff --git a/packages/agent-runtime/src/util/tool-call-id.ts b/packages/agent-runtime/src/util/tool-call-id.ts new file mode 100644 index 0000000000..bfa64f1506 --- /dev/null +++ b/packages/agent-runtime/src/util/tool-call-id.ts @@ -0,0 +1,48 @@ +import type { Message } from '@codebuff/common/types/messages/codebuff-message' + +const TOOL_CALL_ID_PREFIX = 'functions' +type ToolCallLike = { toolName: string } + +export function formatToolCallId(toolName: string, index: number): string { + return `${TOOL_CALL_ID_PREFIX}.${toolName}:${index}` +} + +export function countToolCallsByName( + messages: Message[], + pendingToolCalls: ToolCallLike[] = [], +): Map { + const counts = new Map() + + for (const message of messages) { + if (message.role !== 'assistant') { + continue + } + + for (const part of message.content) { + if (part.type !== 'tool-call') { + continue + } + + counts.set(part.toolName, (counts.get(part.toolName) ?? 0) + 1) + } + } + + for (const toolCall of pendingToolCalls) { + counts.set(toolCall.toolName, (counts.get(toolCall.toolName) ?? 0) + 1) + } + + return counts +} + +export function createToolCallIdGenerator( + messages: Message[], + pendingToolCalls: ToolCallLike[] = [], +) { + const counts = countToolCallsByName(messages, pendingToolCalls) + + return (toolName: string): string => { + const index = counts.get(toolName) ?? 0 + counts.set(toolName, index + 1) + return formatToolCallId(toolName, index) + } +} diff --git a/sdk/src/__tests__/code-search.test.ts b/sdk/src/__tests__/code-search.test.ts index 2e4d27fcd0..2cad255613 100644 --- a/sdk/src/__tests__/code-search.test.ts +++ b/sdk/src/__tests__/code-search.test.ts @@ -51,7 +51,9 @@ describe('codeSearch', () => { const result = await searchPromise expect(result[0].type).toBe('json') const value = asCodeSearchResult(result[0]) + expect(value.stdout).toContain('Found 3 matches') expect(value.stdout).toContain('file1.ts:') + expect(value.stdout).toContain(' Line 1: import foo from "bar"') expect(value.stdout).toContain('file2.ts:') }) }) @@ -81,6 +83,8 @@ describe('codeSearch', () => { expect(result[0].type).toBe('json') const value = asCodeSearchResult(result[0]) + expect(value.stdout).toContain('Found 2 matches') + // Should contain match lines expect(value.stdout).toContain('import { env } from "./config"') expect(value.stdout).toContain('import env from "process"') @@ -104,7 +108,11 @@ describe('codeSearch', () => { createRgJsonContext('app.ts', 1, 'import React from "react"'), createRgJsonContext('app.ts', 2, ''), createRgJsonMatch('app.ts', 3, 'export const main = () => {}'), - createRgJsonContext('utils.ts', 8, 'function validateInput(x: string) {'), + createRgJsonContext( + 'utils.ts', + 8, + 'function validateInput(x: string) {', + ), createRgJsonContext('utils.ts', 9, ' return x.length > 0'), createRgJsonMatch('utils.ts', 10, 'export function helper() {}'), ].join('\n') @@ -343,6 +351,28 @@ describe('codeSearch', () => { } }) + it('should not report truncation when matches exactly equal maxResults', async () => { + const searchPromise = codeSearch({ + projectPath: '/test/project', + pattern: 'test', + maxResults: 2, + }) + + const output = [ + createRgJsonMatch('file.ts', 1, 'test 1'), + createRgJsonMatch('file.ts', 2, 'test 2'), + ].join('\n') + + mockProcess.stdout.emit('data', Buffer.from(output)) + mockProcess.emit('close', 0) + + const result = await searchPromise + const value = asCodeSearchResult(result[0]) + + expect(value.stdout).toContain('Found 2 matches') + expect(value.stdout).not.toContain('Results limited') + }) + it('should respect globalMaxResults with context lines', async () => { const searchPromise = codeSearch({ projectPath: '/test/project', @@ -447,8 +477,7 @@ describe('codeSearch', () => { const result = await searchPromise const value = asCodeSearchResult(result[0]) - // formatCodeSearchOutput returns 'No results' for empty input - expect(value.stdout).toBe('No results') + expect(value.stdout).toBe('Found 0 matches') }) }) @@ -544,7 +573,13 @@ describe('codeSearch', () => { // Generate matches with long content to quickly exceed output size const matches: string[] = [] for (let i = 0; i < 20; i++) { - matches.push(createRgJsonMatch('file.ts', i, `test line ${i} with some content that is quite long to fill up the buffer quickly`)) + matches.push( + createRgJsonMatch( + 'file.ts', + i, + `test line ${i} with some content that is quite long to fill up the buffer quickly`, + ), + ) } const output = matches.join('\n') @@ -559,8 +594,8 @@ describe('codeSearch', () => { const matchCount = (value.stdout!.match(/test line \d+/g) || []).length expect(matchCount).toBeLessThan(20) // Should indicate truncation happened - const hasTruncationMessage = - value.stdout!.includes('truncated') || + const hasTruncationMessage = + value.stdout!.includes('truncated') || value.stdout!.includes('limit reached') || value.stdout!.includes('Output size limit') expect(hasTruncationMessage).toBe(true) @@ -616,7 +651,7 @@ describe('codeSearch', () => { expect(result[0].type).toBe('json') const value = asCodeSearchResult(result[0]) expect(value.stdout).toContain('file.ts:') - + // Verify the args passed to spawn include the glob flag correctly expect(mockSpawn).toHaveBeenCalled() const spawnArgs = mockSpawn.mock.calls[0]![1] as string[] @@ -631,7 +666,11 @@ describe('codeSearch', () => { flags: '-g *.ts -g *.tsx', }) - const output = createRgJsonMatch('file.tsx', 1, 'import React from "react"') + const output = createRgJsonMatch( + 'file.tsx', + 1, + 'import React from "react"', + ) mockProcess.stdout.emit('data', Buffer.from(output)) mockProcess.emit('close', 0) @@ -640,11 +679,13 @@ describe('codeSearch', () => { expect(result[0].type).toBe('json') const value = asCodeSearchResult(result[0]) expect(value.stdout).toContain('file.tsx:') - + // Verify both glob patterns are passed correctly const spawnArgs = mockSpawn.mock.calls[0]![1] as string[] // Should have two -g flags, each followed by its pattern - const gFlagIndices = spawnArgs.map((arg, i) => arg === '-g' ? i : -1).filter(i => i !== -1) + const gFlagIndices = spawnArgs + .map((arg, i) => (arg === '-g' ? i : -1)) + .filter((i) => i !== -1) expect(gFlagIndices.length).toBe(2) expect(spawnArgs[gFlagIndices[0]! + 1]).toBe('*.ts') expect(spawnArgs[gFlagIndices[1]! + 1]).toBe('*.tsx') @@ -657,7 +698,11 @@ describe('codeSearch', () => { flags: "-g 'authentication.knowledge.md'", }) - const output = createRgJsonMatch('authentication.knowledge.md', 5, 'auth content') + const output = createRgJsonMatch( + 'authentication.knowledge.md', + 5, + 'auth content', + ) mockProcess.stdout.emit('data', Buffer.from(output)) mockProcess.emit('close', 0) @@ -721,13 +766,17 @@ describe('codeSearch', () => { flags: '-g *.ts -i -g *.tsx', }) - const output = createRgJsonMatch('file.tsx', 1, 'import React from "react"') + const output = createRgJsonMatch( + 'file.tsx', + 1, + 'import React from "react"', + ) mockProcess.stdout.emit('data', Buffer.from(output)) mockProcess.emit('close', 0) const result = await searchPromise - + // Verify flags are preserved in order without deduplication const spawnArgs = mockSpawn.mock.calls[0]![1] as string[] const flagsSection = spawnArgs.slice(0, spawnArgs.indexOf('--')) @@ -735,9 +784,9 @@ describe('codeSearch', () => { expect(flagsSection).toContain('*.ts') expect(flagsSection).toContain('-i') expect(flagsSection).toContain('*.tsx') - + // Count -g flags - should be 2, not deduplicated to 1 - const gCount = flagsSection.filter(arg => arg === '-g').length + const gCount = flagsSection.filter((arg) => arg === '-g').length expect(gCount).toBe(2) }) }) diff --git a/sdk/src/tools/code-search.ts b/sdk/src/tools/code-search.ts index 6bd656b6a4..2fa0286d5c 100644 --- a/sdk/src/tools/code-search.ts +++ b/sdk/src/tools/code-search.ts @@ -98,7 +98,10 @@ export function codeSearch({ const rgPath = getBundledRgPath(import.meta.url) if (logger) { - logger.info({ rgPath, args, searchCwd }, 'code-search: Spawning ripgrep process') + logger.info( + { rgPath, args, searchCwd }, + 'code-search: Spawning ripgrep process', + ) } const childProcess = spawn(rgPath, args, { cwd: searchCwd, @@ -111,6 +114,7 @@ export function codeSearch({ const fileGroups = new Map() // Track match count per file separately from total lines const fileMatchCounts = new Map() + const filesLimitedByMaxResults = new Set() let matchesGlobal = 0 let estimatedOutputLen = 0 let killedForLimit = false @@ -140,7 +144,7 @@ export function codeSearch({ const hardKill = () => { try { childProcess.kill('SIGTERM') - } catch { } + } catch {} // Store timeout reference so it can be cleared if process closes normally killTimeoutId = setTimeout(() => { try { @@ -148,12 +152,22 @@ export function codeSearch({ } catch { try { childProcess.kill() - } catch { } + } catch {} } killTimeoutId = null }, 1000) } + const formatCollectedOutput = (rawOutput: string) => + formatCodeSearchOutput(rawOutput, { + matchCount: matchesGlobal, + }) + + const truncateOutput = (output: string, maxLength: number) => + output.length > maxLength + ? output.substring(0, maxLength) + '\n\n[Output truncated]' + : output + const timeoutId = setTimeout(() => { if (isResolved) return hardKill() @@ -165,10 +179,10 @@ export function codeSearch({ } const partialOutput = collectedLines.join('\n') - const truncatedStdout = - partialOutput.length > 1000 - ? partialOutput.substring(0, 1000) + '\n\n[Output truncated]' - : partialOutput + const truncatedStdout = truncateOutput( + formatCollectedOutput(partialOutput), + 1000, + ) const truncatedStderr = stderrBuf.length > 1000 ? stderrBuf.substring(0, 1000) + '\n\n[Error output truncated]' @@ -228,6 +242,9 @@ export function codeSearch({ // For matches: only if we haven't hit the per-file limit // For context: always include (they don't count toward limit) const shouldInclude = !isMatch || fileMatchCount < maxResults + if (isMatch && !shouldInclude) { + filesLimitedByMaxResults.add(filePath) + } if (shouldInclude) { // Add the line to output @@ -253,13 +270,10 @@ export function codeSearch({ limitedLines.push(...lines) } const rawOutput = limitedLines.join('\n') - const formattedOutput = formatCodeSearchOutput(rawOutput) - - const finalOutput = - formattedOutput.length > maxOutputStringLength - ? formattedOutput.substring(0, maxOutputStringLength) + - '\n\n[Output truncated]' - : formattedOutput + const finalOutput = truncateOutput( + formatCollectedOutput(rawOutput), + maxOutputStringLength, + ) const limitReason = matchesGlobal >= globalMaxResults @@ -324,6 +338,13 @@ export function codeSearch({ !isMatch || (fileMatchCount < maxResults && matchesGlobal < globalMaxResults) + if ( + isMatch && + fileMatchCount >= maxResults && + matchesGlobal < globalMaxResults + ) { + filesLimitedByMaxResults.add(filePath) + } if (shouldInclude) { fileLines.push(formattedLine) @@ -335,10 +356,10 @@ export function codeSearch({ } } } - } catch { } + } catch {} } } - } catch { } + } catch {} // Build final output from collected matches const limitedLines: string[] = [] @@ -346,9 +367,7 @@ export function codeSearch({ for (const [filename, fileLines] of fileGroups) { limitedLines.push(...fileLines) - // Note if file was truncated (based on match count, not total lines) - const fileMatchCount = fileMatchCounts.get(filename) ?? 0 - if (fileMatchCount >= maxResults) { + if (filesLimitedByMaxResults.has(filename)) { truncatedFiles.push( `${filename}: limited to ${maxResults} results per file`, ) @@ -374,20 +393,17 @@ export function codeSearch({ rawOutput += `\n\n[${truncationMessages.join('\n\n')}]` } - const formattedOutput = formatCodeSearchOutput(rawOutput) - // Truncate output to prevent memory issues - const truncatedStdout = - formattedOutput.length > maxOutputStringLength - ? formattedOutput.substring(0, maxOutputStringLength) + - '\n\n[Output truncated]' - : formattedOutput + const truncatedStdout = truncateOutput( + formatCollectedOutput(rawOutput), + maxOutputStringLength, + ) const truncatedStderr = stderrBuf ? stderrBuf + - (stderrBuf.length >= Math.floor(maxOutputStringLength / 5) - ? '\n\n[Error output truncated]' - : '') + (stderrBuf.length >= Math.floor(maxOutputStringLength / 5) + ? '\n\n[Error output truncated]' + : '') : '' settle({