Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
7002f01
Convert text responses to tool calls
charleslien Nov 25, 2025
c939465
input tools into ai sdk
charleslien Nov 25, 2025
1ecbe4e
do not convert cb tool messages
charleslien Nov 25, 2025
de15639
pass tool-call chunk through promptAiSdkStream
charleslien Nov 25, 2025
eae4f02
fork tool-stream-parser and do not parse text
charleslien Nov 25, 2025
619fc93
change example tool call format
charleslien Nov 25, 2025
532a3b4
remove tool instructions from prompts
charleslien Nov 25, 2025
908664e
remove debug message
charleslien Nov 25, 2025
e087689
have runProgrammaticStep use native tool calls
charleslien Nov 25, 2025
cf76b12
fix typecheck
charleslien Nov 25, 2025
6eff7d4
fix: cli uses parentAgentId to distinguish tools from subagents
jahooma Nov 26, 2025
17a4e57
For "last_message" output mode, return the entire assistant response,…
jahooma Nov 26, 2025
32197cb
fix stream parser bug by adding flush()
jahooma Nov 26, 2025
4487c4e
Fix file picker
jahooma Nov 26, 2025
45c91d0
fix typecheck
charleslien Nov 26, 2025
47e3bc5
Merge branch 'main' into charles/native-tools
jahooma Nov 26, 2025
576c44f
Remove spawn agents example tool call in old format
jahooma Nov 26, 2025
a23a698
Include stringified error for more detail
jahooma Nov 26, 2025
8a1ba97
update editor best of n max + add unit tests (not fully working yet tho)
jahooma Nov 26, 2025
9f9f464
Revert "Include stringified error for more detail"
jahooma Nov 26, 2025
71c285b
web: Pass open router errors through
jahooma Nov 26, 2025
d5a5381
fix cost-aggregation integration tests
charleslien Nov 26, 2025
ca0399f
fix typecheck?
charleslien Nov 26, 2025
839ef3e
editor: don't include spawn agents tool call so anthropic api doesn't…
jahooma Nov 26, 2025
e10ff72
fix typecheck for .agents
charleslien Nov 26, 2025
8994cb7
Validate handleSteps yield values with zod!
jahooma Nov 26, 2025
3d2a1db
fix common unit tests
charleslien Nov 26, 2025
567d661
work around readonly messageHistory
jahooma Nov 26, 2025
b08c8f0
tweak best of n selector to have the impl
jahooma Nov 26, 2025
72c617a
Implement STEP_TEXT within custom parsing within run-programmatic-ste…
jahooma Nov 27, 2025
3b59c87
require json output for tool results
charleslien Nov 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions .agents/__tests__/editor-best-of-n.integration.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants'
import { describe, expect, it } from 'bun:test'

import { CodebuffClient } from '@codebuff/sdk'

import type { PrintModeEvent } from '@codebuff/common/types/print-mode'

/**
 * Integration test for the `editor-best-of-n-max` agent.
 *
 * The best-of-n editor workflow under test is expected to:
 * 1. Spawn multiple implementor agents in parallel
 * 2. Collect their implementation proposals
 * 3. Use a selector agent to choose the best implementation
 * 4. Apply the chosen implementation
 *
 * The test drives the agent end to end through `CodebuffClient.run` and only
 * inspects the final run output: it must exist, must not be of type 'error',
 * and must mention the requested edit. The individual workflow steps above
 * are not asserted on separately.
 *
 * Requires the environment variable named by `API_KEY_ENV_VAR` to be set;
 * the test throws immediately when it is missing.
 */
describe('Editor Best-of-N Max Agent Integration', () => {
  it(
    'should generate and select the best implementation for a simple edit',
    async () => {
      const apiKey = process.env[API_KEY_ENV_VAR]
      if (!apiKey) {
        throw new Error('API key not found')
      }

      // Create mock project files with a simple TypeScript file to edit.
      // Template contents are kept flush-left so the fixture text is exact.
      const projectFiles: Record<string, string> = {
        'src/utils/math.ts': `
export function add(a: number, b: number): number {
return a + b
}

export function subtract(a: number, b: number): number {
return a - b
}
`,
        'src/index.ts': `
import { add, subtract } from './utils/math'

console.log(add(1, 2))
console.log(subtract(5, 3))
`,
        'package.json': JSON.stringify({
          name: 'test-project',
          version: '1.0.0',
          dependencies: {},
        }),
      }

      const client = new CodebuffClient({
        apiKey,
        cwd: '/tmp/test-best-of-n-project',
        projectFiles,
      })

      // Every streamed event is logged and retained to aid debugging a failed run.
      const events: PrintModeEvent[] = []

      // Run the editor-best-of-n-max agent with a simple task
      // Using n=2 to keep the test fast while still testing the best-of-n workflow
      const run = await client.run({
        agent: 'editor-best-of-n-max',
        prompt:
          'Add a multiply function to src/utils/math.ts that takes two numbers and returns their product',
        params: { n: 2 },
        handleEvent: (event) => {
          console.log(event)
          events.push(event)
        },
      })

      // Verify we got some output before reading any of its fields, so a
      // missing output fails as an assertion rather than as a TypeError.
      expect(run.output).toBeDefined()

      // The output should not be an error
      expect(run.output.type).not.toEqual('error')

      // The output should contain the implementation response
      const outputStr =
        typeof run.output === 'string' ? run.output : JSON.stringify(run.output)
      console.log('Output:', outputStr)

      // Should contain evidence of the multiply function being added.
      // Terms are already lowercase, so only the output needs normalizing,
      // and it is normalized once rather than once per term.
      const relevantTerms = ['multiply', 'product', 'str_replace', 'write_file']
      const normalizedOutput = outputStr.toLowerCase()
      const foundRelevantTerm = relevantTerms.some((term) =>
        normalizedOutput.includes(term),
      )

      expect(foundRelevantTerm).toBe(true)
    },
    { timeout: 120_000 }, // 2 minute timeout for best-of-n workflow
  )
})
Loading
Loading