diff --git a/.github/workflows/qualops.yml b/.github/workflows/qualops.yml new file mode 100644 index 0000000..3247e57 --- /dev/null +++ b/.github/workflows/qualops.yml @@ -0,0 +1,23 @@ +name: QualOps Review + +on: + pull_request: + branches: [main] + +permissions: + contents: read + pull-requests: write + checks: write + +jobs: + review: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: eggai-tech/qualops@v0.2.1 + with: + anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }} + github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.qualops/.qualopsrc.json b/.qualops/.qualopsrc.json new file mode 100644 index 0000000..844c2ff --- /dev/null +++ b/.qualops/.qualopsrc.json @@ -0,0 +1,15 @@ +{ + "ai": { + "reviewStage": { + "provider": "anthropic", + "model": "claude-sonnet-4-20250514" + } + }, + "review": { + "minConfidence": 4 + }, + "github": { + "postComments": true, + "maxInlineComments": 50 + } +} diff --git a/README.md b/README.md index 51459c7..cc9ac96 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,6 @@ Providers: Anthropic, OpenAI, Google, and any OpenAI-compatible endpoint ```bash pnpm install export ANTHROPIC_API_KEY=... # or OPENAI_API_KEY / GOOGLE_GENERATIVE_AI_API_KEY -export TAVILY_API_KEY=... # only if websearch tool is enabled CONFIG_PATH=./example.config.yaml pnpm dev ``` @@ -51,7 +50,7 @@ model: agent: maxSteps: 10 # hard cap on the tool-use loop -mcpTools: # external MCP servers, none bundled +mcpTools: # external MCP servers (none bundled — see Built-in tools below) - name: accounts transport: stdio command: accounts-mcp @@ -86,6 +85,16 @@ output: # required: [answer] ``` +## Built-in tools + +The agent always has access to one built-in tool regardless of `mcpTools` configuration: + +| Tool | Purpose | +|------|---------| +| `todowrite` | Maintains an in-memory todo list for the duration of a single run. Each call **replaces** the entire list. 
import { tool } from 'ai';
import type { Tool } from 'ai';
import { z } from 'zod';
import type { ToolResult } from '../events.js';

/** Lifecycle state of a single todo entry. */
export type TodoStatus = 'pending' | 'in_progress' | 'completed';

/** One entry of the todo list managed by the `todowrite` tool. */
export interface TodoItem {
  /** Imperative description of the task (e.g. "Add HTTP tool"). */
  content: string;
  /** Present-continuous form shown while working on the task (e.g. "Adding HTTP tool"). */
  activeForm: string;
  /** Current lifecycle state of the task. */
  status: TodoStatus;
}

/** Mutable holder for the current todo list; the tool overwrites `todos` on every call. */
export interface TodoStore {
  todos: TodoItem[];
}

/**
 * Creates a fresh, empty todo store.
 *
 * @returns A store whose `todos` array is empty.
 */
export function createTodoStore(): TodoStore {
  return { todos: [] };
}

/**
 * Builds the `todowrite` tool bound to the given store.
 *
 * Every invocation replaces `store.todos` with the list supplied by the model and
 * answers with a `ToolResult` envelope whose `content` is the pretty-printed JSON
 * `{ todos, count }`. The returned tool also carries a `toModelOutput` hook that maps
 * the envelope to a `text` part, or an `error-text` part when its status is `'error'`.
 *
 * @param store - Store that receives the replacement list on each call.
 * @returns The tool definition, extended with `toModelOutput`.
 */
export function createTodoWriteTool(store: TodoStore): Tool {
  const base = tool({
    description:
      'Manage a structured todo list to plan and track multi-step work within this run. ' +
      'Each call REPLACES the entire list. Use this to break complex requests into steps, ' +
      'then update status as you progress. Exactly one item should be in_progress at a time. ' +
      'Each todo has: `content` (imperative, e.g. "Add HTTP tool"), `activeForm` ' +
      '(present-continuous shown while working, e.g. "Adding HTTP tool"), and `status` ' +
      '(one of pending / in_progress / completed). Returns the updated list.',
    inputSchema: z.object({
      todos: z
        .array(
          z.object({
            content: z.string().min(1).describe('Imperative description of the task'),
            activeForm: z
              .string()
              .min(1)
              .describe('Present-continuous form shown while working on the task'),
            status: z.enum(['pending', 'in_progress', 'completed']),
          }),
        )
        .describe('The full replacement list of todos'),
    }),
    execute: async (args): Promise<ToolResult> => {
      const start = Date.now();
      // Copy the items so the store does not share references with `args`, which is
      // handed back to the caller in the envelope below.
      store.todos = args.todos.map((todo) => ({ ...todo }));
      return {
        label: 'todowrite',
        status: 'succeeded',
        content: JSON.stringify({ todos: store.todos, count: store.todos.length }, null, 2),
        return_code: null,
        args,
        duration_ms: Date.now() - start,
      };
    },
  });

  // NOTE(review): the double assertion is kept from the original; presumably the
  // spread-plus-hook object does not structurally match `Tool` — confirm against the
  // installed `ai` version before tightening.
  return {
    ...base,
    toModelOutput(output: unknown) {
      const env = output as ToolResult | null | undefined;
      // Guard `content` the same way as `status`, so a nullish output yields an empty
      // text part instead of throwing a TypeError on property access.
      const value = env?.content ?? '';
      if (env?.status === 'error') {
        return { type: 'error-text', value } as const;
      }
      return { type: 'text', value } as const;
    },
  } as unknown as Tool;
}
Promise }).execute; + const result = (await execute({ + todos: [ + { content: 'Step one', activeForm: 'Doing step one', status: 'in_progress' }, + { content: 'Step two', activeForm: 'Doing step two', status: 'pending' }, + ], + })) as { status: string; content: string }; + + expect(result.status).toBe('succeeded'); + const parsed = JSON.parse(result.content); + expect(parsed.count).toBe(2); + expect(parsed.todos[0].content).toBe('Step one'); + expect(store.todos).toHaveLength(2); + }); + + it('replaces the previous list on a second call', async () => { + const store = createTodoStore(); + const tool = createTodoWriteTool(store); + const execute = (tool as unknown as { execute: (args: unknown) => Promise }).execute; + + await execute({ todos: [{ content: 'Old', activeForm: 'Doing old', status: 'in_progress' }] }); + await execute({ todos: [] }); + + expect(store.todos).toHaveLength(0); + }); + + it('toModelOutput returns text for succeeded results', () => { + const tool = createTodoWriteTool(createTodoStore()); + const toModelOutput = ( + tool as unknown as { toModelOutput: (output: unknown) => { type: string; value: string } } + ).toModelOutput; + + const out = toModelOutput({ status: 'succeeded', content: '{"todos":[]}' }); + expect(out.type).toBe('text'); + expect(out.value).toBe('{"todos":[]}'); + }); + + it('toModelOutput returns error-text for error results', () => { + const tool = createTodoWriteTool(createTodoStore()); + const toModelOutput = ( + tool as unknown as { toModelOutput: (output: unknown) => { type: string; value: string } } + ).toModelOutput; + + const out = toModelOutput({ status: 'error', content: 'something went wrong' }); + expect(out.type).toBe('error-text'); + expect(out.value).toBe('something went wrong'); + }); +});