diff --git a/scripts/generate-benchmark-matrix.ts b/scripts/generate-benchmark-matrix.ts index 53167a0..b032e56 100644 --- a/scripts/generate-benchmark-matrix.ts +++ b/scripts/generate-benchmark-matrix.ts @@ -2,17 +2,11 @@ import { Agent } from "~/agents/index.js"; import { Task } from "~/src/tasks/index.js"; +// Note: Models are no longer hardcoded per agent. +// This script now generates an empty matrix since models should be specified externally. const agents = Agent.list(); const tasks = await Task.listNames(); -const include = tasks.flatMap((task) => - agents.flatMap((agent) => - agent.models.map((model) => ({ - eval: task, - model, - agent: agent.name, - })), - ), -); +const include: any[] = []; const matrix = JSON.stringify({ include }); process.stdout.write(matrix); diff --git a/scripts/sync-workflow-inputs.ts b/scripts/sync-workflow-inputs.ts index fedc4e2..a70102c 100644 --- a/scripts/sync-workflow-inputs.ts +++ b/scripts/sync-workflow-inputs.ts @@ -38,19 +38,14 @@ async function main(): Promise { const workflowContent = readFileSync(workflowPath, "utf8"); const workflow = YAML.parse(workflowContent); - // Get all available agent:model combinations + // Note: Models are no longer hardcoded per agent. + // This script now generates empty inputs since models should be specified externally. const agents = Agent.list(); const combinations: Array<{ agent: string; model: string }> = []; - for (const agent of agents) { - for (const model of agent.models) { - combinations.push({ agent: agent.name, model }); - } - } - + // Models are no longer hardcoded, so combinations list will be empty if (combinations.length === 0) { - console.error("No agent:model combinations found"); - process.exit(1); + console.log("No hardcoded agent:model combinations (models are now dynamic)"); } // Build new inputs diff --git a/src/agents/claude-code.ts b/src/agents/claude-code.ts index 8b4494a..9d913c9 100644 --- a/src/agents/claude-code.ts +++ b/src/agents/claude-code.ts @@ -8,14 +8,6 @@ import { Logger } from "../util/logger.js"; const sessionCache = new Map(); -export const models: string[] = [ - "claude-sonnet-4-5", - "claude-opus-4-5", - // "claude-sonnet-4", - // "claude-opus-4-1", - // "claude-3-5-haiku", -]; - function sessionKey(model: string, cwd: string): string { return `${cwd}::${model}`; } diff --git a/src/agents/codex.ts b/src/agents/codex.ts index 8d37638..0c46467 100644 --- a/src/agents/codex.ts +++ b/src/agents/codex.ts @@ -16,14 +16,6 @@ const DEFAULT_SANDBOX: SandboxMode = "workspace-write"; const codexClient = new Codex(); const threadCache = new Map(); -export const models = [ - "gpt-5-codex", - "gpt-5.1-codex", - // "gpt-5", - // "o3", - // "o4-mini" -] as const; - function sessionKey(model: string, cwd: string): string { return `${cwd}::${model}`; } @@ -67,7 +59,7 @@ function getOrCreateThread(model: string, cwd: string): Thread { return thread; } -const codexAgent: Agent.Definition<(typeof models)[number]> = { +const codexAgent: Agent.Definition = { async run(model, prompt, options) { options.logger.log( `codex-sdk --model ${model} --sandbox ${DEFAULT_SANDBOX} ${prompt}`, diff --git a/src/agents/index.ts b/src/agents/index.ts index eecf837..6dd46e2 100644 --- a/src/agents/index.ts +++ b/src/agents/index.ts @@ -46,7 +46,6 @@ export namespace Agent { export interface Registration { name: string; definition: Definition; - models: ReadonlyArray; } const agents: Record> = { @@ -60,16 +59,13 @@ export namespace Agent { name: string, module: { default?: Definition; - models?: ReadonlyArray; }, ): Registration { const definition = module.default; - const models = module.models; assert(definition, `Agent module ${name} is missing a default export.`); - assert(models, `Agent module ${name} is missing the exported models list.`); - return { name, definition, models }; + return { name, definition }; } export function get(name: string): Registration { @@ -78,13 +74,6 @@ export namespace Agent { return agent; } - export function validateModel(agent: Registration, model: string) { - if (!agent.models.find((entry) => entry === model)) - throw new Error( - `Model ${model} is not registered for agent ${agent.name}.`, - ); - } - export function list() { return Object.values(agents); } diff --git a/src/agents/opencode.ts b/src/agents/opencode.ts index d97b321..4fff763 100644 --- a/src/agents/opencode.ts +++ b/src/agents/opencode.ts @@ -36,20 +36,6 @@ const opencode = await createOpencode({ const sessionCache = new Map(); -export const models: string[] = [ - "opencode/gpt-5-codex", - "opencode/gpt-5.1-codex", - "opencode/claude-sonnet-4-5", - "opencode/claude-opus-4-5", - "opencode/glm-4.6", - "opencode/glm-4.7-free", - "opencode/gemini-3-pro", - "opencode/qwen3-coder", - "opencode/kimi-k2", - "opencode/grok-code", - "opencode/alpha-gd4", -]; - function sessionKey(model: string, cwd: string): string { return `${cwd}::${model}`; } diff --git a/src/eval.ts b/src/eval.ts index 1bd3cce..51e158a 100644 --- a/src/eval.ts +++ b/src/eval.ts @@ -47,7 +47,6 @@ export namespace Eval { }, ) { const agent = Agent.get(agentName); - Agent.validateModel(agent, modelId); const task = await Task.get(taskId); const cwd = await mkdtemp(join(tmpdir(), "openreval-")); $.cwd(cwd);