From 3910f03b5400f8013e78203de10eca8d4df1b308 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 08:23:06 +0000 Subject: [PATCH] feat: blind second-opinion tool for independent cross-model review Introduces the second-opinion tool (category: gemini) that sends a problem to Gemini without exposing any existing answer, then optionally compares the independent result with the orchestrator's own answer to surface agreements and divergences. Anti-anchoring guarantee is statically enforced: buildSolvePrompt accepts only problem (single-arg signature), making it a TypeScript compile error to accidentally pass an existing answer into the solve call. Injected-executor support makes the invariant hermetically testable without spawning subprocesses. --- docs/usage/second-opinion.md | 87 ++++++++++ src/constants.ts | 7 + src/tools/index.ts | 4 +- src/tools/second-opinion.tool.ts | 130 +++++++++++++++ src/utils/secondOpinion.ts | 91 ++++++++++ test/unit/tools/second-opinion.test.ts | 221 +++++++++++++++++++++++++ 6 files changed, 539 insertions(+), 1 deletion(-) create mode 100644 docs/usage/second-opinion.md create mode 100644 src/tools/second-opinion.tool.ts create mode 100644 src/utils/secondOpinion.ts create mode 100644 test/unit/tools/second-opinion.test.ts diff --git a/docs/usage/second-opinion.md b/docs/usage/second-opinion.md new file mode 100644 index 0000000..c275216 --- /dev/null +++ b/docs/usage/second-opinion.md @@ -0,0 +1,87 @@ +# Second Opinion (Blind Independent Review) + +The `second-opinion` tool sends a problem to Gemini and obtains a completely independent answer — one that is never shown the orchestrator's existing analysis. This prevents *anchoring bias*, where a model's output is unconsciously shaped by a prior answer it was shown. 
+ +## Why anchoring matters + +When a model is shown an existing answer before being asked to evaluate or improve it, it tends to: + +- Adopt the framing and assumptions of the prior answer uncritically +- Miss alternative approaches that the first answer did not consider +- Agree with the prior answer even when it contains errors + +By hiding the orchestrator's answer from the independent solve step, the `second-opinion` tool ensures the second perspective is genuinely fresh. + +## How it works + +1. **Blind solve** — The problem text is sent to Gemini with a prompt that instructs it to reason from first principles. The orchestrator's own answer is *not* included in this call, regardless of whether one is provided. + +2. **Optional comparison** — If `ownAnswer` is provided and `compare` is `true` (the default), a second call compares the two answers and lists agreements and divergences. This comparison step can freely see both answers because the independent answer is already locked in. + +## Usage + +### Independent answer only + +```json +{ + "tool": "second-opinion", + "problem": "What database indexing strategy should we use for a write-heavy time-series workload?" +} +``` + +The tool returns the independent answer under a `## Independent answer` heading. + +### With divergence comparison + +```json +{ + "tool": "second-opinion", + "problem": "What database indexing strategy should we use for a write-heavy time-series workload?", + "ownAnswer": "We should use a B-tree index on the timestamp column and partition by month.", + "compare": true +} +``` + +The tool returns the independent answer and then a `## Points of divergence` section that lists where the two answers agree or differ and which position is better supported. + +### Skipping the comparison + +Set `compare: false` to obtain only the independent answer even when `ownAnswer` is provided. This is useful when you want the raw independent perspective without the comparison overhead. 
+ +```json +{ + "tool": "second-opinion", + "problem": "Explain the tradeoffs between eventual and strong consistency.", + "ownAnswer": "Strong consistency is always safer.", + "compare": false +} +``` + +## Parameters + +| Parameter | Type | Required | Default | Description | +|-------------|---------|----------|----------------|-------------| +| `problem` | string | yes | — | The problem or question to be answered independently. Must contain only the problem — no existing answer. | +| `ownAnswer` | string | no | — | The orchestrator's own answer. Used only in the optional compare step; never forwarded to the solve call. | +| `model` | string | no | `gemini-2.5-pro` | Gemini model to use for both calls. | +| `compare` | boolean | no | `true` | Whether to run the divergence comparison when `ownAnswer` is provided. | + +## Output format + +``` +## Independent answer + +<independent answer from Gemini> + +--- + +## Points of divergence + +<agreements and divergences between the two answers> +``` + +The `## Points of divergence` section is omitted if `ownAnswer` was not provided or `compare` is `false`. + +## Anti-anchoring guarantee + +The `buildSolvePrompt` function — which constructs the prompt for the independent solve call — accepts only the `problem` string. It has no parameter for an existing answer. This is enforced both by the TypeScript type signature and by the tool's execution flow, where `ownAnswer` is explicitly kept out of the first executor call and is only passed to `buildComparePrompt` in the second call. diff --git a/src/constants.ts b/src/constants.ts index fc5eb9a..815cca5 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -11,6 +11,13 @@ export const ERROR_MESSAGES = { NO_PROMPT_PROVIDED: "Please provide a prompt for analysis.
Use @ syntax to include files (e.g., '@largefile.js explain what this does') or ask general questions",
 } as const;
 
+// Second-opinion tool messages
+export const SECOND_OPINION_MESSAGES = {
+  SOLVE_START: 'Requesting independent solution (anti-anchoring mode)...',
+  COMPARE_START: 'Comparing answers for points of divergence...',
+  NO_PROBLEM_PROVIDED: 'A non-empty problem description is required for the second-opinion tool.',
+} as const;
+
 // Status messages
 export const STATUS_MESSAGES = {
   QUOTA_SWITCHING: "🚫 Gemini 2.5 Pro quota exceeded, switching to Flash model...",
diff --git a/src/tools/index.ts b/src/tools/index.ts
index 0d8419f..7afee9c 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -5,13 +5,15 @@ import { pingTool, helpTool } from './simple-tools.js';
 import { brainstormTool } from './brainstorm.tool.js';
 import { fetchChunkTool } from './fetch-chunk.tool.js';
 import { timeoutTestTool } from './timeout-test.tool.js';
+import { secondOpinionTool } from './second-opinion.tool.js';
 
 toolRegistry.push(
   askGeminiTool,
   pingTool,
   helpTool,
   brainstormTool,
-  fetchChunkTool
+  fetchChunkTool,
+  secondOpinionTool
 );
 
 // Only register test-only tools when explicitly enabled (e.g. judge/e2e test suite)
diff --git a/src/tools/second-opinion.tool.ts b/src/tools/second-opinion.tool.ts
new file mode 100644
index 0000000..8c7ac6e
--- /dev/null
+++ b/src/tools/second-opinion.tool.ts
@@ -0,0 +1,135 @@
+import { z } from 'zod';
+import { UnifiedTool } from './registry.js';
+import { Logger } from '../utils/logger.js';
+import { executeGeminiCLI } from '../utils/geminiExecutor.js';
+import {
+  buildSolvePrompt,
+  buildComparePrompt,
+  formatResult,
+} from '../utils/secondOpinion.js';
+import { SECOND_OPINION_MESSAGES } from '../constants.js';
+
+/**
+ * Type signature for an executor function compatible with executeGeminiCLI.
+ * Accepting an injected executor makes the anti-anchoring invariant testable
+ * without spawning real subprocesses.
+ */
+export type GeminiExecutor = (
+  prompt: string,
+  model?: string,
+  sandbox?: boolean,
+  changeMode?: boolean,
+  onProgress?: (output: string) => void
+) => Promise<string>;
+
+const secondOpinionArgsSchema = z.object({
+  problem: z
+    .string()
+    .min(1)
+    .describe(
+      'The problem or question to be answered independently. Must not include any existing answer — state only the problem.'
+    ),
+  ownAnswer: z
+    .string()
+    .optional()
+    .describe(
+      "The orchestrator's own answer to the problem. Provided only for the optional divergence comparison step — it is NEVER forwarded to the independent solve call."
+    ),
+  model: z
+    .string()
+    .optional()
+    .describe(
+      "Optional Gemini model to use (e.g., 'gemini-2.5-flash'). Defaults to gemini-2.5-pro."
+    ),
+  compare: z
+    .boolean()
+    .default(true)
+    .describe(
+      'When true (default) and ownAnswer is provided, perform a divergence comparison after the independent solve.'
+    ),
+});
+
+/**
+ * Factory that produces the second-opinion UnifiedTool with a configurable
+ * executor. Production code uses the default (executeGeminiCLI). Tests inject
+ * a fake executor to capture prompts without spawning subprocesses.
+ *
+ * The returned tool's `execute` throws an Error when `problem` is empty or
+ * whitespace-only, and otherwise resolves to the markdown from formatResult.
+ */
+export function createSecondOpinionTool(
+  executor: GeminiExecutor = executeGeminiCLI
+): UnifiedTool {
+  return {
+    name: 'second-opinion',
+    description:
+      'Obtain a blind, independent Gemini answer to a problem without exposing any existing answer (anti-anchoring). Optionally compare the independent answer with the orchestrator\'s own answer to surface agreements and divergences.',
+    zodSchema: secondOpinionArgsSchema,
+    prompt: {
+      description:
+        'Obtain an independent second opinion on a problem, then optionally compare it with an existing answer to identify divergences.',
+    },
+    category: 'gemini',
+
+    execute: async (args, onProgress) => {
+      // ANTI-ANCHORING: ownAnswer is deliberately not destructured here. It is
+      // first read in step 2, after the independent answer already exists.
+      const { problem, model, compare = true } = args;
+      const modelName = model as string | undefined;
+
+      const problemStr = typeof problem === 'string' ? problem : String(problem ?? '');
+      if (!problemStr.trim()) {
+        throw new Error(SECOND_OPINION_MESSAGES.NO_PROBLEM_PROVIDED);
+      }
+
+      // ── Step 1: Independent solve ──────────────────────────────────────────
+      // buildSolvePrompt receives the problem text and nothing else.
+      const solvePrompt = buildSolvePrompt(problemStr);
+
+      Logger.debug('second-opinion: requesting independent solution');
+      onProgress?.(SECOND_OPINION_MESSAGES.SOLVE_START);
+
+      const independentAnswer = await executor(
+        solvePrompt,
+        modelName,
+        false,
+        false,
+        onProgress
+      );
+
+      // ── Step 2: Optional divergence comparison ─────────────────────────────
+      const { ownAnswer } = args;
+      const ownAnswerStr = typeof ownAnswer === 'string' ? ownAnswer : '';
+      let comparison: string | undefined;
+
+      // A blank or whitespace-only ownAnswer gives the comparison nothing to
+      // work with, so it is treated the same as an absent one.
+      if (compare && ownAnswerStr.trim().length > 0) {
+        Logger.debug('second-opinion: performing divergence comparison');
+        onProgress?.(SECOND_OPINION_MESSAGES.COMPARE_START);
+
+        const comparePrompt = buildComparePrompt(
+          problemStr,
+          ownAnswerStr,
+          independentAnswer
+        );
+
+        comparison = await executor(
+          comparePrompt,
+          modelName,
+          false,
+          false,
+          onProgress
+        );
+      }
+
+      return formatResult({ independentAnswer, comparison });
+    },
+  };
+}
+
+/**
+ * The production tool instance registered in the tool registry.
+ * Uses the real executeGeminiCLI executor.
+ */
+export const secondOpinionTool: UnifiedTool = createSecondOpinionTool();
diff --git a/src/utils/secondOpinion.ts b/src/utils/secondOpinion.ts
new file mode 100644
index 0000000..a2c842d
--- /dev/null
+++ b/src/utils/secondOpinion.ts
@@ -0,0 +1,91 @@
+/**
+ * Pure string-manipulation helpers for the blind second-opinion workflow.
+ *
+ * ANTI-ANCHORING GUARANTEE:
+ * The solve-prompt builder takes the problem text as its one and only
+ * argument. No answer parameter exists on it, so the orchestrator's own answer
+ * cannot be handed to the independent solve prompt through its signature.
+ */
+
+/**
+ * Assembles the prompt handed to the independent solver.
+ *
+ * HARD INVARIANT: `problem` is the only parameter. Passing a second argument
+ * (for example an existing answer) is rejected by the TypeScript compiler.
+ * The problem text is interpolated verbatim under the "## Problem" heading.
+ */
+export function buildSolvePrompt(problem: string): string {
+  const lines = [
+    'You are an independent expert providing a fresh solution to the following problem. Approach it from first principles without reference to any prior analysis.',
+    '',
+    '## Problem',
+    '',
+    `${problem}`,
+    '',
+    '## Instructions',
+    '',
+    '- Reason through the problem independently and thoroughly.',
+    '- State your assumptions clearly.',
+    '- Provide a complete, well-structured answer.',
+    '- Do not hedge or truncate your response — give your full analysis.',
+  ];
+  return lines.join('\n');
+}
+
+/**
+ * Assembles the prompt that contrasts the orchestrator's answer (labelled
+ * "Answer A") with the independently generated one (labelled "Answer B").
+ *
+ * Meant to be used only once the independent solve has finished, so that the
+ * comparison cannot shape the independent answer.
+ */
+export function buildComparePrompt(
+  problem: string,
+  ownAnswer: string,
+  independentAnswer: string
+): string {
+  return [
+    'You are a neutral analyst comparing two independent answers to the same problem. Identify where they agree, where they diverge, and which (if any) divergences are substantive.',
+    '',
+    '## Problem',
+    '',
+    `${problem}`,
+    '',
+    '## Answer A',
+    '',
+    `${ownAnswer}`,
+    '',
+    '## Answer B',
+    '',
+    `${independentAnswer}`,
+    '',
+    '## Instructions',
+    '',
+    '1. List key **points of agreement** between A and B.',
+    '2. List key **points of divergence** — focus on substantive differences in conclusions, recommendations, or reasoning, not merely phrasing.',
+    '3. For each divergence, briefly assess which position (if either) is better supported.',
+    '4. Conclude with an overall summary of alignment.',
+    '',
+    'Structure your output with clear headings.',
+  ].join('\n');
+}
+
+/**
+ * Renders the final markdown returned by the tool.
+ *
+ * An "Independent answer" section is always emitted. A "Points of divergence"
+ * section follows, separated by a horizontal rule, only when `comparison` is
+ * defined and non-blank. Both texts are trimmed.
+ */
+export function formatResult({
+  independentAnswer,
+  comparison,
+}: {
+  independentAnswer: string;
+  comparison?: string;
+}): string {
+  const answerSection = `## Independent answer\n\n${independentAnswer.trim()}`;
+  const trimmedComparison = comparison === undefined ? '' : comparison.trim();
+  if (trimmedComparison.length === 0) return answerSection;
+  return `${answerSection}\n\n---\n\n## Points of divergence\n\n${trimmedComparison}`;
+}
diff --git a/test/unit/tools/second-opinion.test.ts b/test/unit/tools/second-opinion.test.ts
new file mode 100644
index 0000000..dafd1a3
--- /dev/null
+++ b/test/unit/tools/second-opinion.test.ts
@@ -0,0 +1,221 @@
+import { test, describe } from 'node:test';
+import assert from 'node:assert/strict';
+
+import {
+  buildSolvePrompt,
+  buildComparePrompt,
+  formatResult,
+} from '../../../src/utils/secondOpinion.js';
+import { createSecondOpinionTool } from '../../../src/tools/second-opinion.tool.js';
+import { toolExists, getToolDefinitions } from '../../../src/tools/index.js';
+
+// ---------------------------------------------------------------------------
+// Pure helpers: secondOpinion.ts
+// ---------------------------------------------------------------------------
+
+describe('secondOpinion: buildSolvePrompt', () => {
+  test('only depends on problem — no answer parameter in signature', () => {
+    // TypeScript enforces single-arg at compile time; here we confirm the
+    // runtime output only reflects the problem.
+    const problem = 'How should we handle distributed transactions?';
+    const prompt = buildSolvePrompt(problem);
+    assert.ok(prompt.includes(problem), 'prompt contains the problem text');
+  });
+
+  test('interpolates nothing but the problem into a fixed template', () => {
+    const marker = 'PROBLEM_MARKER_XYZ_12345';
+    assert.equal(
+      buildSolvePrompt(marker).split(marker).join(''),
+      buildSolvePrompt(''),
+      'everything apart from the problem text must be constant boilerplate'
+    );
+  });
+
+  test('instructs the solver to reason independently', () => {
+    const prompt = buildSolvePrompt('explain caching strategies');
+    assert.match(prompt, /independent/i);
+  });
+});
+
+describe('secondOpinion: buildComparePrompt', () => {
+  test('includes problem, ownAnswer, and independentAnswer', () => {
+    const problem = 'What is the best sorting algorithm?';
+    const ownAnswer = 'Quicksort for average cases.';
+    const independentAnswer = 'Mergesort for stability guarantees.';
+    const prompt = buildComparePrompt(problem, ownAnswer, independentAnswer);
+    assert.ok(prompt.includes(problem), 'contains problem');
+    assert.ok(prompt.includes(ownAnswer), 'contains ownAnswer');
+    assert.ok(prompt.includes(independentAnswer), 'contains independentAnswer');
+  });
+
+  test('asks for agreement and divergence analysis', () => {
+    const prompt = buildComparePrompt('p', 'a', 'b');
+    assert.match(prompt, /agree/i);
+    assert.match(prompt, /diverge/i);
+  });
+});
+
+describe('secondOpinion: formatResult', () => {
+  test('renders "Independent answer" section always', () => {
+    const out = formatResult({ independentAnswer: 'The answer is 42.' });
+    assert.match(out, /## Independent answer/);
+    assert.ok(out.includes('The answer is 42.'));
+  });
+
+  test('omits "Points of divergence" when comparison is absent', () => {
+    const out = formatResult({ independentAnswer: 'Answer here.' });
+    assert.ok(!out.includes('Points of divergence'));
+  });
+
+  test('includes "Points of divergence" section when comparison is provided', () => {
+    const out = formatResult({
+      independentAnswer: 'Answer A.',
+      comparison: 'They agree on X but differ on Y.',
+    });
+    assert.match(out, /## Points of divergence/);
+    assert.ok(out.includes('They agree on X but differ on Y.'));
+  });
+
+  test('omits "Points of divergence" when comparison is an empty string', () => {
+    const out = formatResult({ independentAnswer: 'Answer.', comparison: '' });
+    assert.ok(!out.includes('Points of divergence'));
+  });
+});
+
+// ---------------------------------------------------------------------------
+// ANTI-ANCHORING INVARIANT — core correctness test
+// ---------------------------------------------------------------------------
+
+describe('second-opinion tool: anti-anchoring invariant', () => {
+  test('the SOLVE call never receives ownAnswer — even with a distinctive sentinel', async () => {
+    const SENTINEL = 'ORCHESTRATOR_ANSWER_SENTINEL_MUST_NOT_APPEAR_IN_SOLVE_1A2B3C';
+
+    const capturedSolvePrompts: string[] = [];
+    let callCount = 0;
+
+    const fakeExecutor = async (prompt: string): Promise<string> => {
+      callCount++;
+      // First call is the solve call; capture it for inspection.
+      if (callCount === 1) {
+        capturedSolvePrompts.push(prompt);
+        return 'Independent answer text.';
+      }
+      // Second call is the compare call; just return something.
+      return 'Comparison text.';
+    };
+
+    const tool = createSecondOpinionTool(fakeExecutor);
+    await tool.execute({
+      problem: 'Describe the CAP theorem.',
+      ownAnswer: SENTINEL,
+      compare: true,
+    });
+
+    // Exactly one solve call must have been made.
+    assert.equal(capturedSolvePrompts.length, 1, 'exactly one solve call made');
+
+    // CRITICAL: the sentinel must NOT appear in the solve prompt.
+    assert.ok(
+      !capturedSolvePrompts[0].includes(SENTINEL),
+      `The solve prompt must not contain the orchestrator's own answer. Got:\n${capturedSolvePrompts[0]}`
+    );
+  });
+
+  test('compare call DOES receive ownAnswer and independentAnswer', async () => {
+    const OWN = 'OWN_ANSWER_TEXT';
+    const INDEPENDENT = 'INDEPENDENT_ANSWER_FROM_GEMINI';
+
+    const capturedPrompts: string[] = [];
+
+    const fakeExecutor = async (prompt: string): Promise<string> => {
+      capturedPrompts.push(prompt);
+      if (capturedPrompts.length === 1) return INDEPENDENT;
+      return 'Comparison result.';
+    };
+
+    const tool = createSecondOpinionTool(fakeExecutor);
+    await tool.execute({
+      problem: 'Explain eventual consistency.',
+      ownAnswer: OWN,
+      compare: true,
+    });
+
+    assert.equal(capturedPrompts.length, 2, 'solve + compare = 2 calls');
+    const comparePrompt = capturedPrompts[1];
+    assert.ok(comparePrompt.includes(OWN), 'compare prompt contains ownAnswer');
+    assert.ok(
+      comparePrompt.includes(INDEPENDENT),
+      'compare prompt contains independentAnswer'
+    );
+  });
+
+  test('no compare call when compare=false', async () => {
+    let callCount = 0;
+    const fakeExecutor = async (): Promise<string> => {
+      callCount++;
+      return 'answer';
+    };
+
+    const tool = createSecondOpinionTool(fakeExecutor);
+    await tool.execute({
+      problem: 'Any problem.',
+      ownAnswer: 'Some answer.',
+      compare: false,
+    });
+
+    assert.equal(callCount, 1, 'only the solve call is made when compare=false');
+  });
+
+  test('no compare call when ownAnswer is absent', async () => {
+    let callCount = 0;
+    const fakeExecutor = async (): Promise<string> => {
+      callCount++;
+      return 'answer';
+    };
+
+    const tool = createSecondOpinionTool(fakeExecutor);
+    await tool.execute({ problem: 'Any problem.' });
+
+    assert.equal(callCount, 1, 'only the solve call is made when ownAnswer is absent');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Tool integration: registry
+// ---------------------------------------------------------------------------
+
+describe('second-opinion tool: registry', () => {
+  test('tool is registered under the name "second-opinion"', () => {
+    assert.equal(toolExists('second-opinion'), true);
+  });
+
+  test('schema requires problem; ownAnswer is optional', () => {
+    const defs = getToolDefinitions();
+    const def = defs.find((d) => d.name === 'second-opinion');
+    assert.ok(def, 'second-opinion definition found');
+    const required = def!.inputSchema.required as string[];
+    assert.ok(required.includes('problem'), '"problem" is required');
+    assert.ok(!required.includes('ownAnswer'), '"ownAnswer" is not required');
+    assert.ok(!required.includes('model'), '"model" is not required');
+    assert.ok(!required.includes('compare'), '"compare" is not required');
+  });
+
+  test('schema includes expected properties', () => {
+    const defs = getToolDefinitions();
+    const def = defs.find((d) => d.name === 'second-opinion');
+    const props = def!.inputSchema.properties as Record<string, unknown>;
+    assert.ok('problem' in props);
+    assert.ok('ownAnswer' in props);
+    assert.ok('model' in props);
+    assert.ok('compare' in props);
+  });
+
+  test('formatResult output includes both sections when comparison provided', () => {
+    const result = formatResult({
+      independentAnswer: 'Ind answer.',
+      comparison: 'Divergence details.',
+    });
+    assert.match(result, /## Independent answer/);
+    assert.match(result, /## Points of divergence/);
+  });
+});