From 855730e85033328343dfd294740d66995eb465de Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 13:55:52 -0700
Subject: [PATCH 1/8] =?UTF-8?q?feat:=20v1.2.0=20=E2=80=94=20pluggable=20ba?=
 =?UTF-8?q?ckends,=20approval=20mode,=20native=20sessions,=20Windows=20rel?=
 =?UTF-8?q?iability,=20timeouts,=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A feature release for the 1.2.0 line, on top of the 1.1.6 security patch. Hardens
cross-platform execution, adds an opt-in safety control and native multi-turn
sessions, makes the CLI backend pluggable ahead of the Gemini CLI retirement
(2026-06-18 -> Antigravity agy), and adds a real test suite.

- Backend abstraction (src/backends/): the Gemini CLI stays the default; add an
  experimental Antigravity CLI (agy) backend behind GEMINI_MCP_BACKEND, with a
  transcript-file fallback for agy's empty-stdout -p bug (Flash-only).
- Opt-in approval mode: approvalMode arg + GEMINI_MCP_APPROVAL_MODE env forward
  gemini --approval-mode. Not forced by default — defaulting to 'plan' turns
  headless gemini into an autonomous planner that breaks plain Q&A.
- Native multi-turn sessions: sessionId/resume forward gemini --session-id/--resume
  and the active session id is surfaced in the response.
- Windows executable resolution: GEMINI_CLI_PATH, then 'where gemini' preferring
  the .cmd shim; plus platform-aware ENOENT guidance.
- Per-command timeout (SIGTERM -> SIGKILL), GEMINI_MCP_TIMEOUT_MS (default 30m,
  0 disables); implements the previously-empty timeoutManager.
- Fix Help tool: 'gemini --help' (was '-help', mis-parsed by yargs as -h -e -l -p).
- Read server version from package.json at runtime (was hardcoded, stale at 1.1.4);
  engines >=18; prepare script for Git-checkout installs.
- Complex prompts (changeMode / @file) are sent on stdin instead of -p; windowsHide
  suppresses the popup console window on Windows.
- node:test suite + tsconfig.build.json so tests are type-checked but not shipped in dist.
---
 CHANGELOG.md                      |  23 ++++-
 SECURITY-REPORT-2026-05-28.md     |  92 +++++++++++++++++
 package.json                      |   9 +-
 scripts/run-tests.mjs             |  33 ++++++
 src/backends/agy.test.ts          |  26 +++++
 src/backends/agy.ts               | 138 ++++++++++++++++++++++++++
 src/backends/gemini.test.ts       |  61 ++++++++++++
 src/backends/gemini.ts            |  87 ++++++++++++++++
 src/backends/index.test.ts        |  16 +++
 src/backends/index.ts             |  26 +++++
 src/backends/types.ts             |  29 ++++++
 src/constants.ts                  |  31 +++++-
 src/index.ts                      |   9 +-
 src/tools/ask-gemini.tool.ts      |  39 +++++---
 src/tools/brainstorm.tool.ts      |   9 +-
 src/tools/simple-tools.ts         |   5 +-
 src/utils/commandExecutor.test.ts |  41 ++++++++
 src/utils/commandExecutor.ts      | 160 ++++++++++++++++++++++++------
 src/utils/geminiExecutor.test.ts  |  17 ++++
 src/utils/geminiExecutor.ts       |  87 ++++++----------
 src/utils/timeoutManager.test.ts  |  19 ++++
 src/utils/timeoutManager.ts       |  20 ++++
 tsconfig.build.json               |   4 +
 23 files changed, 871 insertions(+), 110 deletions(-)
 create mode 100644 SECURITY-REPORT-2026-05-28.md
 create mode 100644 scripts/run-tests.mjs
 create mode 100644 src/backends/agy.test.ts
 create mode 100644 src/backends/agy.ts
 create mode 100644 src/backends/gemini.test.ts
 create mode 100644 src/backends/gemini.ts
 create mode 100644 src/backends/index.test.ts
 create mode 100644 src/backends/index.ts
 create mode 100644 src/backends/types.ts
 create mode 100644 src/utils/commandExecutor.test.ts
 create mode 100644 src/utils/geminiExecutor.test.ts
 create mode 100644 src/utils/timeoutManager.test.ts
 create mode 100644 tsconfig.build.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3739f96..6d7ae52 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,28 @@
 # Changelog
 
+## [1.2.0] - 2026-05-30
+First feature release after the 1.1.6 security patch. Hardens cross-platform execution, adds an opt-in safety control and native multi-turn sessions, makes the CLI backend pluggable (ahead of Gemini CLI's retirement), and adds a real test suite.
+
+### Added
+- **Approval mode** — optional `approvalMode` argument on `ask-gemini`/`brainstorm` (and `GEMINI_MCP_APPROVAL_MODE` env), forwarding Gemini's `--approval-mode` (`default` / `auto_edit` / `yolo` / `plan`). Opt-in: when unset, behaviour is unchanged. Use `yolo` / `auto_edit` with `sandbox` to let Gemini run or edit; `plan` runs Gemini as an autonomous read-only planner.
+- **Native multi-turn sessions** — `sessionId` and `resume` arguments forward Gemini's `--session-id` / `--resume`; the active session id is surfaced in the response so a follow-up call can continue the conversation. Builds on #50; uses the CLI's own sessions rather than local transcript storage.
+- **Pluggable backends** — the executor is now backend-agnostic. The Gemini CLI stays the default; set `GEMINI_MCP_BACKEND=agy` to use the **experimental** Antigravity CLI (`agy`) backend, ahead of Gemini CLI's 2026-06-18 retirement for free/Pro/Ultra tiers. (agy print-mode is Flash-only, and its reply is recovered from agy's transcript files to work around the upstream `agy -p` empty-stdout bug.)
+- **Per-command timeout** — a hung CLI call is now terminated (SIGTERM → SIGKILL). Configurable via `GEMINI_MCP_TIMEOUT_MS` (default 30 minutes; `0` disables).
+- **Windows executable resolution** — honours `GEMINI_CLI_PATH`, otherwise resolves the real `gemini` shim via `where` (preferring `.cmd`), fixing "command not found" when the MCP server doesn't inherit your shell's PATH.
+- **Test suite** — `node:test` coverage for the `@file` security guard, Windows quoting/resolution, approval-mode and session argument building, backend selection, and timeout parsing (`npm test`).
+
+### Changed
+- `engines.node` raised to `>=18`.
+- The server version is now read from `package.json` at runtime, instead of a hardcoded string that had drifted to `1.1.4`.
+- Installing from a Git checkout now builds automatically via a `prepare` script.
+
+### Fixed
+- The `Help` tool now invokes `gemini --help` instead of `-help`, which yargs mis-parsed as `-h -e -l -p`.
+- Clearer, platform-aware guidance when the executable is not found (ENOENT), including the `GEMINI_CLI_PATH` hint.
+- Windows robustness: complex prompts (`changeMode` / `@file`) are sent to the Gemini CLI on **stdin** instead of the `-p` flag, sidestepping cmd.exe argument parsing and the OS command-line length limit; added `windowsHide` to suppress the popup console window. (#27, #77)
+
 ## [1.1.6] - 2026-05-30
-_Emergency security patch — the CVE-2026-0755 fix only, ahead of the larger 1.2.0 release._
+_Emergency security patch — the CVE-2026-0755 fix only, ahead of this 1.2.0 release._
 - Security fix: OS command-injection / `@file` exfiltration via prompt quoting in `geminiExecutor.ts` (CVE-2026-0755, CWE-78). Fixes #73 (and the literal-quote corruption in #66).
   - Removed the broken double-quote wrapping from both the primary and fallback paths. With `spawn` running `shell: false`, those quotes were passed as literal characters — they provided no protection and corrupted `@file` references. Windows `.cmd` argument quoting is hardened separately (see below).
   - Added `assertSafeFileReferences()`, which rejects any `@file` reference that resolves outside the project working directory (absolute paths, `~` home references, and `../` traversal), closing the arbitrary-file-read exfiltration vector while preserving legitimate in-project `@file` usage.
diff --git a/SECURITY-REPORT-2026-05-28.md b/SECURITY-REPORT-2026-05-28.md
new file mode 100644
index 0000000..8af60be
--- /dev/null
+++ b/SECURITY-REPORT-2026-05-28.md
@@ -0,0 +1,92 @@
+# Security Report — gemini-mcp-tool
+
+- **Date:** 2026-05-28
+- **Repository:** `jamubc/gemini-mcp-tool`
+- **Branch reviewed:** `security/cve-2026-0755` (PR #75)
+- **Scope:** All hand-written source under `src/`, plus declared npm dependencies.
+- **Method:** Manual code review + sink analysis (`child_process` / `fs` / network / `eval`), `npm audit` with runtime-vs-dev tree attribution, and a cross-check of open GitHub issues.
+
+> No security issue was filed today (2026-05-28). The most recent security report is **#73 (CVE-2026-0755)**, which is fixed on this branch (PR #75).
+
+---
+
+## Executive summary
+
+| Area         | Critical | High | Moderate | Low / Info |
+|--------------|:--------:|:----:|:--------:|:----------:|
+| Code         | 1 (fixed)| 0    | 0        | 4          |
+| Dependencies | 0        | 8*   | 15       | 2          |
+
+\* Only **2 of the 8 dependency HIGHs reach the published/runtime tree** (`@modelcontextprotocol/sdk`, and `tmp` via the unused `inquirer` dep). The other 6 HIGHs live exclusively in the docs/build toolchain (`vitepress`, `mermaid`, `archiver`) and are never installed for end users.
+
+---
+
+## Code findings
+
+### C1 — CVE-2026-0755: OS command-injection / `@file` exfiltration — **Critical — FIXED (PR #75)**
+`geminiExecutor.ts` wrapped any prompt containing `@` in literal `"` before passing it to `spawn` (`shell: false`), which injected literal quote characters and corrupted `@file` references, while leaving an arbitrary-file-read vector through the Gemini CLI's `@file` parser.
+
+**Fix (this branch):** removed the broken quoting from the primary and fallback paths; added `assertSafeFileReferences()` which rejects `@file` references that resolve outside the project working directory (absolute, `~`, and `../` traversal). The guard runs on the fully-processed prompt, so it also protects the `brainstorm` and `changeMode` code paths.
+
+### C2 — Windows `cmd.exe` variable expansion in prompts — **Low (Windows-only)**
+`commandExecutor.ts` uses `shell: true` on Windows and wraps arguments containing whitespace or quotes in `"..."` (escaping `"`→`""`). `cmd.exe` still expands `%VAR%` **inside** double quotes, so a prompt containing e.g. `%USERNAME%` / `%PATH%` is substituted before reaching `gemini`. This is not a command-execution break-out, but it is both a correctness issue (the prompt text is altered) and a minor information-exposure issue (environment values end up in the prompt). Unix is unaffected (`shell: false`).
+**Recommendation:** adopt the issue #62 approach — spawn `process.execPath` with the resolved `gemini.js` path and `shell: false` on Windows too — eliminating the shell (and the quoting fragility) entirely.
+
+### C3 — Verbose logging of full tool arguments / prompts — **Low / Informational**
+`logger.ts` logs raw args via `JSON.stringify` on every invocation (`Logger.toolInvocation`), and `Logger.debug` is wired to `console.warn`, so prompt bodies are written to stderr **regardless of any debug flag**. Prompts may contain pasted file contents or secrets; on shared hosts or captured MCP logs this is a disclosure risk.
+**Recommendation:** gate full-argument logging behind an explicit debug env var; avoid logging full prompt bodies at the default level.
+
+### C4 — Raw `error.message` returned to client — **Informational**
+`index.ts` returns `Error executing ${tool}: ${error.message}`. CLI/`fs` errors may embed absolute local paths. Low impact for a local stdio server; noted for completeness.
+
+### C5 — Unbounded lazy regex over model output — **Informational**
+`changeModeParser.ts` uses `[\s\S]*?` groups. Input is Gemini's *response* (model-controlled, not direct attacker network input), so ReDoS exposure is low. Acceptable today; revisit if these inputs ever become untrusted.
+
+### Positives observed
+- `commandExecutor.ts` uses `spawn` with `shell: false` on Unix and an args array — no shell injection.
+- #72 path-traversal hardening on `cacheKey` is solid: format regex (`/^[a-f0-9]{8}$/`) + `path.resolve` containment + removal of the silent `unlink` primitive.
+- All tool arguments are validated through `zod` before execution.
+- The server is **stdio-only** — there is no network listener by default.
+
+---
+
+## Dependency findings
+
+`npm audit`: **25 vulnerabilities (8 high, 15 moderate, 2 low)**. The published package ships only `dist/`, but its `dependencies` are installed transitively for every end user, so the runtime-vs-dev split below is what actually matters.
+
+### D1 — `@modelcontextprotocol/sdk@0.5.0` — **High — runtime, USED**
+- Advisories: ReDoS (high); "DNS-rebinding protection not enabled by default" (high).
+- **DNS rebinding does not apply** here: this server uses `StdioServerTransport`, not the Streamable-HTTP transport the advisory concerns.
+- ReDoS applies to SDK message handling; with a trusted local stdio client, exposure is limited but real.
+- `0.5.0` is far behind the current `1.x` line. **Upgrading is recommended but is a breaking API change** and will require edits to `index.ts`.
+
+### D2 — `inquirer@9.3.7` → `external-editor` → `tmp@0.0.33` — **High path traversal — runtime, UNUSED**
+- `inquirer`, `ai`, `chalk`, `d3-shape`, and `prismjs` are declared as runtime `dependencies` but are **not imported anywhere in `src/`**. They are still installed for every user, and `inquirer` drags in the HIGH `tmp` path-traversal advisory.
+- **Recommendation (high value, low effort):** remove these unused runtime deps. This eliminates the only runtime-tree HIGH besides the SDK and significantly shrinks install/attack surface. (Note: `package.json` references a `contribute` script at `src/contribute.ts` which does not exist in the tree — confirm nothing relies on these before removal.)
+
+### D3 — Docs/build toolchain HIGHs — **Not shipped, lower priority**
+All remaining HIGHs are confined to `devDependencies` and are not installed for end users or used by the running server:
+- `archiver` → `glob`, `minimatch`, `lodash`
+- `vitepress` → `rollup`, `vite`, `esbuild`, `preact`
+- `mermaid` → `dompurify`
+
+Patch opportunistically with `npm audit fix`, but these do not affect deployed MCP servers.
+
+---
+
+## Additional observations (full source-tree read)
+
+These do **not** affect the published npm package or the running MCP server (the docs site is built/deployed separately to GitHub Pages), but are noted for completeness:
+
+- **Docs site loads a third-party ad script.** `docs/.vitepress/theme/components/AdBanner.vue` injects `//cdn.carbonads.com/carbon.js` into the page `<head>`. It is currently an inert placeholder (`serve=YOUR_CARBON_ID`), but any third-party script on the docs origin is a supply-chain/privacy consideration. *(Informational — docs site only.)*
+- **`v-html` in `CodeBlock.vue`.** Renders Prism-highlighted output via `v-html`. Input is build-time-authored doc content and Prism escapes HTML, so this is not an exploitable XSS today. *(Informational — docs site only.)*
+- **Dead / duplicate files.** `src/utils/timeoutManager.ts` is effectively empty (1 line) and imported nowhere; `src/scripts/deploy-wiki.sh` is a byte-for-byte duplicate of `scripts/deploy-wiki.sh`. Housekeeping, not security — safe to remove.
+
+## Prioritized recommendations
+
+1. **Merge PR #75** — CVE-2026-0755 fix. *(Critical — done, pending merge.)*
+2. **Remove unused runtime deps** (`ai`, `chalk`, `d3-shape`, `inquirer`, `prismjs`) — removes the `tmp` HIGH from the shipped tree. *(High, low effort.)*
+3. **Plan `@modelcontextprotocol/sdk` 0.5 → 1.x upgrade.** *(High, breaking — needs code changes.)*
+4. **Gate verbose prompt/argument logging** behind a debug flag. *(Low.)*
+5. **Windows:** drop `shell: true` in favor of the node + `gemini.js` approach (issue #62) to remove `%VAR%` expansion and quoting fragility. *(Low.)*
+6. **`npm audit fix`** for the docs/build toolchain. *(Low.)*
diff --git a/package.json b/package.json
index cb1f096..6e5aa3c 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "gemini-mcp-tool",
-  "version": "1.1.6",
+  "version": "1.2.0",
   "description": "MCP server for Gemini CLI integration",
   "type": "module",
   "main": "dist/index.js",
@@ -8,10 +8,11 @@
     "gemini-mcp": "dist/index.js"
   },
   "scripts": {
-    "build": "tsc",
+    "build": "tsc -p tsconfig.build.json",
+    "prepare": "npm run build",
     "start": "node dist/index.js",
     "dev": "tsc && node dist/index.js",
-    "test": "echo \"No tests yet\" && exit 0",
+    "test": "node scripts/run-tests.mjs",
     "lint": "tsc --noEmit",
     "contribute": "tsx src/contribute.ts",
     "prepublishOnly": "echo '⚠️  Remember to test locally first!' && npm run build",
@@ -38,7 +39,7 @@
   },
   "homepage": "https://github.com/jamubc/gemini-mcp-tool#readme",
   "engines": {
-    "node": ">=16.0.0"
+    "node": ">=18.0.0"
   },
   "files": [
     "dist/",
diff --git a/scripts/run-tests.mjs b/scripts/run-tests.mjs
new file mode 100644
index 0000000..5ba7268
--- /dev/null
+++ b/scripts/run-tests.mjs
@@ -0,0 +1,33 @@
+#!/usr/bin/env node
+// Discover and run every *.test.ts under src/ with the built-in node:test
+// runner, using the tsx loader so the TypeScript sources run directly.
+import { spawnSync } from "node:child_process";
+import { readdirSync, statSync } from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+const scriptDir = path.dirname(fileURLToPath(import.meta.url));
+const srcDir = path.join(scriptDir, "..", "src");
+
+function findTests(dir) {
+  const found = [];
+  for (const entry of readdirSync(dir)) {
+    const full = path.join(dir, entry);
+    if (statSync(full).isDirectory()) found.push(...findTests(full));
+    else if (entry.endsWith(".test.ts")) found.push(full);
+  }
+  return found;
+}
+
+const tests = findTests(srcDir);
+if (tests.length === 0) {
+  console.log("No test files found.");
+  process.exit(0);
+}
+
+const result = spawnSync(
+  process.execPath,
+  ["--import", "tsx", "--test", ...tests],
+  { stdio: "inherit" },
+);
+process.exit(result.status ?? 1);
diff --git a/src/backends/agy.test.ts b/src/backends/agy.test.ts
new file mode 100644
index 0000000..3e9418c
--- /dev/null
+++ b/src/backends/agy.test.ts
@@ -0,0 +1,26 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { buildAgyArgs } from "./agy.js";
+
+test("buildAgyArgs maps prompt, sessions, sandbox, and yolo", () => {
+  assert.deepEqual(buildAgyArgs("hi", {}), ["-p", "hi"]);
+  assert.deepEqual(buildAgyArgs("hi", { resume: "latest" }), ["--continue", "-p", "hi"]);
+  assert.deepEqual(buildAgyArgs("hi", { resume: "conv-1" }), [
+    "--conversation",
+    "conv-1",
+    "-p",
+    "hi",
+  ]);
+  assert.deepEqual(buildAgyArgs("hi", { sessionId: "conv-2" }), [
+    "--conversation",
+    "conv-2",
+    "-p",
+    "hi",
+  ]);
+  assert.deepEqual(buildAgyArgs("hi", { sandbox: true, approvalMode: "yolo" }), [
+    "--sandbox",
+    "--dangerously-skip-permissions",
+    "-p",
+    "hi",
+  ]);
+});
diff --git a/src/backends/agy.ts b/src/backends/agy.ts
new file mode 100644
index 0000000..f0c1d26
--- /dev/null
+++ b/src/backends/agy.ts
@@ -0,0 +1,138 @@
+import { readFileSync } from "fs";
+import os from "os";
+import path from "path";
+import { Logger } from "../utils/logger.js";
+import { CLI, APPROVAL_MODES } from "../constants.js";
+import { executeCommand } from "../utils/commandExecutor.js";
+import type { Backend, BackendRunOptions } from "./types.js";
+
+/**
+ * EXPERIMENTAL Antigravity CLI (`agy`) backend — opt in with GEMINI_MCP_BACKEND=agy.
+ *
+ * agy is gemini-cli's successor (Gemini CLI is retired 2026-06-18 for free/Pro/
+ * Ultra tiers). Two caveats drive this implementation:
+ *  1. Print-mode (`agy -p`) is broken in 1.0.x — it returns exit 0 but writes
+ *     nothing to stdout. We therefore recover the reply from agy's own transcript
+ *     on disk when stdout is empty (matching the community MCP bridge).
+ *  2. Print-mode is hardcoded to Gemini 3.5 Flash; `model` is ignored.
+ */
+
+const AGY_BASE = path.join(os.homedir(), ".gemini", "antigravity-cli");
+const LAST_CONVERSATIONS = path.join(AGY_BASE, "cache", "last_conversations.json");
+const transcriptPath = (id: string) =>
+  path.join(AGY_BASE, "brain", id, ".system_generated", "logs", "transcript.jsonl");
+
+interface TranscriptEntry {
+  source?: string;
+  type?: string;
+  status?: string;
+  content?: string;
+}
+
+/** Map the current workspace directory to its most recent agy conversation id. */
+function conversationIdForCwd(cwd: string): string | undefined {
+  try {
+    const map = JSON.parse(readFileSync(LAST_CONVERSATIONS, "utf8")) as Record<string, string>;
+    return map[cwd] ?? map[path.resolve(cwd)];
+  } catch (e) {
+    Logger.warn(`agy: could not read last_conversations.json: ${(e as Error).message}`);
+    return undefined;
+  }
+}
+
+/** Read the model's replies to the last user input from a conversation's transcript on disk. */
+export function readTranscriptResponse(id: string): string {
+  let lines: string[];
+  try {
+    lines = readFileSync(transcriptPath(id), "utf8").split(/\r?\n/).filter(Boolean);
+  } catch (e) {
+    throw new Error(
+      `agy: response transcript not found for conversation ${id}: ${(e as Error).message}`,
+    );
+  }
+
+  const entries: TranscriptEntry[] = [];
+  for (const line of lines) {
+    try {
+      entries.push(JSON.parse(line) as TranscriptEntry);
+    } catch {
+      /* skip malformed lines */
+    }
+  }
+
+  // Take the model planner responses that follow the last user input.
+  let lastUserIdx = -1;
+  for (let i = entries.length - 1; i >= 0; i--) {
+    if (entries[i].type === "USER_INPUT") {
+      lastUserIdx = i;
+      break;
+    }
+  }
+  const replies = entries
+    .slice(lastUserIdx + 1)
+    .filter(
+      (e) =>
+        e.source === "MODEL" &&
+        e.type === "PLANNER_RESPONSE" &&
+        e.status === "DONE" &&
+        typeof e.content === "string",
+    )
+    .map((e) => e.content as string);
+
+  const text = replies.join("\n\n").trim();
+  if (!text) {
+    throw new Error(`agy: no model response found in transcript for conversation ${id}`);
+  }
+  return text;
+}
+
+export function buildAgyArgs(prompt: string, opts: BackendRunOptions): string[] {
+  const args: string[] = [];
+  // Sessions: --continue resumes the most recent; --conversation <id> a specific one.
+  if (opts.resume) {
+    if (opts.resume === "latest") args.push("--continue");
+    else args.push("--conversation", opts.resume);
+  } else if (opts.sessionId) {
+    args.push("--conversation", opts.sessionId);
+  }
+  if (opts.sandbox) args.push("--sandbox");
+  // agy has no graded approval modes; only "skip all prompts" maps cleanly.
+  if (opts.approvalMode === APPROVAL_MODES.YOLO) args.push("--dangerously-skip-permissions");
+  args.push("-p", prompt);
+  return args;
+}
+
+// Serialize agy calls: each run rewrites last_conversations.json, so concurrent
+// runs would read each other's conversation ids back.
+let agyQueue: Promise<unknown> = Promise.resolve();
+
+export const agyBackend: Backend = {
+  name: "agy",
+  supportsModelSelection: false, // print-mode is hardcoded to Gemini 3.5 Flash
+  run(prompt, opts) {
+    const task = agyQueue.then(async () => {
+      Logger.warn(
+        "[experimental] agy backend: print-mode is Flash-only and recovers output from transcript files.",
+      );
+      const cwd = process.cwd();
+      const args = buildAgyArgs(prompt, opts);
+      const stdout = await executeCommand(CLI.COMMANDS.AGY, args, opts.onProgress);
+      if (stdout && stdout.trim()) return stdout.trim(); // future agy may fix -p stdout
+
+      const id = conversationIdForCwd(cwd);
+      if (!id) {
+        throw new Error(
+          `agy: produced no stdout and no conversation id was found for ${cwd}. ` +
+            "Run `agy -i` once to authenticate, then retry.",
+        );
+      }
+      return readTranscriptResponse(id);
+    });
+    // Keep the chain alive regardless of this call's outcome.
+    agyQueue = task.then(
+      () => undefined,
+      () => undefined,
+    );
+    return task;
+  },
+};
diff --git a/src/backends/gemini.test.ts b/src/backends/gemini.test.ts
new file mode 100644
index 0000000..75749a2
--- /dev/null
+++ b/src/backends/gemini.test.ts
@@ -0,0 +1,61 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { resolveApprovalMode, buildGeminiArgs } from "./gemini.js";
+
+const ENV_KEY = "GEMINI_MCP_APPROVAL_MODE";
+
+function withEnv(value: string | undefined, fn: () => void): void {
+  const prev = process.env[ENV_KEY];
+  if (value === undefined) delete process.env[ENV_KEY];
+  else process.env[ENV_KEY] = value;
+  try {
+    fn();
+  } finally {
+    if (prev === undefined) delete process.env[ENV_KEY];
+    else process.env[ENV_KEY] = prev;
+  }
+}
+
+test("resolveApprovalMode is opt-in (undefined unless set) and rejects unknown values", () => {
+  withEnv(undefined, () => {
+    assert.equal(resolveApprovalMode(), undefined);
+    assert.equal(resolveApprovalMode("bogus"), undefined);
+    assert.equal(resolveApprovalMode("yolo"), "yolo");
+    assert.equal(resolveApprovalMode("plan"), "plan");
+  });
+});
+
+test("resolveApprovalMode reads the env var, but the arg overrides it", () => {
+  withEnv("auto_edit", () => {
+    assert.equal(resolveApprovalMode(), "auto_edit");
+    assert.equal(resolveApprovalMode("plan"), "plan");
+  });
+});
+
+test("buildGeminiArgs forces no approval mode by default", () => {
+  withEnv(undefined, () => {
+    assert.deepEqual(buildGeminiArgs("gemini-2.5-flash", { sandbox: true }), [
+      "-m",
+      "gemini-2.5-flash",
+      "-s",
+    ]);
+    assert.deepEqual(buildGeminiArgs(undefined, { resume: "abc" }), ["--resume", "abc"]);
+    assert.deepEqual(buildGeminiArgs(undefined, { sessionId: "xyz" }), [
+      "--session-id",
+      "xyz",
+    ]);
+  });
+});
+
+test("buildGeminiArgs adds the approval flag only when requested; resume beats sessionId", () => {
+  withEnv(undefined, () => {
+    assert.deepEqual(buildGeminiArgs(undefined, { approvalMode: "yolo" }), [
+      "--approval-mode",
+      "yolo",
+    ]);
+    assert.deepEqual(
+      buildGeminiArgs(undefined, { approvalMode: "plan", resume: "r1", sessionId: "s1" }),
+      ["--approval-mode", "plan", "--resume", "r1"],
+    );
+  });
+});
diff --git a/src/backends/gemini.ts b/src/backends/gemini.ts
new file mode 100644
index 0000000..cc7ea9c
--- /dev/null
+++ b/src/backends/gemini.ts
@@ -0,0 +1,87 @@
+import { executeCommand } from "../utils/commandExecutor.js";
+import { Logger } from "../utils/logger.js";
+import {
+  CLI,
+  MODELS,
+  ERROR_MESSAGES,
+  APPROVAL_MODES,
+  ENV,
+  type ApprovalMode,
+} from "../constants.js";
+import type { Backend, BackendRunOptions } from "./types.js";
+
+const VALID_APPROVAL_MODES = Object.values(APPROVAL_MODES) as string[];
+
+/**
+ * Resolve the approval mode: explicit arg > GEMINI_MCP_APPROVAL_MODE env. This
+ * is OPT-IN — when neither is set we return undefined and pass no flag, so the
+ * Gemini CLI behaves exactly as it does today for plain Q&A. (We deliberately do
+ * NOT default to "plan": in headless `-p` mode that turns Gemini into an
+ * autonomous planner that ignores simple questions and can error out.) An unknown
+ * value yields undefined; an invalid explicit arg does NOT fall back to the env var.
+ */
+export function resolveApprovalMode(arg?: string): ApprovalMode | undefined {
+  const candidate = arg || process.env[ENV.APPROVAL_MODE];
+  if (!candidate) return undefined;
+  return VALID_APPROVAL_MODES.includes(candidate) ? (candidate as ApprovalMode) : undefined;
+}
+
+/** Build the Gemini CLI argv (minus the prompt, which may go on stdin). */
+export function buildGeminiArgs(
+  model: string | undefined,
+  opts: BackendRunOptions,
+): string[] {
+  const args: string[] = [];
+  if (model) args.push(CLI.FLAGS.MODEL, model);
+  if (opts.sandbox) args.push(CLI.FLAGS.SANDBOX);
+  const approval = resolveApprovalMode(opts.approvalMode);
+  if (approval) args.push(CLI.FLAGS.APPROVAL_MODE, approval);
+  // Native sessions: resume a prior session, or start/identify one by id.
+  if (opts.resume) args.push(CLI.FLAGS.RESUME, opts.resume);
+  else if (opts.sessionId) args.push(CLI.FLAGS.SESSION_ID, opts.sessionId);
+  return args;
+}
+
+async function runOnce(
+  prompt: string,
+  model: string | undefined,
+  opts: BackendRunOptions,
+): Promise<string> {
+  const args = buildGeminiArgs(model, opts);
+  if (!opts.useStdin) args.push(CLI.FLAGS.PROMPT, prompt);
+  return executeCommand(
+    CLI.COMMANDS.GEMINI,
+    args,
+    opts.onProgress,
+    opts.useStdin ? prompt : undefined,
+  );
+}
+
+export const geminiBackend: Backend = {
+  name: "gemini",
+  supportsModelSelection: true,
+  async run(prompt, opts) {
+    const model = opts.model;
+    try {
+      return await runOnce(prompt, model, opts);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      // gemini-2.5-pro quota exhausted → retry once on flash (unless already flash).
+      if (message.includes(ERROR_MESSAGES.QUOTA_EXCEEDED) && model !== MODELS.FLASH) {
+        Logger.warn(`${ERROR_MESSAGES.QUOTA_EXCEEDED}. Falling back to ${MODELS.FLASH}.`);
+        try {
+          const result = await runOnce(prompt, MODELS.FLASH, opts);
+          Logger.warn(`Successfully executed with ${MODELS.FLASH} fallback.`);
+          return result;
+        } catch (fallbackError) {
+          const fe =
+            fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
+          throw new Error(
+            `${MODELS.PRO} quota exceeded, ${MODELS.FLASH} fallback also failed: ${fe}`,
+          );
+        }
+      }
+      throw error;
+    }
+  },
+};
diff --git a/src/backends/index.test.ts b/src/backends/index.test.ts
new file mode 100644
index 0000000..e5c3e4f
--- /dev/null
+++ b/src/backends/index.test.ts
@@ -0,0 +1,16 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { getBackend } from "./index.js";
+
+test("getBackend defaults to gemini", () => {
+  assert.equal(getBackend({}).name, "gemini");
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "" }).name, "gemini");
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "gemini" }).name, "gemini");
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "unknown" }).name, "gemini");
+});
+
+test("getBackend selects agy when requested (case-insensitive, incl. 'antigravity')", () => {
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "agy" }).name, "agy");
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "AGY" }).name, "agy");
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "antigravity" }).name, "agy");
+});
diff --git a/src/backends/index.ts b/src/backends/index.ts
new file mode 100644
index 0000000..25bf440
--- /dev/null
+++ b/src/backends/index.ts
@@ -0,0 +1,26 @@
+import { ENV } from "../constants.js";
+import type { Backend } from "./types.js";
+import { geminiBackend } from "./gemini.js";
+import { agyBackend } from "./agy.js";
+
+export type { Backend, BackendRunOptions } from "./types.js";
+export { geminiBackend } from "./gemini.js";
+export { agyBackend } from "./agy.js";
+
+/**
+ * Select the active backend from GEMINI_MCP_BACKEND (trimmed, case-insensitive):
+ * "agy"/"antigravity" picks the experimental Antigravity CLI; anything else, Gemini CLI.
+ */
+export function getBackend(env: NodeJS.ProcessEnv = process.env): Backend {
+  const name = (env[ENV.BACKEND] || "gemini").trim().toLowerCase();
+  switch (name) {
+    case "agy":
+    case "antigravity":
+      return agyBackend;
+    case "gemini":
+    case "":
+      return geminiBackend;
+    default:
+      return geminiBackend;
+  }
+}
diff --git a/src/backends/types.ts b/src/backends/types.ts
new file mode 100644
index 0000000..a395e17
--- /dev/null
+++ b/src/backends/types.ts
@@ -0,0 +1,29 @@
+import type { ApprovalMode } from "../constants.js";
+
+/**
+ * Options a backend understands. Backends interpret these in their own terms
+ * (e.g. the gemini backend maps `resume` to `--resume`, the agy backend to
+ * `--conversation`/`--continue`); unsupported options are ignored.
+ */
+export interface BackendRunOptions {
+  model?: string; // requested model name; see Backend.supportsModelSelection
+  sandbox?: boolean; // request sandboxed execution
+  approvalMode?: ApprovalMode; // one of the APPROVAL_MODES values
+  sessionId?: string; // id used to start or identify a session
+  resume?: string; // prior session id to continue, or "latest"
+  /**
+   * Deliver the prompt on stdin rather than as a flag argument. Used for
+   * changeMode / `@file` prompts to dodge cmd.exe parsing and the OS
+   * command-line length limit.
+   */
+  useStdin?: boolean;
+  onProgress?: (newOutput: string) => void; // invoked with newly produced output
+}
+
+/** A pluggable CLI backend that turns a prompt into model output. */
+export interface Backend {
+  readonly name: string; // backend identifier, e.g. "gemini" or "agy"
+  /** Whether `model` selection is honoured (agy print-mode is Flash-only). */
+  readonly supportsModelSelection: boolean;
+  run(prompt: string, options: BackendRunOptions): Promise<string>; // resolves with the output text
+}
diff --git a/src/constants.ts b/src/constants.ts
index 184cac2..087ea0f 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -62,14 +62,18 @@ export const CLI = {
   // Command names
   COMMANDS: {
     GEMINI: "gemini",
+    AGY: "agy", // Antigravity CLI — experimental backend (gemini-cli's successor)
     ECHO: "echo",
   },
-  // Command flags
+  // Command flags (Gemini CLI)
   FLAGS: {
     MODEL: "-m",
     SANDBOX: "-s",
     PROMPT: "-p",
-    HELP: "-help",
+    HELP: "--help", // was "-help" — yargs parsed that as -h -e -l -p (the help bug)
+    APPROVAL_MODE: "--approval-mode",
+    SESSION_ID: "--session-id",
+    RESUME: "--resume",
   },
   // Default values
   DEFAULTS: {
@@ -79,6 +83,26 @@ export const CLI = {
   },
 } as const;
 
+// Gemini CLI approval modes (`gemini --approval-mode <mode>`, confirmed in v0.43).
+// Opt-in only — when unset, no mode is forced (preserves plain Q&A behaviour).
+// plan = autonomous read-only planner · auto_edit = auto-approve edit tools ·
+// yolo = auto-approve all tools. Tool schemas repeat these literals in z.enum — keep in sync.
+export const APPROVAL_MODES = {
+  DEFAULT: "default",
+  AUTO_EDIT: "auto_edit",
+  YOLO: "yolo",
+  PLAN: "plan",
+} as const;
+export type ApprovalMode = (typeof APPROVAL_MODES)[keyof typeof APPROVAL_MODES]; // union of the four literals above
+
+// Environment variables that configure the server.
+export const ENV = {
+  BACKEND: "GEMINI_MCP_BACKEND", // "gemini" (default) | "agy" | "antigravity" (alias of agy); case-insensitive
+  APPROVAL_MODE: "GEMINI_MCP_APPROVAL_MODE", // overridden per-call by the approvalMode arg
+  GEMINI_CLI_PATH: "GEMINI_CLI_PATH", // explicit path to the gemini executable (Windows shim resolution)
+  TIMEOUT_MS: "GEMINI_MCP_TIMEOUT_MS", // per-call command timeout in milliseconds (default 30 min; 0 disables)
+} as const;
+
 
 // (merged PromptArguments and ToolArguments)
 export interface ToolArguments {
@@ -88,6 +112,9 @@ export interface ToolArguments {
   changeMode?: boolean | string;
   chunkIndex?: number | string; // Which chunk to return (1-based)
   chunkCacheKey?: string; // Optional cache key for continuation
+  approvalMode?: string; // Gemini approval mode: default | auto_edit | yolo | plan
+  sessionId?: string; // Start/identify a session (gemini --session-id, agy --conversation)
+  resume?: string; // Resume a prior session id or "latest" (gemini --resume, agy --continue)
   message?: string; // For Ping tool -- Un-used.
   
   // --> new tool
diff --git a/src/index.ts b/src/index.ts
index 46c6118..a1d10ee 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -16,6 +16,7 @@ import {
   GetPromptResult,
   CallToolResult,
 } from "@modelcontextprotocol/sdk/types.js";
+import { readFileSync } from "node:fs";
 import { Logger } from "./utils/logger.js";
 import { PROTOCOL, ToolArguments } from "./constants.js";
 
@@ -27,10 +28,16 @@ import {
   getPromptMessage 
 } from "./tools/index.js";
 
+// Load the version from package.json (resolved one directory above this module)
+// at startup so it never drifts from the published version (was hardcoded "1.1.4").
+const pkg = JSON.parse(
+  readFileSync(new URL("../package.json", import.meta.url), "utf8"),
+) as { version: string };
+
 const server = new Server(
   {
     name: "gemini-cli-mcp",
-    version: "1.1.4",
+    version: pkg.version,
   },{
     capabilities: {
       tools: {},
diff --git a/src/tools/ask-gemini.tool.ts b/src/tools/ask-gemini.tool.ts
index b6fee71..bfdc917 100644
--- a/src/tools/ask-gemini.tool.ts
+++ b/src/tools/ask-gemini.tool.ts
@@ -1,9 +1,10 @@
 import { z } from 'zod';
 import { UnifiedTool } from './registry.js';
 import { executeGeminiCLI, processChangeModeOutput } from '../utils/geminiExecutor.js';
-import { 
-  ERROR_MESSAGES, 
-  STATUS_MESSAGES
+import {
+  ERROR_MESSAGES,
+  STATUS_MESSAGES,
+  type ApprovalMode,
 } from '../constants.js';
 
 const askGeminiArgsSchema = z.object({
@@ -13,6 +14,9 @@ const askGeminiArgsSchema = z.object({
   changeMode: z.boolean().default(false).describe("Enable structured change mode - formats prompts to prevent tool errors and returns structured edit suggestions that Claude can apply directly"),
   chunkIndex: z.union([z.number(), z.string()]).optional().describe("Which chunk to return (1-based)"),
   chunkCacheKey: z.string().optional().describe("Optional cache key for continuation"),
+  approvalMode: z.enum(['default', 'auto_edit', 'yolo', 'plan']).optional().describe("Optional Gemini approval mode. If omitted, no mode is forced (best for plain Q&A/analysis). 'yolo'/'auto_edit' let Gemini run or edit (use with sandbox); 'plan' makes Gemini an autonomous read-only planner."),
+  sessionId: z.string().optional().describe("Start or identify a conversation session by id, so a later call can resume it (gemini --session-id)."),
+  resume: z.string().optional().describe("Resume a prior session by id, or 'latest' for the most recent, to continue a multi-turn conversation (gemini --resume)."),
 });
 
 export const askGeminiTool: UnifiedTool = {
@@ -24,8 +28,8 @@ export const askGeminiTool: UnifiedTool = {
   },
   category: 'gemini',
   execute: async (args, onProgress) => {
-    const { prompt, model, sandbox, changeMode, chunkIndex, chunkCacheKey } = args; if (!prompt?.trim()) { throw new Error(ERROR_MESSAGES.NO_PROMPT_PROVIDED); }
-  
+    const { prompt, model, sandbox, changeMode, chunkIndex, chunkCacheKey, approvalMode, sessionId, resume } = args; if (!prompt?.trim()) { throw new Error(ERROR_MESSAGES.NO_PROMPT_PROVIDED); }
+
     if (changeMode && chunkIndex && chunkCacheKey) {
       // Security: validate cacheKey format before any cache access
       if (typeof chunkCacheKey !== 'string' || !/^[a-f0-9]{8}$/.test(chunkCacheKey)) {
@@ -38,15 +42,17 @@ export const askGeminiTool: UnifiedTool = {
         prompt as string
       );
     }
-    
-    const result = await executeGeminiCLI(
-      prompt as string,
-      model as string | undefined,
-      !!sandbox,
-      !!changeMode,
-      onProgress
-    );
-    
+
+    const result = await executeGeminiCLI(prompt as string, {
+      model: model as string | undefined,
+      sandbox: !!sandbox,
+      changeMode: !!changeMode,
+      approvalMode: approvalMode as ApprovalMode | undefined,
+      sessionId: sessionId as string | undefined,
+      resume: resume as string | undefined,
+      onProgress,
+    });
+
     if (changeMode) {
       return processChangeModeOutput(
         result,
@@ -55,6 +61,9 @@ export const askGeminiTool: UnifiedTool = {
         prompt as string
       );
     }
-    return `${STATUS_MESSAGES.GEMINI_RESPONSE}\n${result}`; // changeMode false
+    // Surface the active session id so the caller can resume the conversation.
+    const activeSession = (resume as string | undefined) || (sessionId as string | undefined);
+    const sessionNote = activeSession ? `\n\n[session: ${activeSession}]` : '';
+    return `${STATUS_MESSAGES.GEMINI_RESPONSE}\n${result}${sessionNote}`; // changeMode false
   }
 };
\ No newline at end of file
diff --git a/src/tools/brainstorm.tool.ts b/src/tools/brainstorm.tool.ts
index 0970ade..e5680d9 100644
--- a/src/tools/brainstorm.tool.ts
+++ b/src/tools/brainstorm.tool.ts
@@ -2,6 +2,7 @@ import { z } from 'zod';
 import { UnifiedTool } from './registry.js';
 import { Logger } from '../utils/logger.js';
 import { executeGeminiCLI } from '../utils/geminiExecutor.js';
+import { type ApprovalMode } from '../constants.js';
 
 function buildBrainstormPrompt(config: {
   prompt: string;
@@ -118,6 +119,7 @@ ${domain ? `Given the ${domain} domain, I'll apply the most effective combinatio
 const brainstormArgsSchema = z.object({
   prompt: z.string().min(1).describe("Primary brainstorming challenge or question to explore"),
   model: z.string().optional().describe("Optional model to use (e.g., 'gemini-2.5-flash'). If not specified, uses the default model (gemini-2.5-pro)."),
+  approvalMode: z.enum(['default', 'auto_edit', 'yolo', 'plan']).optional().describe("Optional Gemini approval mode. If omitted, no mode is forced."),
   methodology: z.enum(['divergent', 'convergent', 'scamper', 'design-thinking', 'lateral', 'auto']).default('auto').describe("Brainstorming framework: 'divergent' (generate many ideas), 'convergent' (refine existing), 'scamper' (systematic triggers), 'design-thinking' (human-centered), 'lateral' (unexpected connections), 'auto' (AI selects best)"),
   domain: z.string().optional().describe("Domain context for specialized brainstorming (e.g., 'software', 'business', 'creative', 'research', 'product', 'marketing')"),
   constraints: z.string().optional().describe("Known limitations, requirements, or boundaries (budget, time, technical, legal, etc.)"),
@@ -138,6 +140,7 @@ export const brainstormTool: UnifiedTool = {
     const {
       prompt,
       model,
+      approvalMode,
       methodology = 'auto',
       domain,
       constraints,
@@ -166,6 +169,10 @@ export const brainstormTool: UnifiedTool = {
     onProgress?.(`Generating ${ideaCount} ideas via ${methodology} methodology...`);
     
     // Execute with Gemini
-    return await executeGeminiCLI(enhancedPrompt, model as string | undefined, false, false, onProgress);
+    return await executeGeminiCLI(enhancedPrompt, {
+      model: model as string | undefined,
+      approvalMode: approvalMode as ApprovalMode | undefined,
+      onProgress,
+    });
   }
 };
\ No newline at end of file
diff --git a/src/tools/simple-tools.ts b/src/tools/simple-tools.ts
index 64af593..df272b9 100644
--- a/src/tools/simple-tools.ts
+++ b/src/tools/simple-tools.ts
@@ -1,6 +1,7 @@
 import { z } from 'zod';
 import { UnifiedTool } from './registry.js';
 import { executeCommand } from '../utils/commandExecutor.js';
+import { CLI } from '../constants.js';
 
 const pingArgsSchema = z.object({
   prompt: z.string().default('').describe("Message to echo "),
@@ -16,7 +17,7 @@ export const pingTool: UnifiedTool = {
   category: 'simple',
   execute: async (args, onProgress) => {
     const message = args.prompt || args.message || "Pong!";
-    return executeCommand("echo", [message as string], onProgress);
+    return executeCommand(CLI.COMMANDS.ECHO, [message as string], onProgress);
   }
 };
 
@@ -31,6 +32,6 @@ export const helpTool: UnifiedTool = {
   },
   category: 'simple',
   execute: async (args, onProgress) => {
-    return executeCommand("gemini", ["-help"], onProgress);
+    return executeCommand(CLI.COMMANDS.GEMINI, [CLI.FLAGS.HELP], onProgress);
   }
 };
\ No newline at end of file
diff --git a/src/utils/commandExecutor.test.ts b/src/utils/commandExecutor.test.ts
new file mode 100644
index 0000000..5f510ca
--- /dev/null
+++ b/src/utils/commandExecutor.test.ts
@@ -0,0 +1,41 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+  quoteForCmd,
+  resolveCommandForExecution,
+  buildEnoentErrorMessage,
+} from "./commandExecutor.js";
+
+test("quoteForCmd wraps in double quotes and doubles embedded quotes", () => {
+  // Pairs of [raw argument, expected cmd.exe-quoted form]; "&" is inert once quoted.
+  const cases: Array<[string, string]> = [["hello", '"hello"'], ["a&calc", '"a&calc"'], ['a"b', '"a""b"']];
+  for (const [raw, quoted] of cases) assert.equal(quoteForCmd(raw), quoted);
+});
+
+test("quoteForCmd doubles a trailing backslash so it can't escape the closing quote", () => {
+  assert.strictEqual(quoteForCmd("path\\"), '"path\\\\"');
+});
+
+test("resolveCommandForExecution is a no-op off Windows", () => {
+  if (process.platform === "win32") {
+    // On Windows only require that the resolved command is never empty.
+    assert.ok(resolveCommandForExecution("gemini").length > 0);
+    return;
+  }
+  // Everywhere else the command name must come back unchanged.
+  for (const cmd of ["gemini", "echo"]) assert.equal(resolveCommandForExecution(cmd), cmd);
+});
+
+test("buildEnoentErrorMessage gives gemini-specific, platform-aware guidance", () => {
+  const guidance = buildEnoentErrorMessage("gemini");
+  // The lookup hint names `where` on Windows and `which` elsewhere.
+  const lookupHint = process.platform === "win32" ? /where gemini/ : /which gemini/;
+  const expected = [/Could not find the "gemini"/, /GEMINI_CLI_PATH/, /@google\/gemini-cli/, lookupHint];
+  for (const pattern of expected) assert.match(guidance, pattern);
+});
+
+test("buildEnoentErrorMessage omits the gemini install hint for other commands", () => {
+  const guidance = buildEnoentErrorMessage("agy");
+  assert.ok(guidance.includes('Could not find the "agy"'));
+  assert.ok(!guidance.includes("@google/gemini-cli"));
+});
diff --git a/src/utils/commandExecutor.ts b/src/utils/commandExecutor.ts
index edf90c7..f31e42f 100644
--- a/src/utils/commandExecutor.ts
+++ b/src/utils/commandExecutor.ts
@@ -1,24 +1,86 @@
-import { spawn } from "child_process";
+import { spawn, execSync } from "child_process";
 import { Logger } from "./logger.js";
+import { CLI, ENV } from "../constants.js";
+import { resolveTimeoutMs } from "./timeoutManager.js";
 
 // Quote a single argument for cmd.exe (used by spawn's shell:true on Windows).
 // Embedded quotes are doubled and backslash runs before a quote (or the closing
 // quote) are doubled so they don't escape it, per CommandLineToArgvW rules. Note
 // cmd still expands %VAR%/!VAR! inside quotes — an env read at worst, not RCE.
-function quoteForCmd(arg: string): string {
+export function quoteForCmd(arg: string): string {
   const body = String(arg).replace(/(\\*)"/g, '$1$1""').replace(/(\\+)$/, '$1$1');
   return `"${body}"`;
 }
 
+// Windows-only: find the real executable for the gemini command. The MCP server
+// often runs without the user's interactive PATH, so we (1) honour an explicit
+// GEMINI_CLI_PATH override, then (2) ask `where` and prefer the `.cmd` shim that
+// Node can actually launch (over .ps1/.bat/.exe). Falls back to "gemini.cmd".
+// Resolution is cached per command for the life of the process.
+const resolveCache = new Map<string, string>();
+export function resolveCommandForExecution(command: string): string {
+  if (process.platform !== "win32" || command !== CLI.COMMANDS.GEMINI) return command;
+
+  const cached = resolveCache.get(command);
+  if (cached) return cached;
+
+  let resolved: string = command;
+  const override = process.env[ENV.GEMINI_CLI_PATH]?.trim();
+  if (override) {
+    resolved = override;
+  } else {
+    try {
+      const out = execSync(`where ${command}`, {
+        encoding: "utf8",
+        stdio: ["ignore", "pipe", "ignore"],
+      });
+      const candidates = out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
+      const byExt = (ext: string) => candidates.find((c) => c.toLowerCase().endsWith(ext));
+      resolved =
+        byExt(".cmd") || byExt(".ps1") || byExt(".bat") || byExt(".exe") ||
+        candidates[0] || `${command}.cmd`;
+    } catch {
+      resolved = `${command}.cmd`;
+    }
+  }
+
+  resolveCache.set(command, resolved);
+  return resolved;
+}
+
+// Build the multi-line, user-facing message shown when spawning fails with ENOENT.
+// The gemini command gets extra install/override hints; wording adapts to the platform.
+export function buildEnoentErrorMessage(command: string): string {
+  const onWindows = process.platform === "win32";
+  const lookupTool = onWindows ? "where" : "which";
+  const overrideHint = onWindows
+    ? `• Or set ${ENV.GEMINI_CLI_PATH} to the full path of the gemini shim (e.g. C:\\path\\to\\gemini.cmd).`
+    : `• Or set ${ENV.GEMINI_CLI_PATH} to the full path of the gemini executable.`;
+  const geminiHints =
+    command === CLI.COMMANDS.GEMINI
+      ? [`• Install it: \`npm install -g @google/gemini-cli\`.`, overrideHint]
+      : [];
+  return [
+    `Could not find the "${command}" executable.`,
+    `The MCP server runs in its own process and may not inherit your shell's PATH.`,
+    `• Verify it is installed and resolvable: \`${lookupTool} ${command}\`.`,
+    ...geminiHints,
+  ].join("\n");
+}
+
 export async function executeCommand(
   command: string,
   args: string[],
-  onProgress?: (newOutput: string) => void
+  onProgress?: (newOutput: string) => void,
+  stdinData?: string,
 ): Promise<string> {
   return new Promise((resolve, reject) => {
     const startTime = Date.now();
     Logger.commandExecution(command, args, startTime);
 
+    const isWindows = process.platform === "win32";
+    const resolvedCommand = resolveCommandForExecution(command);
+
     // Windows quirk: Node 22+ blocks spawning `.cmd` / `.bat` shims without
     // `shell: true` (CVE-2024-27980). But shell:true routes the command through
     // cmd.exe, which re-parses the joined line — so EVERY argument must be
@@ -26,23 +88,61 @@ export async function executeCommand(
     // trigger command injection even in tokens without spaces (e.g. a prompt
     // `a&calc`); wrapping each arg in double quotes makes them inert. This is a
     // no-op on macOS / Linux, where shell:false passes argv directly.
-    const isWindows = process.platform === "win32";
     const safeArgs = isWindows ? args.map(quoteForCmd) : args;
+    // A resolved full path may contain spaces; quote it for cmd.exe. A bare
+    // command name (no whitespace) passes through unchanged to preserve the
+    // exact, already-tested shim-launch behaviour.
+    const spawnCommand =
+      isWindows && /\s/.test(resolvedCommand) ? `"${resolvedCommand}"` : resolvedCommand;
 
-    const childProcess = spawn(command, safeArgs, {
+    // Complex prompts arrive on stdin (see geminiExecutor) to bypass cmd.exe
+    // parsing and the OS command-line length limit; only open stdin then.
+    // windowsHide suppresses the popup console window on Windows (no-op elsewhere).
+    const childProcess = spawn(spawnCommand, safeArgs, {
       env: process.env,
       shell: isWindows,
-      stdio: ["ignore", "pipe", "pipe"],
+      windowsHide: true,
+      stdio: [stdinData !== undefined ? "pipe" : "ignore", "pipe", "pipe"],
     });
 
+    if (stdinData !== undefined && childProcess.stdin) {
+      childProcess.stdin.write(stdinData);
+      childProcess.stdin.end();
+    }
+
     let stdout = "";
     let stderr = "";
     let isResolved = false;
     let lastReportedLength = 0;
-    
-    childProcess.stdout.on("data", (data) => {
+
+    // Release a genuinely hung child after the configured timeout (default 30m;
+    // GEMINI_MCP_TIMEOUT_MS overrides, 0 disables). SIGTERM first, then SIGKILL.
+    const timeoutMs = resolveTimeoutMs();
+    let timeoutHandle: NodeJS.Timeout | undefined;
+    const clearTimer = () => {
+      if (timeoutHandle) {
+        clearTimeout(timeoutHandle);
+        timeoutHandle = undefined;
+      }
+    };
+    if (timeoutMs > 0) {
+      timeoutHandle = setTimeout(() => {
+        if (isResolved) return;
+        isResolved = true;
+        Logger.error(`Command timed out after ${timeoutMs}ms; terminating: ${command}`);
+        try { childProcess.kill("SIGTERM"); } catch { /* already gone */ }
+        const sigkill = setTimeout(() => {
+          try { childProcess.kill("SIGKILL"); } catch { /* already gone */ }
+        }, 2000);
+        sigkill.unref?.();
+        reject(new Error(`Command timed out after ${timeoutMs}ms: ${command}`));
+      }, timeoutMs);
+      timeoutHandle.unref?.();
+    }
+
+    childProcess.stdout?.on("data", (data) => {
       stdout += data.toString();
-      
+
       // Report new content if callback provided
       if (onProgress && stdout.length > lastReportedLength) {
         const newContent = stdout.substring(lastReportedLength);
@@ -51,9 +151,8 @@ export async function executeCommand(
       }
     });
 
-
     // CLI level errors
-    childProcess.stderr.on("data", (data) => {
+    childProcess.stderr?.on("data", (data) => {
       stderr += data.toString();
       // find RESOURCE_EXHAUSTED when gemini-2.5-pro quota is exceeded
       if (stderr.includes("RESOURCE_EXHAUSTED")) {
@@ -78,27 +177,32 @@ export async function executeCommand(
       }
     });
     childProcess.on("error", (error) => {
-      if (!isResolved) {
-        isResolved = true;
-        Logger.error(`Process error:`, error);
+      if (isResolved) return;
+      isResolved = true;
+      clearTimer();
+      Logger.error(`Process error:`, error);
+      const code = (error as NodeJS.ErrnoException).code;
+      if (code === "ENOENT") {
+        reject(new Error(buildEnoentErrorMessage(command)));
+      } else {
         reject(new Error(`Failed to spawn command: ${error.message}`));
       }
     });
     childProcess.on("close", (code) => {
-      if (!isResolved) {
-        isResolved = true;
-        if (code === 0) {
-          Logger.commandComplete(startTime, code, stdout.length);
-          resolve(stdout.trim());
-        } else {
-          Logger.commandComplete(startTime, code);
-          Logger.error(`Failed with exit code ${code}`);
-          const errorMessage = stderr.trim() || "Unknown error";
-          reject(
-            new Error(`Command failed with exit code ${code}: ${errorMessage}`),
-          );
-        }
+      if (isResolved) return;
+      isResolved = true;
+      clearTimer();
+      if (code === 0) {
+        Logger.commandComplete(startTime, code, stdout.length);
+        resolve(stdout.trim());
+      } else {
+        Logger.commandComplete(startTime, code);
+        Logger.error(`Failed with exit code ${code}`);
+        const errorMessage = stderr.trim() || "Unknown error";
+        reject(
+          new Error(`Command failed with exit code ${code}: ${errorMessage}`),
+        );
       }
     });
   });
-}
\ No newline at end of file
+}
diff --git a/src/utils/geminiExecutor.test.ts b/src/utils/geminiExecutor.test.ts
new file mode 100644
index 0000000..2fd922c
--- /dev/null
+++ b/src/utils/geminiExecutor.test.ts
@@ -0,0 +1,17 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { assertSafeFileReferences } from "./geminiExecutor.js";
+
+const root = process.cwd();
+
+test("assertSafeFileReferences allows in-project @file references", () => {
+  // Prompts whose @references (if any) stay inside the project root.
+  const safePrompts = ["explain @src/index.ts", "no references at all", "@package.json summarise"];
+  for (const prompt of safePrompts) assert.doesNotThrow(() => assertSafeFileReferences(prompt, root));
+});
+
+test("assertSafeFileReferences rejects traversal, home, and absolute references", () => {
+  // Parent-directory, home-directory, and absolute paths respectively.
+  const escapingPrompts = ["@../secret.txt", "@~/.ssh/id_rsa", "@/etc/passwd"];
+  for (const prompt of escapingPrompts) assert.throws(() => assertSafeFileReferences(prompt, root), /outside the project directory/);
+});
diff --git a/src/utils/geminiExecutor.ts b/src/utils/geminiExecutor.ts
index 6cae0aa..e754934 100644
--- a/src/utils/geminiExecutor.ts
+++ b/src/utils/geminiExecutor.ts
@@ -1,12 +1,7 @@
 import * as path from 'path';
-import { executeCommand } from './commandExecutor.js';
 import { Logger } from './logger.js';
-import { 
-  ERROR_MESSAGES, 
-  STATUS_MESSAGES, 
-  MODELS, 
-  CLI
-} from '../constants.js';
+import type { ApprovalMode } from '../constants.js';
+import { getBackend } from '../backends/index.js';
 
 import { parseChangeModeOutput, validateChangeModeEdits } from './changeModeParser.js';
 import { formatChangeModeResponse, summarizeChangeModeEdits } from './changeModeTranslator.js';
@@ -43,13 +38,21 @@ export function assertSafeFileReferences(prompt: string, root: string = process.
   }
 }
 
+export interface ExecuteGeminiOptions { // options accepted by executeGeminiCLI
+  model?: string; // passed through to the selected backend
+  sandbox?: boolean; // passed through to the selected backend
+  changeMode?: boolean; // wraps the prompt in change-mode instructions and forces stdin delivery
+  approvalMode?: ApprovalMode; // passed through to the selected backend
+  sessionId?: string; // passed through to the selected backend
+  resume?: string; // passed through to the selected backend
+  onProgress?: (newOutput: string) => void; // passed through to the selected backend
+}
+
 export async function executeGeminiCLI(
   prompt: string,
-  model?: string,
-  sandbox?: boolean,
-  changeMode?: boolean,
-  onProgress?: (newOutput: string) => void
+  options: ExecuteGeminiOptions = {},
 ): Promise<string> {
+  const { model, sandbox, changeMode, approvalMode, sessionId, resume, onProgress } = options;
   let prompt_processed = prompt;
   
   if (changeMode) {
@@ -118,48 +121,25 @@ ${prompt_processed}
     prompt_processed = changeModeInstructions;
   }
 
-  // Block @file references that escape the project root before the prompt
-  // reaches the Gemini CLI's file-inlining parser (CVE-2026-0755).
+  // Security: block @file refs that escape the project root before the prompt
+  // reaches any CLI that inlines file contents (CVE-2026-0755).
   assertSafeFileReferences(prompt_processed);
 
-  const args = [];
-  if (model) { args.push(CLI.FLAGS.MODEL, model); }
-  if (sandbox) { args.push(CLI.FLAGS.SANDBOX); }
-
-  // spawn runs with shell: false (and cmd.exe-safe quoting on Windows is
-  // handled in commandExecutor), so the prompt is passed verbatim as a single
-  // argv entry. No manual quoting here — wrapping in `"` only injects literal
-  // quote characters and corrupts @file references (#66, CVE-2026-0755).
-  args.push(CLI.FLAGS.PROMPT, prompt_processed);
-
-  try {
-    return await executeCommand(CLI.COMMANDS.GEMINI, args, onProgress);
-  } catch (error) {
-    const errorMessage = error instanceof Error ? error.message : String(error);
-    if (errorMessage.includes(ERROR_MESSAGES.QUOTA_EXCEEDED) && model !== MODELS.FLASH) {
-      Logger.warn(`${ERROR_MESSAGES.QUOTA_EXCEEDED}. Falling back to ${MODELS.FLASH}.`);
-      await sendStatusMessage(STATUS_MESSAGES.FLASH_RETRY);
-      const fallbackArgs = [];
-      fallbackArgs.push(CLI.FLAGS.MODEL, MODELS.FLASH);
-      if (sandbox) {
-        fallbackArgs.push(CLI.FLAGS.SANDBOX);
-      }
-      
-      // Pass the prompt verbatim here too (see note in the primary path).
-      fallbackArgs.push(CLI.FLAGS.PROMPT, prompt_processed);
-      try {
-        const result = await executeCommand(CLI.COMMANDS.GEMINI, fallbackArgs, onProgress);
-        Logger.warn(`Successfully executed with ${MODELS.FLASH} fallback.`);
-        await sendStatusMessage(STATUS_MESSAGES.FLASH_SUCCESS);
-        return result;
-      } catch (fallbackError) {
-        const fallbackErrorMessage = fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
-        throw new Error(`${MODELS.PRO} quota exceeded, ${MODELS.FLASH} fallback also failed: ${fallbackErrorMessage}`);
-      }
-    } else {
-      throw error;
-    }
-  }
+  // changeMode and @file prompts go on stdin (gemini backend) to keep large
+  // prompts under the OS command-line length limit and away from cmd.exe
+  // parsing on Windows; simple prompts use -p. The selected backend
+  // (gemini by default, agy when GEMINI_MCP_BACKEND=agy) handles the rest.
+  const useStdin = !!changeMode || prompt_processed.includes('@');
+
+  return getBackend().run(prompt_processed, {
+    model,
+    sandbox,
+    approvalMode,
+    sessionId,
+    resume,
+    useStdin,
+    onProgress,
+  });
 }
 
 export async function processChangeModeOutput(
@@ -229,9 +209,4 @@ export async function processChangeModeOutput(
   
   Logger.debug(`ChangeMode: Parsed ${edits.length} edits, ${chunks.length} chunks, returning chunk ${returnChunkIndex}`);
   return result;
-}
-
-// Placeholder
-async function sendStatusMessage(message: string): Promise<void> {
-  Logger.debug(`Status: ${message}`);
 }
\ No newline at end of file
diff --git a/src/utils/timeoutManager.test.ts b/src/utils/timeoutManager.test.ts
new file mode 100644
index 0000000..f2f2f21
--- /dev/null
+++ b/src/utils/timeoutManager.test.ts
@@ -0,0 +1,19 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { resolveTimeoutMs, DEFAULT_COMMAND_TIMEOUT_MS } from "./timeoutManager.js";
+
+test("resolveTimeoutMs: default when unset or blank", () => {
+  // A missing variable and empty / whitespace-only values all fall back to the default.
+  const envs: NodeJS.ProcessEnv[] = [{}, { GEMINI_MCP_TIMEOUT_MS: "" }, { GEMINI_MCP_TIMEOUT_MS: "   " }];
+  for (const env of envs) assert.equal(resolveTimeoutMs(env), DEFAULT_COMMAND_TIMEOUT_MS);
+});
+
+test("resolveTimeoutMs: honours a positive override", () => {
+  assert.strictEqual(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "5000" }), 5000);
+});
+
+test("resolveTimeoutMs: 0, negative, or invalid disables the timeout (returns 0)", () => {
+  // Zero, a negative number, and a non-numeric string respectively.
+  const disabling = ["0", "-1", "abc"];
+  for (const raw of disabling) assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: raw }), 0);
+});
diff --git a/src/utils/timeoutManager.ts b/src/utils/timeoutManager.ts
index e69de29..2764359 100644
--- a/src/utils/timeoutManager.ts
+++ b/src/utils/timeoutManager.ts
@@ -0,0 +1,20 @@
+import { ENV } from "../constants.js";
+
+// Default per-command timeout. Large-codebase analyses can legitimately run for
+// many minutes (see STATUS_MESSAGES), so this is deliberately generous — it
+// exists to release a genuinely hung child process, not to cap normal work.
+// Override with GEMINI_MCP_TIMEOUT_MS (milliseconds); set it to 0 to disable.
+export const DEFAULT_COMMAND_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
+
+/**
+ * Resolve the per-command timeout (ms) from GEMINI_MCP_TIMEOUT_MS; unset or blank
+ * yields {@link DEFAULT_COMMAND_TIMEOUT_MS}, and 0 / negative / non-numeric yields
+ * 0 (timeout disabled). Capped at 2^31-1 ms, the largest delay setTimeout honours.
+ */
+export function resolveTimeoutMs(env: NodeJS.ProcessEnv = process.env): number {
+  const raw = env[ENV.TIMEOUT_MS];
+  if (raw === undefined || raw.trim() === "") return DEFAULT_COMMAND_TIMEOUT_MS;
+  const parsed = Number(raw);
+  if (!Number.isFinite(parsed) || parsed <= 0) return 0; // disabled / invalid
+  return Math.min(parsed, 2147483647); // a larger delay makes setTimeout fire after 1 ms
+}
diff --git a/tsconfig.build.json b/tsconfig.build.json
new file mode 100644
index 0000000..50d32b9
--- /dev/null
+++ b/tsconfig.build.json
@@ -0,0 +1,4 @@
+{
+  "extends": "./tsconfig.json",
+  "exclude": ["node_modules", "dist", "scripts", "src/**/*.test.ts"]
+}

From 4c9b9b3ff197461f6431bcc15e4ab829e385cb31 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 14:15:08 -0700
Subject: [PATCH 2/8] fix: address PR #78 review feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- commandExecutor: add an 'error' listener on child stdin so an EPIPE (child exited before reading) is logged instead of crashing the server (gemini-code-assist).
- commandExecutor: on Windows, terminate timed-out processes with 'taskkill /pid <pid> /T /F' — with shell:true, kill() only hit cmd.exe and orphaned the real gemini/agy child (gemini-code-assist).
- resolveCommandForExecution: prefer .cmd/.exe/.bat and stop preferring .ps1, which cmd.exe (shell:true) can't launch directly (Copilot).
- run-tests.mjs: feature-detect the tsx loader — '--import tsx' on Node >=20.6, '--loader tsx' below (the >=18 floor lacks --import) (Copilot).
- ask-gemini: don't emit 'latest' as a [session: ...] id and clarify it's the requested id, not one parsed from the CLI (Copilot).
---
 docs/.vitepress/config.js         |   1 +
 docs/api.md                       | 103 +++++++++++++++++-
 docs/concepts/configuration.md    | 169 ++++++++++++++++++++++++++++++
 docs/concepts/how-it-works.md     |  46 ++++++--
 docs/concepts/models.md           |  20 ++--
 docs/getting-started.md           |  32 +++++-
 docs/index.md                     |   5 +-
 docs/installation.md              |   2 +-
 docs/resources/faq.md             |  15 ++-
 docs/resources/roadmap.md         |  59 ++++++++---
 docs/resources/troubleshooting.md |  13 ++-
 docs/usage/commands.md            |  96 +++++++++++------
 scripts/run-tests.mjs             |   8 +-
 src/tools/ask-gemini.tool.ts      |  10 +-
 src/utils/commandExecutor.ts      |  29 +++--
 15 files changed, 520 insertions(+), 88 deletions(-)
 create mode 100644 docs/concepts/configuration.md

diff --git a/docs/.vitepress/config.js b/docs/.vitepress/config.js
index 00a0a60..c8def64 100644
--- a/docs/.vitepress/config.js
+++ b/docs/.vitepress/config.js
@@ -39,6 +39,7 @@ export default withMermaid(
         collapsed: false,
         items: [
           { text: 'How It Works', link: '/concepts/how-it-works' },
+          { text: 'Configuration', link: '/concepts/configuration' },
           { text: 'File Analysis (@)', link: '/concepts/file-analysis' },
           { text: 'Model Selection', link: '/concepts/models' },
           { text: 'Sandbox Mode', link: '/concepts/sandbox' }
diff --git a/docs/api.md b/docs/api.md
index 1f469bd..f9341f1 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -1,3 +1,102 @@
-# API
+# API Reference
 
-Stay tuned.
\ No newline at end of file
+## Tools
+
+The MCP server exposes the following tools over stdio transport.
+
+### ask-gemini
+
+The primary tool for sending prompts to Gemini.
+
+**Arguments:**
+
+```typescript
+{
+  prompt: string;           // Required. Use @ to include files.
+  model?: string;           // e.g. "gemini-2.5-flash"
+  sandbox?: boolean;        // default false
+  changeMode?: boolean;     // default false — structured edits
+  approvalMode?: "default" | "auto_edit" | "yolo" | "plan";
+  sessionId?: string;       // tag a session
+  resume?: string;          // resume by id or "latest"
+  chunkIndex?: number;      // 1-based chunk (changeMode)
+  chunkCacheKey?: string;   // hex cache key (changeMode)
+}
+```
+
+### brainstorm
+
+Structured ideation with methodology frameworks.
+
+**Arguments:**
+
+```typescript
+{
+  prompt: string;           // Required. The challenge to brainstorm.
+  model?: string;
+  approvalMode?: "default" | "auto_edit" | "yolo" | "plan";
+  methodology?: "divergent" | "convergent" | "scamper"
+              | "design-thinking" | "lateral" | "auto";
+  domain?: string;          // e.g. "software", "business"
+  constraints?: string;
+  existingContext?: string;
+  ideaCount?: number;       // default 12
+  includeAnalysis?: boolean; // default true
+}
+```
+
+### ping
+
+Echo test. Returns the input message.
+
+```typescript
+{ prompt?: string; }  // defaults to "Pong!"
+```
+
+### Help
+
+Returns `gemini --help` output.
+
+```typescript
+{}  // no arguments
+```
+
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `GEMINI_MCP_BACKEND` | `gemini` | Backend: `gemini` or `agy` (experimental) |
+| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
+| `GEMINI_MCP_TIMEOUT_MS` | `1800000` | Per-call timeout in ms; `0` disables |
+| `GEMINI_CLI_PATH` | *(auto)* | Full path to the gemini executable (Windows) |
+
+## Transport
+
+The server uses **stdio** transport (MCP standard). It reads JSON-RPC from stdin and writes responses to stdout. No HTTP server, no ports.
+
+```json
+{
+  "mcpServers": {
+    "gemini-cli": {
+      "command": "npx",
+      "args": ["-y", "gemini-mcp-tool"]
+    }
+  }
+}
+```
+
+## Backends
+
+The `Backend` interface is:
+
+```typescript
+interface Backend {
+  readonly name: string;
+  readonly supportsModelSelection: boolean;
+  run(prompt: string, options: BackendRunOptions): Promise<string>;
+}
+```
+
+Two implementations ship:
+- **`geminiBackend`** — default, full feature support
+- **`agyBackend`** — experimental, Flash-only, transcript-file recovery
\ No newline at end of file
diff --git a/docs/concepts/configuration.md b/docs/concepts/configuration.md
new file mode 100644
index 0000000..220729f
--- /dev/null
+++ b/docs/concepts/configuration.md
@@ -0,0 +1,169 @@
+# Configuration <Badge text="1.2.0" type="tip" />
+
+All configuration is done via environment variables in your MCP client config. No config files to manage.
+
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
+| `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` |
+| `GEMINI_MCP_TIMEOUT_MS` | `1800000` (30 min) | Per-call timeout; `0` disables |
+| `GEMINI_CLI_PATH` | *(auto-detect)* | Explicit path to the gemini executable |
+
+### Setting Environment Variables
+
+#### Claude Code
+```bash
+claude mcp add gemini-cli -e GEMINI_MCP_APPROVAL_MODE=plan -- npx -y gemini-mcp-tool
+```
+
+#### Claude Desktop / Other Clients
+```json
+{
+  "mcpServers": {
+    "gemini-cli": {
+      "command": "npx",
+      "args": ["-y", "gemini-mcp-tool"],
+      "env": {
+        "GEMINI_MCP_APPROVAL_MODE": "plan",
+        "GEMINI_MCP_TIMEOUT_MS": "1800000"
+      }
+    }
+  }
+}
+```
+
+---
+
+## Approval Mode
+
+Controls how much autonomy Gemini has when processing a request. Maps directly to `gemini --approval-mode`.
+
+| Mode | Behaviour | Use Case |
+|------|-----------|----------|
+| *(unset)* | No flag passed — Gemini behaves as normal Q&A | Default; best for analysis and questions |
+| `default` | Gemini's own default mode | Same as unset |
+| `plan` | Read-only autonomous planner | "Gemini reads, Claude edits" |
+| `auto_edit` | Auto-approve file edits, prompt for other tools | Combine with `sandbox` for safe edits |
+| `yolo` | Auto-approve everything | CI scripts, fully trusted operations |
+
+::: warning
+In headless mode (`-p`), `plan` turns Gemini into an autonomous planner that may ignore simple questions. Leave unset for plain Q&A.
+:::
+
+### Per-call Override
+
+The `approvalMode` tool argument overrides the environment variable:
+
+```
+Ask gemini to review this codebase with approvalMode: "plan"
+```
+
+---
+
+## Backends
+
+The MCP server can use different CLI backends to talk to Google's models.
+
+### Gemini CLI (default)
+
+The standard `gemini` command. Supports model selection, approval modes, sandbox, and native sessions.
+
+```json
+{
+  "env": {
+    "GEMINI_MCP_BACKEND": "gemini"
+  }
+}
+```
+
+### Antigravity CLI (experimental) <Badge text="experimental" type="warning" />
+
+Google's Antigravity CLI (`agy`) is the successor to the Gemini CLI (`gemini`), which is being retired on June 18, 2026 for the free/Pro/Ultra tiers. Set `GEMINI_MCP_BACKEND=agy` to opt in.
+
+```json
+{
+  "env": {
+    "GEMINI_MCP_BACKEND": "agy"
+  }
+}
+```
+
+**Caveats:**
+- Print mode (`agy -p`) is hardcoded to **Gemini 3.5 Flash** — model selection is ignored
+- The `agy -p` stdout bug (exit 0, empty output) is worked around by reading agy's transcript files on disk
+- Only `yolo` maps to agy's `--dangerously-skip-permissions`; graded approval modes are not supported
+- Calls are serialised to avoid transcript id collisions
+
+::: tip
+You don't need to do anything today. Gemini CLI still works for headless/automation use. This backend is here so you're ready when the transition happens.
+:::
+
+---
+
+## Timeout
+
+A per-call timeout protects against hung CLI processes. If the timeout fires, the child is sent `SIGTERM`, then `SIGKILL` after 2 seconds; on Windows, the whole process tree is terminated with `taskkill /T /F` instead.
+
+| Value | Behaviour |
+|-------|-----------|
+| `1800000` (default) | 30-minute timeout |
+| Any positive number | Timeout in milliseconds |
+| `0` | Disabled — wait forever |
+
+```json
+{
+  "env": {
+    "GEMINI_MCP_TIMEOUT_MS": "600000"
+  }
+}
+```
+
+::: tip
+Large codebase analyses can legitimately run for many minutes. The 30-minute default is deliberately generous — it exists to release genuinely hung processes, not to cap normal work.
+:::
+
+---
+
+## Native Sessions <Badge text="1.2.0" type="tip" />
+
+Multi-turn conversations use the Gemini CLI's own session system — no local transcript storage.
+
+### Starting a session
+Pass `sessionId` to tag a conversation:
+```
+ask-gemini with sessionId: "my-review" — review the auth module
+```
+
+### Resuming a session
+Pass `resume` with the session id (or `"latest"`) to continue:
+```
+ask-gemini with resume: "my-review" — now suggest improvements
+```
+
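+The response echoes the session id you supplied in a `[session: <id>]` footer so follow-up calls can continue the same session. `"latest"` is a resume selector, not an id, so it is never shown in the footer.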
+
+::: info
+Sessions are backed by `gemini --session-id` / `--resume` on the Gemini backend, and `agy --conversation` / `--continue` on the agy backend.
+:::
+
+---
+
+## Windows Executable Resolution
+
+On Windows, the MCP server often runs without your interactive PATH. The tool resolves the `gemini` command by:
+
+1. Checking `GEMINI_CLI_PATH` (if set)
+2. Running `where gemini` and preferring the `.cmd` shim (then `.exe`, then `.bat`)
+3. Falling back to `gemini.cmd`
+
+If you get "command not found" errors on Windows, set `GEMINI_CLI_PATH` to the full path:
+
+```json
+{
+  "env": {
+    "GEMINI_CLI_PATH": "C:\\Users\\you\\AppData\\Roaming\\npm\\gemini.cmd"
+  }
+}
+```
diff --git a/docs/concepts/how-it-works.md b/docs/concepts/how-it-works.md
index f9bf5bd..98620e2 100644
--- a/docs/concepts/how-it-works.md
+++ b/docs/concepts/how-it-works.md
@@ -27,26 +27,58 @@ flowchart LR
     subgraph main
         direction TB
         A[You] --> |"ask gemini..."| B([**Claude**])
-        B -..-> |"invokes 'ask-gemini'"| C["Gemini-MCP-Tool"]
-        C --> |"spawn!"| D[Gemini-CLI]
-        D e1@-.-> |"response"| C
+        B -.-> |"invokes 'ask-gemini'"| C["Gemini-MCP-Tool"]
+        C --> |"dispatch"| D{"Backend"}
+        D --> |"default"| E[Gemini-CLI]
+        D -.-> |"experimental"| F["agy"]
+        E e1@-.-> |"response"| C
+        F -.-> |"transcript"| C
         C -.-> |"response"| B
         B -.-> |"summary response"| A
         e1@{ animate: true }
     end
     subgraph Project
-        B --> |"edits"| E["`**@*Files***`"]
-        D -.-> |"reads"| E
+        B --> |"edits"| G["`**@*Files***`"]
+        E -.-> |"reads"| G
     end
     classDef userNode fill:#1a237e,stroke:#fff,color:#fff,stroke-width:2px
     classDef claudeNode fill:#e64100,stroke:#fff,color:#fff,stroke-width:2px
     classDef geminiNode fill:#4285f4,stroke:#fff,color:#fff,stroke-width:2px
     classDef mcpNode fill:#37474f,stroke:#fff,color:#fff,stroke-width:2px
     classDef dataNode fill:#1b5e20,stroke:#fff,color:#fff,stroke-width:2px
+    classDef dispatchNode fill:#6a1b9a,stroke:#fff,color:#fff,stroke-width:2px
+    classDef agyNode fill:#f57f17,stroke:#fff,color:#fff,stroke-width:2px
     class A userNode
     class B claudeNode
     class C mcpNode
-    class D geminiNode
-    class E dataNode
+    class D dispatchNode
+    class E geminiNode
+    class F agyNode
+    class G dataNode
 ```
 </DiagramModal>
+
+## Architecture <Badge text="1.2.0" type="tip" />
+
+Starting with v1.2.0, the MCP server uses a **pluggable backend** architecture:
+
+1. **Your MCP client** (Claude Code, Claude Desktop, etc.) sends a tool call via stdio
+2. **gemini-mcp-tool** validates arguments, applies security guards (`@file` containment, approval mode), and routes the prompt through the selected backend
+3. **The backend** (Gemini CLI by default, Antigravity CLI when opted in) spawns the CLI, handles stdin/stdout, and returns the model response
+4. **The MCP server** formats the response and sends it back to your client
+
+### Key Components
+
+| Component | What it does |
+|-----------|-------------|
+| `commandExecutor` | Spawns CLI processes with Windows quoting, timeout/kill, ENOENT guidance |
+| `geminiExecutor` | Security guards, changeMode templating, backend dispatch |
+| `backends/gemini` | Builds Gemini CLI args, handles quota fallback (Pro → Flash) |
+| `backends/agy` | Experimental Antigravity CLI with transcript-file recovery |
+| `timeoutManager` | Configurable per-call timeout (SIGTERM → SIGKILL) |
+
+### Security
+
+- **CVE-2026-0755**: `@file` references are checked to stay within the project directory before being sent to any CLI
+- **CWE-22**: `chunkCacheKey` is validated against a strict hex format
+- **Windows injection**: All arguments are quoted for `cmd.exe` even without whitespace, neutralising `& | < > ^ ( )` metacharacters
diff --git a/docs/concepts/models.md b/docs/concepts/models.md
index e7207db..bcc2828 100644
--- a/docs/concepts/models.md
+++ b/docs/concepts/models.md
@@ -27,19 +27,27 @@ You can also append with '-m' or ask specifically with
 {
   "mcpServers": {
     "gemini-cli": {
-      "command": "gemini-mcp",
-      "env": {
-        "GEMINI_MODEL": "gemini-1.5-flash"
-      }
+      "command": "npx",
+      "args": ["-y", "gemini-mcp-tool"]
     }
   }
 }
 ```
 
-### Per Request (Coming Soon)
+The model is selected per-request via natural language or the `model` tool argument.
+
+### Per Request
+```
+ask gemini using flash to review this file
 ```
-/gemini-cli:analyze --model=flash @file.js quick review
+or explicitly:
 ```
+ask-gemini with model: "gemini-2.5-flash" — review @index.ts
+```
+
+::: warning Antigravity CLI (agy) backend
+When using `GEMINI_MCP_BACKEND=agy`, model selection is ignored — print mode is hardcoded to **Gemini 3.5 Flash**.
+:::
 
 ## Model Comparison
 
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 266f160..33621f9 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -31,7 +31,7 @@
 
 Before installing, ensure you have:
 
-- **[Node.js](https://nodejs.org/)** v16.0.0 or higher
+- **[Node.js](https://nodejs.org/)** v18.0.0 or higher
 - **[Google Gemini CLI](https://github.com/google-gemini/gemini-cli)** installed and configured on your system
 - **[Claude Desktop](https://claude.ai/download)** or **[Claude Code](https://www.anthropic.com/claude-code)** with MCP support
 
@@ -78,6 +78,27 @@ For Claude Desktop users, add this to your configuration file:
 }
 ```
 
+### Optional Environment Variables <Badge text="1.2.0" type="tip" />
+
+You can pass environment variables to configure the server:
+
+```json
+{
+  "mcpServers": {
+    "gemini-cli": {
+      "command": "npx",
+      "args": ["-y", "gemini-mcp-tool"],
+      "env": {
+        "GEMINI_MCP_APPROVAL_MODE": "plan",
+        "GEMINI_MCP_TIMEOUT_MS": "1800000"
+      }
+    }
+  }
+}
+```
+
+See [Configuration](/concepts/configuration) for all available environment variables.
+
 ::: warning
 You must restart Claude Desktop ***completely*** for changes to take effect.
 :::
@@ -162,6 +183,13 @@ Type `/gemini-cli` and these commands will appear:
 - `/gemini-cli:sandbox` - Safe code execution
 - `/gemini-cli:help` - Show help information
 - `/gemini-cli:ping` - Test connectivity
+- `/gemini-cli:brainstorm` - Structured brainstorming with methodology frameworks
+
+### New in v1.2.0
+- **Approval mode** — control Gemini's autonomy: `approvalMode: "plan"` (read-only) or `"yolo"` (auto-approve)
+- **Multi-turn sessions** — pass `sessionId` / `resume` to continue conversations across calls
+- **Pluggable backends** — set `GEMINI_MCP_BACKEND=agy` to use the experimental Antigravity CLI
+- **Per-call timeout** — configurable via `GEMINI_MCP_TIMEOUT_MS` (default 30 min)
 
 ## Need a Different Client?
 
@@ -186,7 +214,7 @@ npm install -g @google/gemini-cli
 1. Check your configuration file path
 2. Ensure JSON syntax is correct
 3. Restart your MCP client completely
-4. Verify Gemini CLI works: `gemini -help`
+4. Verify Gemini CLI works: `gemini --help`
 
 
 ### Client-Specific Issues
diff --git a/docs/index.md b/docs/index.md
index 7134bc8..0addc2b 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -34,7 +34,10 @@ features:
     details: Gemini-mcp-tool does not require any extra keys.
   - icon: 🚦
     title: Model Selection
-    details: Choose from Gemini-2.5-Pro and Gemini-2.5-Flash, using natural language.
+    details: Choose from Gemini-2.5-Pro, Gemini-2.5-Flash, or let the agy backend use Gemini 3.5 Flash.
+  - icon: 🔧
+    title: Pluggable Backends
+    details: Gemini CLI by default, experimental Antigravity CLI (agy) opt-in — ready for the Gemini CLI retirement on June 18, 2026.
 ---
 
 <div class="explore-hint" style="text-align: center; margin: 32px 0 48px; position: relative;">
diff --git a/docs/installation.md b/docs/installation.md
index ef17c46..90d4c20 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -4,7 +4,7 @@ Multiple ways to install Gemini MCP Tool, depending on your needs.
 
 ## Prerequisites
 
-- Node.js v16.0.0 or higher
+- Node.js v18.0.0 or higher
 - Claude Desktop or Claude Code with MCP support
 - Gemini CLI installed (`npm install -g @google/gemini-cli`)
 
diff --git a/docs/resources/faq.md b/docs/resources/faq.md
index a5004e9..6484d18 100644
--- a/docs/resources/faq.md
+++ b/docs/resources/faq.md
@@ -6,7 +6,7 @@
 A bridge between Claude Desktop and Google's Gemini AI, enabling you to use Gemini's powerful capabilities directly within Claude.
 
 ### Does it support windows?
-Windows testing is underway, some users have reported success and other failures.
+Yes. v1.2.0 adds robust Windows executable resolution (`GEMINI_CLI_PATH`, `.cmd` shim detection), hardened `cmd.exe` argument quoting, and `windowsHide` to suppress console popups.
 
 ### Why use this instead of Gemini directly?
 - Integrated into your existing AI workflow
@@ -32,7 +32,7 @@ Then, run "gemini" and complete auth.
 Yes! It works with both Claude Desktop and Claude Code.
 
 ### What Node.js version do I need?
-Node.js v16.0.0 or higher.
+Node.js v18.0.0 or higher.
 
 ## Usage
 
@@ -87,7 +87,16 @@ Check your organization's policies and Google's Gemini API terms of service.
 ## Advanced
 
 ### Can I use this in CI/CD?
-Not recommended - designed for interactive development.
+Yes — set `GEMINI_MCP_APPROVAL_MODE=yolo` (or `plan` for read-only) to avoid interactive approval prompts. Combine with `GEMINI_MCP_TIMEOUT_MS` for a hard time limit.
+
+### What is approval mode?
+Approval mode controls how much autonomy Gemini has. By default, no mode is forced (plain Q&A). Set `approvalMode: "plan"` for a read-only planner, `"yolo"` to auto-approve everything, or `"auto_edit"` to auto-approve edits only. See [Configuration](/concepts/configuration).
+
+### What is the agy backend?
+Antigravity CLI (`agy`) is Google's successor to Gemini CLI. Set `GEMINI_MCP_BACKEND=agy` to try it. It's experimental — print mode is Flash-only and stdout is recovered from transcript files. See [Configuration](/concepts/configuration#backends).
+
+### Can I have multi-turn conversations?
+Yes — pass `sessionId` to start a named session, then `resume` with the same id (or `"latest"`) in a follow-up call. This uses Gemini's native `--session-id` / `--resume` flags.
 
 <div style="text-align: center;">
 
diff --git a/docs/resources/roadmap.md b/docs/resources/roadmap.md
index e41b36a..f8d9dda 100644
--- a/docs/resources/roadmap.md
+++ b/docs/resources/roadmap.md
@@ -21,12 +21,18 @@ flowchart LR
     Auto-Fallback"]
     B --> C["v1.1.3
     Claude Edits, Gemini Reads"]
+    C --> D["v1.1.5
+    Security Fixes"]
+    D --> E["v1.1.6
+    CVE-2026-0755"]
+    E --> F["v1.2.0
+    Backends + Sessions"]
     
     classDef releasedNode fill:#1b5e20,stroke:#fff,color:#fff,stroke-width:2px
     classDef currentNode fill:#e64100,stroke:#fff,color:#fff,stroke-width:2px
     
-    class A,B releasedNode
-    class C currentNode
+    class A,B,C,D,E releasedNode
+    class F currentNode
 ```
 </DiagramModal>
 
@@ -48,21 +54,40 @@ config:
 timeline
     title Gemini MCP Tool Evolution
     
-    section June 2025
-        v1.1.0 Release : Claude uses Gemini!
-                       : Sandbox Mode Testing
+    section 2025
+        v1.1.0-v1.1.3  : Claude uses Gemini!
+                        : Sandbox Mode, Fallback
+                        : Change Mode
+                        
+    section May 2026
+        v1.1.5-v1.1.6  : Security Patches
+                        : CVE-2026-0755
+                        : CWE-22 path traversal
         
-        v1.1.1 Release : Bug Fixes
-                       : Enhanced Tool Descriptions
+        v1.2.0 Release  : Pluggable Backends
+                        : Approval Mode
+                        : Native Sessions
+                        : Per-call Timeout
+                        : Windows Reliability
+                        : Test Suite
                        
-    section July 2025
-        v1.1.2 Release : Fallback System
-                       
-        v1.1.3 Release : Claude Edits, Gemini Reads!
-                       
-        Beta Testing   : Beta Hooks System
-                       : Deterministic Routing
-                       : Streaming
-                       : Improved Caching
+    section Next
+        v1.3.0 Planned  : Streaming output
+                        : output-format support
+                        : Full agy backend
 ```
-</DiagramModal>
\ No newline at end of file
+</DiagramModal>
+
+## What's Next
+
+### v1.3.0 (Planned)
+- **Streaming output** — `--output-format stream-json` for real-time progress
+- **Full agy backend** — once the `agy -p` stdout bug is fixed upstream
+- **ACP persistent process** — reuse a long-lived agy process for performance
+
+### Open PRs (separate merges)
+- **#65** — MCP SDK modernization + OAuth
+- **#44** — LRU cache for performance
+- **#46** — Tool annotations
+- **#50** — Native session-id resume (partially landed in 1.2.0)
+- **#35** — Gemini schema compatibility
\ No newline at end of file
diff --git a/docs/resources/troubleshooting.md b/docs/resources/troubleshooting.md
index 0a7c914..f55430c 100644
--- a/docs/resources/troubleshooting.md
+++ b/docs/resources/troubleshooting.md
@@ -134,9 +134,10 @@ claude mcp add gemini-cli -- npx gemini-mcp-tool
    ```
 
 4. **For very large codebases, the tool prevents timeouts automatically**:
-   - Progress updates keep the connection alive
-   - Clear status messages show processing is active
-   - No manual configuration needed
+    - Progress updates keep the connection alive
+    - Clear status messages show processing is active
+    - No manual configuration needed
+    - You can also configure the timeout via `GEMINI_MCP_TIMEOUT_MS` (default 30 min; set to `0` to disable)
 
 </TroubleshootingModal>
 
@@ -147,7 +148,7 @@ claude mcp add gemini-cli -- npx gemini-mcp-tool
 
 **Common causes**:
 
-1. **Node.js version compatibility** - Ensure Node.js ≥ v16.0.0
+1. **Node.js version compatibility** - Ensure Node.js ≥ v18.0.0
 2. **Gemini CLI not installed** - Install with `npm install -g @google/gemini-cli`
 3. **API key not configured** - Run `gemini config set api_key YOUR_API_KEY`
 4. **PATH issues** - Restart terminal after installing Node.js/npm
@@ -260,6 +261,7 @@ echo $GOOGLE_GENERATIVE_AI_API_KEY
 - Backup heartbeat every 20 seconds to ensure connection stays alive
 - Clear status messages showing the tool is working
 - Automatic completion notification when done
+- The per-call timeout is configurable via the `GEMINI_MCP_TIMEOUT_MS` env var (default 30 min; `0` disables)
 
 **For very large codebases** (10,000+ files):
 - Consider breaking analysis into smaller chunks
@@ -340,8 +342,9 @@ gemini "Hello"
 
 ### Windows 11
 - **NPX flag issues**: Use `--yes` instead of `-y`
-- **Path problems**: Restart terminal after Node.js installation
+- **Path problems**: Restart terminal after Node.js installation, or set `GEMINI_CLI_PATH` to the full path of `gemini.cmd`
 - **Connection issues**: Ensure Windows Defender isn't blocking Node.js
+- **"Command not found"**: The MCP server may not inherit your shell's PATH. Set `GEMINI_CLI_PATH` in your config `env` block.
 
 ### macOS
 - **Permission issues**: Use `sudo` if npm install fails
diff --git a/docs/usage/commands.md b/docs/usage/commands.md
index f06e0c0..9414a86 100644
--- a/docs/usage/commands.md
+++ b/docs/usage/commands.md
@@ -1,52 +1,67 @@
 # Commands Reference
 
-Complete list of available commands and their usage.
+Complete list of available tools and their arguments.
 
-## Slash Commands
+## Tools
 
-### `/gemini-cli:analyze`
-Analyze files or ask questions about code.
+### `ask-gemini`
 
-```
-/gemini-cli:analyze @file.js explain this code
-/gemini-cli:analyze @src/*.ts find security issues
-/gemini-cli:analyze how do I implement authentication?
-```
+The primary tool — send a prompt to Gemini and get a response.
 
-### `/gemini-cli:sandbox`
-Execute code in a safe environment.
+| Argument | Type | Default | Description |
+|----------|------|---------|-------------|
+| `prompt` | string | *(required)* | Your analysis request. Use `@` to include files |
+| `model` | string | `gemini-2.5-pro` | Model to use (e.g. `gemini-2.5-flash`) |
+| `sandbox` | boolean | `false` | Run in isolated sandbox (`-s` flag) |
+| `changeMode` | boolean | `false` | Structured edit mode for Claude to apply |
+| `approvalMode` | string | *(unset)* | `default` / `auto_edit` / `yolo` / `plan` |
+| `sessionId` | string | — | Start/tag a conversation session |
+| `resume` | string | — | Resume a prior session by id, or `"latest"` |
+| `chunkIndex` | number | — | Which chunk to return (1-based, for changeMode) |
+| `chunkCacheKey` | string | — | Cache key for continuation (changeMode) |
 
 ```
-/gemini-cli:sandbox create a Python fibonacci generator
-/gemini-cli:sandbox test this function: [code]
+/gemini-cli:ask-gemini @file.js explain this code
+/gemini-cli:ask-gemini @src/*.ts find security issues
 ```
 
-### `/gemini-cli:help`
-Show help information and available tools.
+### `brainstorm`
+
+Structured ideation with selectable methodology frameworks.
+
+| Argument | Type | Default | Description |
+|----------|------|---------|-------------|
+| `prompt` | string | *(required)* | Brainstorming challenge or question |
+| `model` | string | `gemini-2.5-pro` | Model to use |
+| `approvalMode` | string | *(unset)* | Gemini approval mode |
+| `methodology` | string | `auto` | `divergent` / `convergent` / `scamper` / `design-thinking` / `lateral` / `auto` |
+| `domain` | string | — | Domain context (e.g. `software`, `business`) |
+| `constraints` | string | — | Known limitations or boundaries |
+| `existingContext` | string | — | Background info to build upon |
+| `ideaCount` | number | `12` | Target number of ideas |
+| `includeAnalysis` | boolean | `true` | Include feasibility/impact scoring |
 
 ```
-/gemini-cli:help
-/gemini-cli:help analyze
+/gemini-cli:brainstorm how can we improve our onboarding flow?
 ```
 
-### `/gemini-cli:ping`
-Test connectivity with Gemini.
+### `Help`
+
+Show Gemini CLI help information.
 
 ```
-/gemini-cli:ping
-/gemini-cli:ping "Custom message"
+/gemini-cli:Help
 ```
 
-## Command Structure
+### `ping`
+
+Test connectivity with an echo.
 
 ```
-/gemini-cli:<tool> [options] <arguments>
+/gemini-cli:ping
+/gemini-cli:ping "Custom message"
 ```
 
-- **tool**: The action to perform (analyze, sandbox, help, ping)
-- **options**: Optional flags (coming soon)
-- **arguments**: Input text, files, or questions
-
 ## Natural Language Alternative
 
 Instead of slash commands, you can use natural language:
@@ -54,6 +69,7 @@ Instead of slash commands, you can use natural language:
 - "Use gemini to analyze index.js"
 - "Ask gemini to create a test file"
 - "Have gemini explain this error"
+- "Brainstorm ideas for the new feature using gemini"
 
 ## File Patterns
 
@@ -61,7 +77,6 @@ Instead of slash commands, you can use natural language:
 ```
 @README.md
 @src/index.js
-@test/unit.test.ts
 ```
 
 ### Multiple Files
@@ -82,21 +97,34 @@ Instead of slash commands, you can use natural language:
 @test/unit/       # All files in test/unit
 ```
 
+::: danger Security
+`@file` references are restricted to the project directory. Paths like `@../secret.txt`, `@~/.ssh/id_rsa`, or `@/etc/passwd` are rejected (CVE-2026-0755).
+:::
+
 ## Advanced Usage
 
-### Combining Files and Questions
+### Approval Mode
+
+Control Gemini's autonomy per-call:
 ```
-/gemini-cli:analyze @package.json @src/index.js is the entry point configured correctly?
+ask gemini with approvalMode "plan" to review the architecture
+ask gemini with approvalMode "yolo" and sandbox to run this test suite
 ```
 
-### Complex Queries
+### Multi-turn Sessions
+
+Continue a conversation across multiple calls:
 ```
-/gemini-cli:analyze @src/**/*.js @test/**/*.test.js what's the test coverage?
+ask gemini with sessionId "review-1" to review the auth module
+ask gemini with resume "review-1" to now suggest improvements
+ask gemini with resume "latest" to continue where we left off
 ```
 
-### Code Generation
+### Change Mode
+
+Get structured edit suggestions that Claude can apply directly:
 ```
-/gemini-cli:analyze @models/user.js generate TypeScript types for this model
+ask gemini in changeMode to refactor @src/utils.js for readability
 ```
 
 ## Tips
diff --git a/scripts/run-tests.mjs b/scripts/run-tests.mjs
index 5ba7268..d1a978f 100644
--- a/scripts/run-tests.mjs
+++ b/scripts/run-tests.mjs
@@ -25,9 +25,15 @@ if (tests.length === 0) {
   process.exit(0);
 }
 
+// tsx is loaded via `--import` on Node >= 20.6 and via the older `--loader` flag below
+// that (the engines floor is >=18). NOTE(review): tsx documents `--import` for Node >= 18.19 too — confirm `--loader` still works there.
+const [major, minor] = process.versions.node.split(".").map(Number);
+const supportsImport = major > 20 || (major === 20 && minor >= 6);
+const loaderArgs = supportsImport ? ["--import", "tsx"] : ["--loader", "tsx"];
+
 const result = spawnSync(
   process.execPath,
-  ["--import", "tsx", "--test", ...tests],
+  [...loaderArgs, "--test", ...tests],
   { stdio: "inherit" },
 );
 process.exit(result.status ?? 1);
diff --git a/src/tools/ask-gemini.tool.ts b/src/tools/ask-gemini.tool.ts
index bfdc917..09db340 100644
--- a/src/tools/ask-gemini.tool.ts
+++ b/src/tools/ask-gemini.tool.ts
@@ -61,9 +61,13 @@ export const askGeminiTool: UnifiedTool = {
         prompt as string
       );
     }
-    // Surface the active session id so the caller can resume the conversation.
-    const activeSession = (resume as string | undefined) || (sessionId as string | undefined);
-    const sessionNote = activeSession ? `\n\n[session: ${activeSession}]` : '';
+    // Echo back the session id the caller supplied so follow-up calls can continue
+    // it. This is the requested id, not one parsed from the CLI; 'latest' is a
+    // resume selector (not an id), so it is not surfaced.
+    const requestedSession =
+      (typeof resume === 'string' && resume !== 'latest' ? resume : undefined) ||
+      (sessionId as string | undefined);
+    const sessionNote = requestedSession ? `\n\n[session: ${requestedSession}]` : '';
     return `${STATUS_MESSAGES.GEMINI_RESPONSE}\n${result}${sessionNote}`; // changeMode false
   }
 };
\ No newline at end of file
diff --git a/src/utils/commandExecutor.ts b/src/utils/commandExecutor.ts
index f31e42f..0fe6e6f 100644
--- a/src/utils/commandExecutor.ts
+++ b/src/utils/commandExecutor.ts
@@ -36,8 +36,11 @@ export function resolveCommandForExecution(command: string): string {
       });
       const candidates = out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
       const byExt = (ext: string) => candidates.find((c) => c.toLowerCase().endsWith(ext));
+      // Prefer extensions cmd.exe can launch directly (.cmd/.exe/.bat). A `.ps1`
+      // shim is NOT runnable via shell:true, so it is never preferred — only the
+      // raw first candidate is used as a last resort.
       resolved =
-        byExt(".cmd") || byExt(".ps1") || byExt(".bat") || byExt(".exe") ||
+        byExt(".cmd") || byExt(".exe") || byExt(".bat") ||
         candidates[0] || `${command}.cmd`;
     } catch {
       resolved = `${command}.cmd`;
@@ -106,6 +109,12 @@ export async function executeCommand(
     });
 
     if (stdinData !== undefined && childProcess.stdin) {
+      // If the child has already exited/closed its stdin, write() emits EPIPE on
+      // the stream; without this listener that becomes an uncaught exception and
+      // crashes the (long-lived) MCP server.
+      childProcess.stdin.on("error", (err) => {
+        Logger.error(`stdin write failed: ${err instanceof Error ? err.message : String(err)}`);
+      });
       childProcess.stdin.write(stdinData);
       childProcess.stdin.end();
     }
@@ -130,11 +139,19 @@ export async function executeCommand(
         if (isResolved) return;
         isResolved = true;
         Logger.error(`Command timed out after ${timeoutMs}ms; terminating: ${command}`);
-        try { childProcess.kill("SIGTERM"); } catch { /* already gone */ }
-        const sigkill = setTimeout(() => {
-          try { childProcess.kill("SIGKILL"); } catch { /* already gone */ }
-        }, 2000);
-        sigkill.unref?.();
+        if (isWindows && childProcess.pid) {
+          // With shell:true the child is cmd.exe; kill() would orphan the real
+          // gemini/agy process. taskkill /T terminates the whole process tree.
+          try {
+            execSync(`taskkill /pid ${childProcess.pid} /T /F`, { stdio: "ignore" });
+          } catch { /* already gone */ }
+        } else {
+          try { childProcess.kill("SIGTERM"); } catch { /* already gone */ }
+          const sigkill = setTimeout(() => {
+            try { childProcess.kill("SIGKILL"); } catch { /* already gone */ }
+          }, 2000);
+          sigkill.unref?.();
+        }
         reject(new Error(`Command timed out after ${timeoutMs}ms: ${command}`));
       }, timeoutMs);
       timeoutHandle.unref?.();

From dc55a5700214faa33c0bc13d4dcaff5891641c84 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 14:21:05 -0700
Subject: [PATCH 3/8] docs: make version badge dynamic in theme layout

---
 docs/.vitepress/theme/Layout.vue | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/.vitepress/theme/Layout.vue b/docs/.vitepress/theme/Layout.vue
index 7185c30..0e09b6a 100644
--- a/docs/.vitepress/theme/Layout.vue
+++ b/docs/.vitepress/theme/Layout.vue
@@ -6,7 +6,7 @@
     </template>
     <template #nav-bar-content-before>
       <div class="nav-warning">
-        🏷️ <span>1.1.4</span>
+        🏷️ <span>{{ version }}</span>
       </div>
     </template>
     <template #sidebar-nav-after>
@@ -36,6 +36,9 @@ import FundingHero from './components/FundingHero.vue'
 import FundingEffects from './components/FundingEffects.vue'
 import FundingLayout from './FundingLayout.vue'
 
+// Read the version from package.json so the badge always matches the release
+import { version } from '../../../package.json'
+
 const { Layout } = DefaultTheme
 const route = useRoute()
 const { frontmatter } = useData()

From 1ffecba22739f457684829138b83dca3c7d7d5bb Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 14:22:04 -0700
Subject: [PATCH 4/8] docs: document version badge dynamic update in CHANGELOG

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6d7ae52..f56e6e2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
 
 ### Changed
 - `engines.node` raised to `>=18`.
-- The server version is now read from `package.json` at runtime, instead of a hardcoded string that had drifted to `1.1.4`.
+- The server version and the documentation navbar badge are now read from `package.json` dynamically, instead of hardcoded strings that had drifted to `1.1.4`.
 - Installing from a Git checkout now builds automatically via a `prepare` script.
 
 ### Fixed

From 5e522456a38eab237cee479384a4c574283de368 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 18:41:30 -0700
Subject: [PATCH 5/8] feat: configurable default model (GEMINI_MODEL) + setup
 doctor; document 1.2.0 env vars

Closes #49.

- GEMINI_MODEL sets the default model when a call doesn't pass one, so the
  assistant can't silently fall back to an older model (#49; also helps #51).
  GEMINI_FLASH_MODEL overrides the quota-fallback target. Precedence:
  per-call model arg > GEMINI_MODEL > Gemini CLI default.
- New setup doctor (scripts/doctor.mjs, 'npm run doctor', and a
  'gemini-mcp-doctor' bin): reports the active backend, detected gemini/agy
  installs (path + version), and the effective model/approval/timeout/env
  configuration with actionable hints. Self-contained, zero-dependency.
- Docs: new README 'Environment Variables' + 'Setup Doctor' sections; document
  GEMINI_MODEL/GEMINI_FLASH_MODEL and the doctor across docs/concepts/
  configuration.md, docs/concepts/models.md and docs/api.md; CHANGELOG entries.
---
 CHANGELOG.md                   |   2 +
 README.md                      |  37 ++++++
 docs/api.md                    |   2 +
 docs/concepts/configuration.md |  40 +++++++
 docs/concepts/models.md        |   8 +-
 package.json                   |   5 +-
 scripts/doctor.mjs             | 199 +++++++++++++++++++++++++++++++++
 src/backends/gemini.test.ts    |  19 +++-
 src/backends/gemini.ts         |  27 ++++-
 src/constants.ts               |   2 +
 10 files changed, 331 insertions(+), 10 deletions(-)
 create mode 100755 scripts/doctor.mjs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f56e6e2..ec8eacf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
 - **Pluggable backends** — the executor is now backend-agnostic. The Gemini CLI stays the default; set `GEMINI_MCP_BACKEND=agy` to use the **experimental** Antigravity CLI (`agy`) backend, ahead of Gemini CLI's 2026-06-18 retirement for free/Pro/Ultra tiers. (agy print-mode is Flash-only, and its reply is recovered from agy's transcript files to work around the upstream `agy -p` empty-stdout bug.)
 - **Per-command timeout** — a hung CLI call is now terminated (SIGTERM → SIGKILL). Configurable via `GEMINI_MCP_TIMEOUT_MS` (default 30 minutes; `0` disables).
 - **Windows executable resolution** — honours `GEMINI_CLI_PATH`, otherwise resolves the real `gemini` shim via `where` (preferring `.cmd`), fixing "command not found" when the MCP server doesn't inherit your shell's PATH.
+- **Configurable default model** — `GEMINI_MODEL` sets the model used when a call doesn't pass one, so the assistant can't silently fall back to an older model (#49); `GEMINI_FLASH_MODEL` overrides the quota-fallback target. Precedence: per-call `model` arg → `GEMINI_MODEL` → Gemini CLI default.
+- **Setup doctor** — `npm run doctor` / the `gemini-mcp-doctor` bin reports the active backend, detected `gemini`/`agy` installs (path + version), and the effective model/approval/timeout/env configuration, with actionable hints.
 - **Test suite** — `node:test` coverage for the `@file` security guard, Windows quoting/resolution, approval-mode and session argument building, backend selection, and timeout parsing (`npm test`).
 
 ### Changed
diff --git a/README.md b/README.md
index 715ef16..e2ba67d 100644
--- a/README.md
+++ b/README.md
@@ -105,6 +105,43 @@ If you installed globally, use this configuration instead:
 
 After updating the configuration, restart your terminal session.
 
+### Environment Variables (1.2.0)
+
+All optional — set them in your MCP client's `env` block. See the [Configuration docs](docs/concepts/configuration.md) for full detail.
+
+| Variable | Default | Purpose |
+|----------|---------|---------|
+| `GEMINI_MODEL` | *(CLI default)* | Default model when a call doesn't specify one (e.g. `gemini-3-pro-preview`) |
+| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | `default` / `auto_edit` / `yolo` / `plan` → forwarded to `gemini --approval-mode` |
+| `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` (experimental) |
+| `GEMINI_MCP_TIMEOUT_MS` | `1800000` | Per-call timeout in ms; `0` disables |
+| `GEMINI_CLI_PATH` | *(auto)* | Full path to the `gemini` executable (Windows PATH issues) |
+| `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the automatic quota fallback |
+
+Example — pin a default model so the assistant can't fall back to an older one ([#49](https://github.com/jamubc/gemini-mcp-tool/issues/49)):
+
+```json
+{
+  "mcpServers": {
+    "gemini-cli": {
+      "command": "npx",
+      "args": ["-y", "gemini-mcp-tool"],
+      "env": { "GEMINI_MODEL": "gemini-3-pro-preview" }
+    }
+  }
+}
+```
+
+### Setup Doctor
+
+Not sure what's installed or how it's configured? Run the doctor to see the active backend, the detected `gemini` / `agy` versions and paths, and your effective model / approval / timeout settings:
+
+```bash
+npx -p gemini-mcp-tool gemini-mcp-doctor
+# or, from a clone of this repo:
+npm run doctor
+```
+
 ## Example Workflow
 
 - **Natural language**: "use gemini to explain index.html", "understand the massive project using gemini", "ask gemini to search for latest news"
diff --git a/docs/api.md b/docs/api.md
index f9341f1..bb9137d 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -65,6 +65,8 @@ Returns `gemini --help` output.
 
 | Variable | Default | Description |
 |----------|---------|-------------|
+| `GEMINI_MODEL` | *(CLI default)* | Default model when a call omits `model` |
+| `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the quota fallback |
 | `GEMINI_MCP_BACKEND` | `gemini` | Backend: `gemini` or `agy` (experimental) |
 | `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
 | `GEMINI_MCP_TIMEOUT_MS` | `1800000` | Per-call timeout in ms; `0` disables |
diff --git a/docs/concepts/configuration.md b/docs/concepts/configuration.md
index 220729f..6a3c77a 100644
--- a/docs/concepts/configuration.md
+++ b/docs/concepts/configuration.md
@@ -6,6 +6,8 @@ All configuration is done via environment variables in your MCP client config. N
 
 | Variable | Default | Description |
 |----------|---------|-------------|
+| `GEMINI_MODEL` | *(CLI default)* | Default model when a call doesn't pass one |
+| `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the automatic quota fallback |
 | `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
 | `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` |
 | `GEMINI_MCP_TIMEOUT_MS` | `1800000` (30 min) | Per-call timeout; `0` disables |
@@ -36,6 +38,26 @@ claude mcp add gemini-cli -e GEMINI_MCP_APPROVAL_MODE=plan -- npx -y gemini-mcp-
 
 ---
 
+## Default Model <Badge text="1.2.0" type="tip" />
+
+By default the model is chosen per request (natural language or the `model` argument); if none is given, the Gemini CLI uses its own default. Set `GEMINI_MODEL` to pin a default so the assistant can't fall back to an older model ([issue #49](https://github.com/jamubc/gemini-mcp-tool/issues/49)):
+
+```json
+{
+  "env": {
+    "GEMINI_MODEL": "gemini-3-pro-preview"
+  }
+}
+```
+
+**Precedence:** per-call `model` argument → `GEMINI_MODEL` → Gemini CLI default. `GEMINI_FLASH_MODEL` overrides the model used for the automatic quota fallback (default `gemini-2.5-flash`).
+
+::: info
+The `agy` backend ignores model selection — its print mode is hardcoded to Gemini 3.5 Flash.
+:::
+
+---
+
 ## Approval Mode
 
 Controls how much autonomy Gemini has when processing a request. Maps directly to `gemini --approval-mode`.
@@ -167,3 +189,21 @@ If you get "command not found" errors on Windows, set `GEMINI_CLI_PATH` to the f
   }
 }
 ```
+
+---
+
+## Diagnostics: the setup doctor <Badge text="1.2.0" type="tip" />
+
+Run the bundled doctor to see exactly what the tool will do on your machine — the active backend, the detected `gemini` / `agy` versions and paths, your effective model/approval/timeout configuration, and any problems:
+
+```bash
+npx -p gemini-mcp-tool gemini-mcp-doctor
+# or, from a clone of the repo:
+npm run doctor
+```
+
+It exits non-zero if the active backend's CLI can't be found, which makes it handy in setup scripts.
+
+::: info
+The doctor reads the environment of the shell you run it in. Your MCP client sets its own `env` for the server process, so values there may differ from what the doctor prints.
+:::
diff --git a/docs/concepts/models.md b/docs/concepts/models.md
index bcc2828..d9e8071 100644
--- a/docs/concepts/models.md
+++ b/docs/concepts/models.md
@@ -23,18 +23,22 @@ You can also append with '-m' or ask specifically with
 ```
 
 ### In Configuration
+
+Set `GEMINI_MODEL` to choose the default model for **every** call that doesn't specify one. This is the fix for the assistant occasionally falling back to an older model ([issue #49](https://github.com/jamubc/gemini-mcp-tool/issues/49)) — pin it once in your MCP config:
+
 ```json
 {
   "mcpServers": {
     "gemini-cli": {
       "command": "npx",
-      "args": ["-y", "gemini-mcp-tool"]
+      "args": ["-y", "gemini-mcp-tool"],
+      "env": { "GEMINI_MODEL": "gemini-3-pro-preview" }
     }
   }
 }
 ```
 
-The model is selected per-request via natural language or the `model` tool argument.
+**Precedence:** a per-request `model` argument overrides `GEMINI_MODEL`, which overrides the Gemini CLI's own default. So you can pin a default here and still say "use flash" for a one-off.
 
 ### Per Request
 ```
diff --git a/package.json b/package.json
index 6e5aa3c..eb5143b 100644
--- a/package.json
+++ b/package.json
@@ -5,13 +5,15 @@
   "type": "module",
   "main": "dist/index.js",
   "bin": {
-    "gemini-mcp": "dist/index.js"
+    "gemini-mcp": "dist/index.js",
+    "gemini-mcp-doctor": "scripts/doctor.mjs"
   },
   "scripts": {
     "build": "tsc -p tsconfig.build.json",
     "prepare": "npm run build",
     "start": "node dist/index.js",
     "dev": "tsc && node dist/index.js",
+    "doctor": "node scripts/doctor.mjs",
     "test": "node scripts/run-tests.mjs",
     "lint": "tsc --noEmit",
     "contribute": "tsx src/contribute.ts",
@@ -43,6 +45,7 @@
   },
   "files": [
     "dist/",
+    "scripts/doctor.mjs",
     "README.md",
     "LICENSE"
   ],
diff --git a/scripts/doctor.mjs b/scripts/doctor.mjs
new file mode 100755
index 0000000..91a9e53
--- /dev/null
+++ b/scripts/doctor.mjs
@@ -0,0 +1,199 @@
+#!/usr/bin/env node
+// gemini-mcp-tool setup doctor.
+//
+// Reports what the tool will actually do on this machine: which CLI backend is
+// active, whether the gemini / agy executables are installed (path + version),
+// the effective model configuration, and every related environment variable.
+//
+// Self-contained: pure Node, no build step or dependencies. The constant names
+// below mirror src/constants.ts — keep them in sync.
+
+import { spawnSync } from "node:child_process";
+import { existsSync } from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+const ENV = {
+  BACKEND: "GEMINI_MCP_BACKEND",
+  APPROVAL_MODE: "GEMINI_MCP_APPROVAL_MODE",
+  TIMEOUT_MS: "GEMINI_MCP_TIMEOUT_MS",
+  GEMINI_CLI_PATH: "GEMINI_CLI_PATH",
+  MODEL: "GEMINI_MODEL",
+  FLASH_MODEL: "GEMINI_FLASH_MODEL",
+};
+const DEFAULT_FLASH_MODEL = "gemini-2.5-flash";
+const DEFAULT_TIMEOUT_MS = 30 * 60 * 1000;
+const APPROVAL_MODES = ["default", "auto_edit", "yolo", "plan"];
+
+const isWindows = process.platform === "win32";
+const useColor = process.stdout.isTTY && !process.env.NO_COLOR;
+const paint = (code, s) => (useColor ? `\x1b[${code}m${s}\x1b[0m` : s);
+const c = {
+  bold: (s) => paint("1", s),
+  dim: (s) => paint("2", s),
+  green: (s) => paint("32", s),
+  yellow: (s) => paint("33", s),
+  red: (s) => paint("31", s),
+  cyan: (s) => paint("36", s),
+};
+const OK = c.green("✓");
+const WARN = c.yellow("⚠");
+const BAD = c.red("✗");
+
+const problems = [];
+
+function runCmd(cmd, args) {
+  try {
+    const r = spawnSync(cmd, args, {
+      encoding: "utf8",
+      timeout: 20000,
+      shell: isWindows, // .cmd shims on Windows need a shell
+      windowsHide: true,
+    });
+    if (r.error) return { ok: false, err: r.error.message };
+    return {
+      ok: r.status === 0,
+      status: r.status,
+      out: (r.stdout || "").trim(),
+      err: (r.stderr || "").trim(),
+    };
+  } catch (e) {
+    return { ok: false, err: e instanceof Error ? e.message : String(e) };
+  }
+}
+
+function locate(cmd) {
+  const r = runCmd(isWindows ? "where" : "which", [cmd]);
+  if (!r.ok || !r.out) return [];
+  return r.out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
+}
+
+// Detect one CLI: PATH hits plus an optional GEMINI_CLI_PATH override. `found` means `primary` exists.
+function detectCli(cmd, { honourEnvPath = false } = {}) {
+  const override = honourEnvPath ? (process.env[ENV.GEMINI_CLI_PATH] || "").trim() : "";
+  const onPath = locate(cmd);
+  // Trust the override only if it exists on disk; a stale path must not be reported as found.
+  const via = override && existsSync(override) ? override : "";
+  const candidates = via ? [via, ...onPath.filter((p) => p !== via)] : onPath;
+  const primary = candidates[0] || null;
+  // Probe the override itself (quoted for cmd.exe) so the version matches the reported path.
+  const exe = via ? (isWindows ? `"${via}"` : via) : cmd;
+  const v = primary ? runCmd(exe, ["--version"]) : null;
+  const version = v && v.ok && v.out ? v.out.split(/\r?\n/)[0].trim() : null;
+  const ext = primary ? path.extname(primary).toLowerCase() : "";
+  return { found: primary !== null, primary, candidates, override: via || null, version, ext };
+}
+
+function envLine(key, { fallback = c.dim("(unset)"), mask = false } = {}) {
+  const raw = process.env[key];
+  if (raw === undefined || raw === "") return `${key} = ${fallback}`;
+  return `${key} = ${c.cyan(mask ? "********" : raw)}`;
+}
+
+function humanizeMs(ms) {
+  if (ms === 0) return "disabled (waits forever)";
+  if (ms % 60000 === 0) return `${ms / 60000} min`;
+  if (ms % 1000 === 0) return `${ms / 1000} s`;
+  return `${ms} ms`;
+}
+
+function heading(title) {
+  console.log("\n" + c.bold(title));
+  console.log(c.dim("─".repeat(Math.max(title.length, 16))));
+}
+
+// ── System ───────────────────────────────────────────────────────────────────
+heading("System");
+console.log(`  node      ${process.version}`);
+console.log(`  platform  ${process.platform} (${process.arch})`);
+
+// ── Backend selection ──────────────────────────────────────────────────────--
+const rawBackend = (process.env[ENV.BACKEND] || "gemini").trim().toLowerCase();
+const backend = rawBackend === "agy" || rawBackend === "antigravity" ? "agy" : "gemini";
+heading("Active backend");
+console.log(`  ${ENV.BACKEND} = ${process.env[ENV.BACKEND] ? c.cyan(process.env[ENV.BACKEND]) : c.dim("(unset → gemini)")}`);
+console.log(`  → using: ${c.bold(backend)}${backend === "agy" ? c.yellow("  (experimental)") : ""}`);
+if (process.env[ENV.BACKEND] && backend === "gemini" && rawBackend !== "gemini") {
+  console.log(`  ${WARN} unrecognised value ${JSON.stringify(process.env[ENV.BACKEND])} — defaulting to gemini`);
+}
+
+// ── Gemini CLI ─────────────────────────────────────────────────────────────--
+heading("Gemini CLI");
+const gemini = detectCli("gemini", { honourEnvPath: true });
+if (gemini.found) {
+  console.log(`  ${OK} found${gemini.override ? " (via " + ENV.GEMINI_CLI_PATH + ")" : ""}`);
+  console.log(`     path     ${gemini.primary}${gemini.ext ? c.dim("  [" + gemini.ext + "]") : ""}`);
+  console.log(`     version  ${gemini.version ? c.cyan(gemini.version) : c.yellow("(could not read --version)")}`);
+  if (gemini.candidates.length > 1) {
+    console.log(c.dim(`     also on PATH: ${gemini.candidates.slice(1).join(", ")}`));
+  }
+} else {
+  console.log(`  ${BAD} not found on PATH`);
+  if (backend === "gemini") {
+    problems.push(
+      `Gemini CLI not found. Install it (npm i -g @google/gemini-cli) or set ${ENV.GEMINI_CLI_PATH} to its full path.`,
+    );
+  }
+}
+
+// ── Antigravity CLI (agy) ─────────────────────────────────────────────────--
+heading("Antigravity CLI (agy)");
+const agy = detectCli("agy");
+const agyDataDir = path.join(os.homedir(), ".gemini", "antigravity-cli");
+if (agy.found) {
+  console.log(`  ${OK} found`);
+  console.log(`     path     ${agy.primary}`);
+  console.log(`     version  ${agy.version ? c.cyan(agy.version) : c.yellow("(could not read --version)")}`);
+  console.log(`     data dir ${existsSync(agyDataDir) ? OK + " " + agyDataDir : WARN + " missing (run `agy -i` once to authenticate)"}`);
+} else {
+  console.log(`  ${c.dim("not installed")} ${c.dim("— optional; the future backend once Gemini CLI retires 2026-06-18")}`);
+  if (backend === "agy") {
+    problems.push("GEMINI_MCP_BACKEND=agy but the agy executable was not found on PATH.");
+  }
+}
+
+// ── Model configuration ───────────────────────────────────────────────────--
+heading("Model configuration");
+const defaultModel = (process.env[ENV.MODEL] || "").trim();
+const flashModel = (process.env[ENV.FLASH_MODEL] || "").trim() || DEFAULT_FLASH_MODEL;
+console.log(`  default model   ${defaultModel ? c.cyan(defaultModel) + c.dim("  (GEMINI_MODEL)") : c.dim("(Gemini CLI's own default; pass model: or set GEMINI_MODEL)")}`);
+console.log(`  flash fallback  ${c.cyan(flashModel)}${process.env[ENV.FLASH_MODEL] ? c.dim("  (GEMINI_FLASH_MODEL)") : c.dim("  (default)")}`);
+if (backend === "agy") {
+  console.log(`  ${WARN} agy print-mode ignores model selection (hardcoded to Gemini 3.5 Flash)`);
+}
+
+// ── Approval & timeout ─────────────────────────────────────────────────────--
+heading("Behaviour");
+const approval = (process.env[ENV.APPROVAL_MODE] || "").trim();
+if (!approval) {
+  console.log(`  approval mode   ${c.dim("(unset → no flag; plain Q&A)")}`);
+} else if (APPROVAL_MODES.includes(approval)) {
+  console.log(`  approval mode   ${c.cyan(approval)}`);
+  if (approval === "plan") console.log(`  ${WARN} 'plan' makes Gemini an autonomous planner in headless mode — not ideal for plain Q&A`);
+} else {
+  console.log(`  approval mode   ${c.yellow(approval)} ${WARN} not one of ${APPROVAL_MODES.join("/")} — will be ignored`);
+}
+const rawTimeout = (process.env[ENV.TIMEOUT_MS] || "").trim();
+let timeoutMs = DEFAULT_TIMEOUT_MS;
+if (rawTimeout) {
+  const n = Number(rawTimeout);
+  timeoutMs = Number.isFinite(n) && n > 0 ? n : 0;
+}
+console.log(`  timeout         ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? c.dim("  (GEMINI_MCP_TIMEOUT_MS)") : c.dim("  (default)")}`);
+
+// ── Environment variables ──────────────────────────────────────────────────--
+heading("Environment variables (this shell)");
+for (const key of Object.values(ENV)) console.log("  " + envLine(key));
+console.log(c.dim("\n  Note: your MCP client sets its own env for the server process — these are"));
+console.log(c.dim("  the values in the shell running this doctor, which may differ."));
+
+// ── Summary ────────────────────────────────────────────────────────────────--
+heading("Summary");
+if (problems.length === 0) {
+  console.log(`  ${OK} ${c.green("No problems detected.")} Active backend '${backend}' looks ready.`);
+} else {
+  console.log(`  ${BAD} ${c.red(`${problems.length} issue(s) found:`)}`);
+  for (const p of problems) console.log(`     - ${p}`);
+}
+console.log("");
+process.exitCode = problems.length === 0 ? 0 : 1; // not process.exit(): it can cut off piped stdout
diff --git a/src/backends/gemini.test.ts b/src/backends/gemini.test.ts
index 75749a2..1a068f0 100644
--- a/src/backends/gemini.test.ts
+++ b/src/backends/gemini.test.ts
@@ -1,6 +1,6 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
-import { resolveApprovalMode, buildGeminiArgs } from "./gemini.js";
+import { resolveApprovalMode, buildGeminiArgs, resolveModel } from "./gemini.js";
 
 const ENV_KEY = "GEMINI_MCP_APPROVAL_MODE";
 
@@ -47,6 +47,23 @@ test("buildGeminiArgs forces no approval mode by default", () => {
   });
 });
 
+test("resolveModel: arg > GEMINI_MODEL env > undefined", () => {
+  const prev = process.env.GEMINI_MODEL;
+  delete process.env.GEMINI_MODEL;
+  try {
+    assert.equal(resolveModel(), undefined);
+    assert.equal(resolveModel("gemini-2.5-flash"), "gemini-2.5-flash");
+    process.env.GEMINI_MODEL = "gemini-3-pro-preview";
+    assert.equal(resolveModel(), "gemini-3-pro-preview");
+    assert.equal(resolveModel("gemini-2.5-flash"), "gemini-2.5-flash"); // explicit arg wins
+    process.env.GEMINI_MODEL = "   ";
+    assert.equal(resolveModel(), undefined); // blank env ignored
+  } finally {
+    if (prev === undefined) delete process.env.GEMINI_MODEL;
+    else process.env.GEMINI_MODEL = prev;
+  }
+});
+
 test("buildGeminiArgs adds the approval flag only when requested; resume beats sessionId", () => {
   withEnv(undefined, () => {
     assert.deepEqual(buildGeminiArgs(undefined, { approvalMode: "yolo" }), [
diff --git a/src/backends/gemini.ts b/src/backends/gemini.ts
index cc7ea9c..86e8fb8 100644
--- a/src/backends/gemini.ts
+++ b/src/backends/gemini.ts
@@ -26,6 +26,20 @@ export function resolveApprovalMode(arg?: string): ApprovalMode | undefined {
   return VALID_APPROVAL_MODES.includes(candidate) ? (candidate as ApprovalMode) : undefined;
 }
 
+/**
+ * Resolve the model to use: explicit per-call arg > GEMINI_MODEL env > undefined
+ * (let the Gemini CLI pick its own default). The env default lets users pin a
+ * model in their MCP config so Claude can't fall back to an older one (issue #49).
+ */
+export function resolveModel(argModel?: string): string | undefined {
+  return argModel || process.env[ENV.MODEL]?.trim() || undefined;
+}
+
+/** The model the quota fallback retries on (GEMINI_FLASH_MODEL or the default). */
+export function resolveFlashModel(): string {
+  return process.env[ENV.FLASH_MODEL]?.trim() || MODELS.FLASH;
+}
+
 /** Build the Gemini CLI argv (minus the prompt, which may go on stdin). */
 export function buildGeminiArgs(
   model: string | undefined,
@@ -61,23 +75,24 @@ export const geminiBackend: Backend = {
   name: "gemini",
   supportsModelSelection: true,
   async run(prompt, opts) {
-    const model = opts.model;
+    const model = resolveModel(opts.model);
+    const flashModel = resolveFlashModel();
     try {
       return await runOnce(prompt, model, opts);
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       // gemini-2.5-pro quota exhausted → retry once on flash (unless already flash).
-      if (message.includes(ERROR_MESSAGES.QUOTA_EXCEEDED) && model !== MODELS.FLASH) {
-        Logger.warn(`${ERROR_MESSAGES.QUOTA_EXCEEDED}. Falling back to ${MODELS.FLASH}.`);
+      if (message.includes(ERROR_MESSAGES.QUOTA_EXCEEDED) && model !== flashModel) {
+        Logger.warn(`${ERROR_MESSAGES.QUOTA_EXCEEDED}. Falling back to ${flashModel}.`);
         try {
-          const result = await runOnce(prompt, MODELS.FLASH, opts);
-          Logger.warn(`Successfully executed with ${MODELS.FLASH} fallback.`);
+          const result = await runOnce(prompt, flashModel, opts);
+          Logger.warn(`Successfully executed with ${flashModel} fallback.`);
           return result;
         } catch (fallbackError) {
           const fe =
             fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
           throw new Error(
-            `${MODELS.PRO} quota exceeded, ${MODELS.FLASH} fallback also failed: ${fe}`,
+            `${MODELS.PRO} quota exceeded, ${flashModel} fallback also failed: ${fe}`,
           );
         }
       }
diff --git a/src/constants.ts b/src/constants.ts
index 087ea0f..80f94b1 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -101,6 +101,8 @@ export const ENV = {
   APPROVAL_MODE: "GEMINI_MCP_APPROVAL_MODE", // overridden per-call by the approvalMode arg
   GEMINI_CLI_PATH: "GEMINI_CLI_PATH", // explicit path to the gemini executable (Windows shim resolution)
   TIMEOUT_MS: "GEMINI_MCP_TIMEOUT_MS", // per-call command timeout in milliseconds
+  MODEL: "GEMINI_MODEL", // default model when a call doesn't pass one (issue #49)
+  FLASH_MODEL: "GEMINI_FLASH_MODEL", // overrides the quota-fallback model (default gemini-2.5-flash)
 } as const;
 
 

From 72b9ac80a8d66254dc7601dcf0e52cf687aaa8a9 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sun, 31 May 2026 03:12:44 -0700
Subject: [PATCH 6/8] fix: 1.1.6-parity defaults + keep setup doctor internal

Ensure v1.2.0 behaves identically to v1.1.6 when no env vars are set:
make the per-call timeout strictly opt-in (GEMINI_MCP_TIMEOUT_MS), so it
is disabled by default instead of imposing a 30-minute cap. resolveTimeoutMs
returns 0 when unset/blank/invalid; a positive value enables it.

Keep the setup doctor as a private development/diagnostic tool: drop the
gemini-mcp-doctor bin and the scripts/doctor.mjs files entry so it ships
with the repo but not the npm package, and remove the public README/docs
sections. Still runnable via `npm run doctor`.

Document both in the CHANGELOG.
---
 CHANGELOG.md                     |  9 ++++++---
 README.md                        | 12 +-----------
 docs/concepts/configuration.md   | 30 ++++++------------------------
 package.json                     |  4 +---
 scripts/doctor.mjs               | 18 +++++++++++-------
 src/utils/commandExecutor.ts     |  5 +++--
 src/utils/timeoutManager.test.ts | 14 +++++++++-----
 src/utils/timeoutManager.ts      | 22 +++++++++++++---------
 8 files changed, 50 insertions(+), 64 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ec8eacf..f669510 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,16 +1,15 @@
 # Changelog
 
 ## [1.2.0] - 2026-05-30
-First feature release after the 1.1.6 security patch. Hardens cross-platform execution, adds an opt-in safety control and native multi-turn sessions, makes the CLI backend pluggable (ahead of Gemini CLI's retirement), and adds a real test suite.
+First feature release after the 1.1.6 security patch. Hardens cross-platform execution, adds an opt-in safety control and native multi-turn sessions, makes the CLI backend pluggable (ahead of Gemini CLI's retirement), and adds a real test suite. **With no environment variables set, behaviour is identical to 1.1.6** — every new knob (backend, model, approval mode, timeout, executable path) is off/unset by default and only changes behaviour when you opt in.
 
 ### Added
 - **Approval mode** — optional `approvalMode` argument on `ask-gemini`/`brainstorm` (and `GEMINI_MCP_APPROVAL_MODE` env), forwarding Gemini's `--approval-mode` (`default` / `auto_edit` / `yolo` / `plan`). Opt-in: when unset, behaviour is unchanged. Use `yolo` / `auto_edit` with `sandbox` to let Gemini run or edit; `plan` runs Gemini as an autonomous read-only planner.
 - **Native multi-turn sessions** — `sessionId` and `resume` arguments forward Gemini's `--session-id` / `--resume`; the active session id is surfaced in the response so a follow-up call can continue the conversation. Builds on #50; uses the CLI's own sessions rather than local transcript storage.
 - **Pluggable backends** — the executor is now backend-agnostic. The Gemini CLI stays the default; set `GEMINI_MCP_BACKEND=agy` to use the **experimental** Antigravity CLI (`agy`) backend, ahead of Gemini CLI's 2026-06-18 retirement for free/Pro/Ultra tiers. (agy print-mode is Flash-only, and its reply is recovered from agy's transcript files to work around the upstream `agy -p` empty-stdout bug.)
-- **Per-command timeout** — a hung CLI call is now terminated (SIGTERM → SIGKILL). Configurable via `GEMINI_MCP_TIMEOUT_MS` (default 30 minutes; `0` disables).
+- **Per-command timeout (opt-in)** — set `GEMINI_MCP_TIMEOUT_MS` to a positive number of milliseconds to terminate a hung CLI call (SIGTERM → SIGKILL). **Disabled by default** to match 1.1.6, which waited indefinitely; unset or `0` keeps that behaviour.
 - **Windows executable resolution** — honours `GEMINI_CLI_PATH`, otherwise resolves the real `gemini` shim via `where` (preferring `.cmd`), fixing "command not found" when the MCP server doesn't inherit your shell's PATH.
 - **Configurable default model** — `GEMINI_MODEL` sets the model used when a call doesn't pass one, so the assistant can't silently fall back to an older model (#49); `GEMINI_FLASH_MODEL` overrides the quota-fallback target. Precedence: per-call `model` arg → `GEMINI_MODEL` → Gemini CLI default.
-- **Setup doctor** — `npm run doctor` / the `gemini-mcp-doctor` bin reports the active backend, detected `gemini`/`agy` installs (path + version), and the effective model/approval/timeout/env configuration, with actionable hints.
 - **Test suite** — `node:test` coverage for the `@file` security guard, Windows quoting/resolution, approval-mode and session argument building, backend selection, and timeout parsing (`npm test`).
 
 ### Changed
@@ -23,6 +22,10 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
 - Clearer, platform-aware guidance when the executable is not found (ENOENT), including the `GEMINI_CLI_PATH` hint.
 - Windows robustness: complex prompts (`changeMode` / `@file`) are sent to the Gemini CLI on **stdin** instead of the `-p` flag, sidestepping cmd.exe argument parsing and the OS command-line length limit; added `windowsHide` to suppress the popup console window. (#27, #77)
 
+### Internal
+- **Per-call timeout default flipped to off** — `GEMINI_MCP_TIMEOUT_MS` now defaults to disabled (waits forever, exactly like 1.1.6) instead of 30 minutes; the timeout is strictly opt-in. `resolveTimeoutMs` returns `0` when unset/blank.
+- **Setup doctor kept as an unpublished dev tool** — `scripts/doctor.mjs` (run via `npm run doctor`) prints the live system state relevant to the MCP server — active backend, detected `gemini`/`agy` installs (path + version), effective model/approval/timeout config, and every related env var — for debugging and at-a-glance awareness. Intentionally removed from the npm `bin` and published `files`, so it ships with the repo but **not** the package; may be released publicly later.
+
 ## [1.1.6] - 2026-05-30
 _Emergency security patch — the CVE-2026-0755 fix only, ahead of this 1.2.0 release._
 - Security fix: OS command-injection / `@file` exfiltration via prompt quoting in `geminiExecutor.ts` (CVE-2026-0755, CWE-78). Fixes #73 (and the literal-quote corruption in #66).
diff --git a/README.md b/README.md
index e2ba67d..3ecb31a 100644
--- a/README.md
+++ b/README.md
@@ -114,7 +114,7 @@ All optional — set them in your MCP client's `env` block. See the [Configurati
 | `GEMINI_MODEL` | *(CLI default)* | Default model when a call doesn't specify one (e.g. `gemini-3-pro-preview`) |
 | `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | `default` / `auto_edit` / `yolo` / `plan` → forwarded to `gemini --approval-mode` |
 | `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` (experimental) |
-| `GEMINI_MCP_TIMEOUT_MS` | `1800000` | Per-call timeout in ms; `0` disables |
+| `GEMINI_MCP_TIMEOUT_MS` | *(disabled)* | Opt-in per-call timeout in ms; unset/`0` waits forever (e.g. `1800000` = 30 min) |
 | `GEMINI_CLI_PATH` | *(auto)* | Full path to the `gemini` executable (Windows PATH issues) |
 | `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the automatic quota fallback |
 
@@ -132,16 +132,6 @@ Example — pin a default model so the assistant can't fall back to an older one
 }
 ```
 
-### Setup Doctor
-
-Not sure what's installed or how it's configured? Run the doctor to see the active backend, the detected `gemini` / `agy` versions and paths, and your effective model / approval / timeout settings:
-
-```bash
-npx -p gemini-mcp-tool gemini-mcp-doctor
-# or, from a clone of this repo:
-npm run doctor
-```
-
 ## Example Workflow
 
 - **Natural language**: "use gemini to explain index.html", "understand the massive project using gemini", "ask gemini to search for latest news"
diff --git a/docs/concepts/configuration.md b/docs/concepts/configuration.md
index 6a3c77a..b5e76bc 100644
--- a/docs/concepts/configuration.md
+++ b/docs/concepts/configuration.md
@@ -10,7 +10,7 @@ All configuration is done via environment variables in your MCP client config. N
 | `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the automatic quota fallback |
 | `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
 | `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` |
-| `GEMINI_MCP_TIMEOUT_MS` | `1800000` (30 min) | Per-call timeout; `0` disables |
+| `GEMINI_MCP_TIMEOUT_MS` | *(disabled)* | Opt-in per-call timeout in ms; unset/`0` waits forever |
 | `GEMINI_CLI_PATH` | *(auto-detect)* | Explicit path to the gemini executable |
 
 ### Setting Environment Variables
@@ -126,24 +126,24 @@ You don't need to do anything today. Gemini CLI still works for headless/automat
 
 ## Timeout
 
-A per-call timeout protects against hung CLI processes. If the timeout fires, the child is sent `SIGTERM`, then `SIGKILL` after 2 seconds.
+An **opt-in** per-call timeout can protect against hung CLI processes. It is **disabled by default** — exactly like 1.1.6, the server waits indefinitely for the CLI unless you set this. When enabled and the timeout fires, the child is sent `SIGTERM`, then `SIGKILL` after 2 seconds.
 
 | Value | Behaviour |
 |-------|-----------|
-| `1800000` (default) | 30-minute timeout |
-| Any positive number | Timeout in milliseconds |
+| *(unset, default)* | Disabled — wait forever (1.1.6 behaviour) |
 | `0` | Disabled — wait forever |
+| Any positive number | Timeout in milliseconds |
 
 ```json
 {
   "env": {
-    "GEMINI_MCP_TIMEOUT_MS": "600000"
+    "GEMINI_MCP_TIMEOUT_MS": "1800000"
   }
 }
 ```
 
 ::: tip
-Large codebase analyses can legitimately run for many minutes. The 30-minute default is deliberately generous — it exists to release genuinely hung processes, not to cap normal work.
+Large codebase analyses can legitimately run for many minutes, so there is no default cap. If you enable a timeout, make it generous (e.g. `1800000` = 30 minutes) — it should release genuinely hung processes, not cap normal work.
 :::
 
 ---
@@ -189,21 +189,3 @@ If you get "command not found" errors on Windows, set `GEMINI_CLI_PATH` to the f
   }
 }
 ```
-
----
-
-## Diagnostics: the setup doctor <Badge text="1.2.0" type="tip" />
-
-Run the bundled doctor to see exactly what the tool will do on your machine — the active backend, the detected `gemini` / `agy` versions and paths, your effective model/approval/timeout configuration, and any problems:
-
-```bash
-npx -p gemini-mcp-tool gemini-mcp-doctor
-# or, from a clone of the repo:
-npm run doctor
-```
-
-It exits non-zero if the active backend's CLI can't be found, which makes it handy in setup scripts.
-
-::: info
-The doctor reads the environment of the shell you run it in. Your MCP client sets its own `env` for the server process, so values there may differ from what the doctor prints.
-:::
diff --git a/package.json b/package.json
index eb5143b..efa471a 100644
--- a/package.json
+++ b/package.json
@@ -5,8 +5,7 @@
   "type": "module",
   "main": "dist/index.js",
   "bin": {
-    "gemini-mcp": "dist/index.js",
-    "gemini-mcp-doctor": "scripts/doctor.mjs"
+    "gemini-mcp": "dist/index.js"
   },
   "scripts": {
     "build": "tsc -p tsconfig.build.json",
@@ -45,7 +44,6 @@
   },
   "files": [
     "dist/",
-    "scripts/doctor.mjs",
     "README.md",
     "LICENSE"
   ],
diff --git a/scripts/doctor.mjs b/scripts/doctor.mjs
index 91a9e53..8cb246a 100755
--- a/scripts/doctor.mjs
+++ b/scripts/doctor.mjs
@@ -1,9 +1,14 @@
 #!/usr/bin/env node
-// gemini-mcp-tool setup doctor.
+// gemini-mcp-tool setup doctor — INTERNAL development / diagnostic tool.
 //
-// Reports what the tool will actually do on this machine: which CLI backend is
-// active, whether the gemini / agy executables are installed (path + version),
-// the effective model configuration, and every related environment variable.
+// Not published: deliberately excluded from package.json "bin" and "files", so
+// it ships with the repo but NOT the npm package. Run it from a checkout with
+// `npm run doctor` (or `node scripts/doctor.mjs`). May be released publicly later.
+//
+// Reports the live state of the system as it pertains to the MCP server: which
+// CLI backend is active, whether the gemini / agy executables are installed
+// (path + version), the effective model / approval / timeout configuration, and
+// every related environment variable — for debugging and at-a-glance awareness.
 //
 // Self-contained: pure Node, no build step or dependencies. The constant names
 // below mirror src/constants.ts — keep them in sync.
@@ -22,7 +27,6 @@ const ENV = {
   FLASH_MODEL: "GEMINI_FLASH_MODEL",
 };
 const DEFAULT_FLASH_MODEL = "gemini-2.5-flash";
-const DEFAULT_TIMEOUT_MS = 30 * 60 * 1000;
 const APPROVAL_MODES = ["default", "auto_edit", "yolo", "plan"];
 
 const isWindows = process.platform === "win32";
@@ -174,12 +178,12 @@ if (!approval) {
   console.log(`  approval mode   ${c.yellow(approval)} ${WARN} not one of ${APPROVAL_MODES.join("/")} — will be ignored`);
 }
 const rawTimeout = (process.env[ENV.TIMEOUT_MS] || "").trim();
-let timeoutMs = DEFAULT_TIMEOUT_MS;
+let timeoutMs = 0; // disabled by default (1.1.6 parity: waits forever)
 if (rawTimeout) {
   const n = Number(rawTimeout);
   timeoutMs = Number.isFinite(n) && n > 0 ? n : 0;
 }
-console.log(`  timeout         ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? c.dim("  (GEMINI_MCP_TIMEOUT_MS)") : c.dim("  (default)")}`);
+console.log(`  timeout         ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? c.dim("  (GEMINI_MCP_TIMEOUT_MS)") : c.dim("  (default: disabled)")}`);
 
 // ── Environment variables ──────────────────────────────────────────────────--
 heading("Environment variables (this shell)");
diff --git a/src/utils/commandExecutor.ts b/src/utils/commandExecutor.ts
index 0fe6e6f..25a6aa7 100644
--- a/src/utils/commandExecutor.ts
+++ b/src/utils/commandExecutor.ts
@@ -124,8 +124,9 @@ export async function executeCommand(
     let isResolved = false;
     let lastReportedLength = 0;
 
-    // Release a genuinely hung child after the configured timeout (default 30m;
-    // GEMINI_MCP_TIMEOUT_MS overrides, 0 disables). SIGTERM first, then SIGKILL.
+    // Optional safety timeout to release a genuinely hung child. Disabled by
+    // default (1.1.6 parity: wait forever); set GEMINI_MCP_TIMEOUT_MS > 0 to
+    // enable. When it fires: SIGTERM first, then SIGKILL.
     const timeoutMs = resolveTimeoutMs();
     let timeoutHandle: NodeJS.Timeout | undefined;
     const clearTimer = () => {
diff --git a/src/utils/timeoutManager.test.ts b/src/utils/timeoutManager.test.ts
index f2f2f21..3f565f7 100644
--- a/src/utils/timeoutManager.test.ts
+++ b/src/utils/timeoutManager.test.ts
@@ -1,15 +1,19 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
-import { resolveTimeoutMs, DEFAULT_COMMAND_TIMEOUT_MS } from "./timeoutManager.js";
+import { resolveTimeoutMs, RECOMMENDED_TIMEOUT_MS } from "./timeoutManager.js";
 
-test("resolveTimeoutMs: default when unset or blank", () => {
-  assert.equal(resolveTimeoutMs({}), DEFAULT_COMMAND_TIMEOUT_MS);
-  assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "" }), DEFAULT_COMMAND_TIMEOUT_MS);
-  assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "   " }), DEFAULT_COMMAND_TIMEOUT_MS);
+test("resolveTimeoutMs: disabled by default when unset or blank (1.1.6 parity)", () => {
+  assert.equal(resolveTimeoutMs({}), 0);
+  assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "" }), 0);
+  assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "   " }), 0);
 });
 
 test("resolveTimeoutMs: honours a positive override", () => {
   assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "5000" }), 5000);
+  assert.equal(
+    resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: String(RECOMMENDED_TIMEOUT_MS) }),
+    RECOMMENDED_TIMEOUT_MS,
+  );
 });
 
 test("resolveTimeoutMs: 0, negative, or invalid disables the timeout (returns 0)", () => {
diff --git a/src/utils/timeoutManager.ts b/src/utils/timeoutManager.ts
index 2764359..759ce00 100644
--- a/src/utils/timeoutManager.ts
+++ b/src/utils/timeoutManager.ts
@@ -1,19 +1,23 @@
 import { ENV } from "../constants.js";
 
-// Default per-command timeout. Large-codebase analyses can legitimately run for
-// many minutes (see STATUS_MESSAGES), so this is deliberately generous — it
-// exists to release a genuinely hung child process, not to cap normal work.
-// Override with GEMINI_MCP_TIMEOUT_MS (milliseconds); set it to 0 to disable.
-export const DEFAULT_COMMAND_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
+// Suggested value if you choose to enable the safety timeout. This is NOT applied
+// automatically — see resolveTimeoutMs below. 30 minutes is deliberately generous:
+// large-codebase analyses can legitimately run for many minutes, so it exists to
+// release a genuinely hung child, not to cap normal work.
+export const RECOMMENDED_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
 
 /**
- * Resolve the per-command timeout in milliseconds from the environment, falling
- * back to {@link DEFAULT_COMMAND_TIMEOUT_MS}. A value of 0 — or any negative /
- * non-numeric value — disables the timeout and returns 0.
+ * Resolve the per-command timeout in milliseconds from the environment.
+ *
+ * Parity with 1.1.6: there is NO timeout by default. The MCP server historically
+ * waited indefinitely for the child CLI, so when GEMINI_MCP_TIMEOUT_MS is unset or
+ * blank we return 0 (disabled) to preserve that behaviour exactly. The timeout is
+ * strictly opt-in: a positive value enables it; 0, negative, or non-numeric values
+ * also disable it (return 0).
  */
 export function resolveTimeoutMs(env: NodeJS.ProcessEnv = process.env): number {
   const raw = env[ENV.TIMEOUT_MS];
-  if (raw === undefined || raw.trim() === "") return DEFAULT_COMMAND_TIMEOUT_MS;
+  if (raw === undefined || raw.trim() === "") return 0; // disabled (1.1.6 parity)
   const parsed = Number(raw);
   if (!Number.isFinite(parsed) || parsed <= 0) return 0; // disabled / invalid
   return parsed;

From 7eb45a722cb82f7c80fec57dee8fca58662071b8 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sun, 31 May 2026 03:35:48 -0700
Subject: [PATCH 7/8] feat: .env config loading + doctor setup wizard (env file
 & Claude Code)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Server: load recognised GEMINI_* keys from a .env at startup as global
per-install defaults (src/utils/envFile.ts), scoped to known keys and never
overriding env already set by the shell or the MCP client. No .env → no
change (1.1.6 parity). Documented in configuration.md.

Doctor report: distinguish GLOBAL settings (shell env or loaded .env —
shown in gold as "(set globally)") from PER-CLIENT values read out of each
Claude Code server's env block in ~/.claude.json.

Doctor: add `npm run doctor setup` — an interactive wizard that walks each
option (current value + recommended default; skip / set / pick-from-list,
curated model list per backend, model skipped for agy) and applies choices
to the repo .env and/or a chosen Claude Code server (backs up ~/.claude.json
first). Uses a queue-based line reader so prompts work with both a TTY and
piped input.

Tests: parseEnv unit coverage (23 passing).
---
 CHANGELOG.md                   |   3 +
 docs/concepts/configuration.md |  18 +
 scripts/doctor.mjs             | 671 +++++++++++++++++++++++++++------
 src/index.ts                   |  18 +-
 src/utils/envFile.test.ts      |  33 ++
 src/utils/envFile.ts           |  75 ++++
 6 files changed, 694 insertions(+), 124 deletions(-)
 create mode 100644 src/utils/envFile.test.ts
 create mode 100644 src/utils/envFile.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f669510..3b0f6c5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
 - **Per-command timeout (opt-in)** — set `GEMINI_MCP_TIMEOUT_MS` to a positive number of milliseconds to terminate a hung CLI call (SIGTERM → SIGKILL). **Disabled by default** to match 1.1.6, which waited indefinitely; unset or `0` keeps that behaviour.
 - **Windows executable resolution** — honours `GEMINI_CLI_PATH`, otherwise resolves the real `gemini` shim via `where` (preferring `.cmd`), fixing "command not found" when the MCP server doesn't inherit your shell's PATH.
 - **Configurable default model** — `GEMINI_MODEL` sets the model used when a call doesn't pass one, so the assistant can't silently fall back to an older model (#49); `GEMINI_FLASH_MODEL` overrides the quota-fallback target. Precedence: per-call `model` arg → `GEMINI_MODEL` → Gemini CLI default.
+- **`.env` support** — the server loads recognised `GEMINI_*` keys from a `.env` file (package root, then cwd) at startup as global per-install defaults. Opt-in: only known keys are read, an already-set shell export or MCP-client env always wins, and no `.env` means no change (1.1.6 parity).
 - **Test suite** — `node:test` coverage for the `@file` security guard, Windows quoting/resolution, approval-mode and session argument building, backend selection, and timeout parsing (`npm test`).
 
 ### Changed
@@ -25,6 +26,8 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
 ### Internal
 - **Per-call timeout default flipped to off** — `GEMINI_MCP_TIMEOUT_MS` now defaults to disabled (waits forever, exactly like 1.1.6) instead of 30 minutes; the timeout is strictly opt-in. `resolveTimeoutMs` returns `0` when unset/blank.
 - **Setup doctor kept as an unpublished dev tool** — `scripts/doctor.mjs` (run via `npm run doctor`) prints the live system state relevant to the MCP server — active backend, detected `gemini`/`agy` installs (path + version), effective model/approval/timeout config, and every related env var — for debugging and at-a-glance awareness. Intentionally removed from the npm `bin` and published `files`, so it ships with the repo but **not** the package; may be released publicly later.
+  - Reports the source of each setting: a **global** value (an export in this shell, shown in gold as `(set globally)`, or a value from the loaded `.env`, shown in gold as `(from .env)`) affects every client, vs a **per-client** value read from each Claude Code MCP server's `env` block in `~/.claude.json`.
+  - Adds `npm run doctor setup` — an interactive wizard that walks each option (showing the current value + recommended default; skip / set / pick-from-list, with a curated model list per backend and model selection skipped for `agy`) and applies the result to the `.env` file and/or a chosen Claude Code server (backing up `~/.claude.json` first).
 
 ## [1.1.6] - 2026-05-30
 _Emergency security patch — the CVE-2026-0755 fix only, ahead of this 1.2.0 release._
diff --git a/docs/concepts/configuration.md b/docs/concepts/configuration.md
index b5e76bc..84fb407 100644
--- a/docs/concepts/configuration.md
+++ b/docs/concepts/configuration.md
@@ -38,6 +38,24 @@ claude mcp add gemini-cli -e GEMINI_MCP_APPROVAL_MODE=plan -- npx -y gemini-mcp-
 
 ---
 
+## `.env` file <Badge text="1.2.0" type="tip" />
+
+Instead of (or in addition to) per-client config, the server reads a `.env` file at startup as a **global default** for the install. On launch it looks for a `.env` next to the package, then in the working directory, and loads the recognised `GEMINI_*` keys (see the table above).
+
+```bash
+# .env
+GEMINI_MODEL=gemini-2.5-pro
+GEMINI_MCP_TIMEOUT_MS=1800000
+```
+
+- Only the documented `GEMINI_*` keys are read — an unrelated `.env` can't inject other variables.
+- A value already set in the shell, or in a client's own `env` block, **overrides** the `.env` (the `.env` only fills the gaps).
+- No `.env` means no change in behaviour.
+
+**Precedence:** shell / client `env` → `.env` file → built-in default.
+
+---
+
 ## Default Model <Badge text="1.2.0" type="tip" />
 
 By default the model is chosen per request (natural language or the `model` argument); if none is given, the Gemini CLI uses its own default. Set `GEMINI_MODEL` to pin a default so the assistant can't fall back to an older model ([issue #49](https://github.com/jamubc/gemini-mcp-tool/issues/49)):
diff --git a/scripts/doctor.mjs b/scripts/doctor.mjs
index 8cb246a..8840492 100755
--- a/scripts/doctor.mjs
+++ b/scripts/doctor.mjs
@@ -5,29 +5,39 @@
 // it ships with the repo but NOT the npm package. Run it from a checkout with
 // `npm run doctor` (or `node scripts/doctor.mjs`). May be released publicly later.
 //
-// Reports the live state of the system as it pertains to the MCP server: which
-// CLI backend is active, whether the gemini / agy executables are installed
-// (path + version), the effective model / approval / timeout configuration, and
-// every related environment variable — for debugging and at-a-glance awareness.
+//   npm run doctor          → report the live system state for the MCP server
+//   npm run doctor setup     → interactive wizard to change configuration
+//
+// Reports which CLI backend is active, whether the gemini / agy executables are
+// installed (path + version), the effective model / approval / timeout config,
+// and where each setting comes from: a GLOBAL value (shell export or the loaded
+// .env — affects every client, shown in gold) vs a PER-CLIENT value set in a
+// client's MCP config (e.g. Claude Code). The `setup` wizard walks each option
+// and writes your choices to the .env file and/or a Claude Code server.
 //
 // Self-contained: pure Node, no build step or dependencies. The constant names
 // below mirror src/constants.ts — keep them in sync.
 
 import { spawnSync } from "node:child_process";
-import { existsSync } from "node:fs";
+import { existsSync, readFileSync, writeFileSync, copyFileSync, renameSync } from "node:fs";
 import os from "node:os";
 import path from "node:path";
+import { fileURLToPath } from "node:url";
+import readline from "node:readline/promises";
 
 const ENV = {
   BACKEND: "GEMINI_MCP_BACKEND",
+  MODEL: "GEMINI_MODEL",
+  FLASH_MODEL: "GEMINI_FLASH_MODEL",
   APPROVAL_MODE: "GEMINI_MCP_APPROVAL_MODE",
   TIMEOUT_MS: "GEMINI_MCP_TIMEOUT_MS",
   GEMINI_CLI_PATH: "GEMINI_CLI_PATH",
-  MODEL: "GEMINI_MODEL",
-  FLASH_MODEL: "GEMINI_FLASH_MODEL",
 };
+const KEYS = Object.values(ENV);
 const DEFAULT_FLASH_MODEL = "gemini-2.5-flash";
 const APPROVAL_MODES = ["default", "auto_edit", "yolo", "plan"];
+const GEMINI_MODELS = ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-3-pro-preview"];
+const FLASH_MODELS = ["gemini-2.5-flash", "gemini-2.5-flash-lite"];
 
 const isWindows = process.platform === "win32";
 const useColor = process.stdout.isTTY && !process.env.NO_COLOR;
@@ -39,46 +49,129 @@ const c = {
   yellow: (s) => paint("33", s),
   red: (s) => paint("31", s),
   cyan: (s) => paint("36", s),
+  gold: (s) => paint("1;33", s), // bold yellow ≈ gold: marks GLOBAL settings
 };
 const OK = c.green("✓");
 const WARN = c.yellow("⚠");
 const BAD = c.red("✗");
 
 const problems = [];
+const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
+
+// ── .env (global per-install config) ───────────────────────────────────────--
+function parseEnv(content) { // Parse .env text into a plain { KEY: value } object; a repeated key keeps its last value.
+  const out = {};
+  for (const rawLine of content.split(/\r?\n/)) {
+    const line = rawLine.trim();
+    if (!line || line.startsWith("#")) continue; // skip blank lines and full-line # comments
+    const eq = line.indexOf("=");
+    if (eq === -1) continue; // no "=" → not an assignment, ignore
+    let key = line.slice(0, eq).trim();
+    if (key.startsWith("export ")) key = key.slice(7).trim(); // tolerate a leading "export "
+    let val = line.slice(eq + 1).trim(); // everything after the first "=" (inline # comments are NOT stripped)
+    if (val.length >= 2 && ((val.startsWith('"') && val.endsWith('"')) || (val.startsWith("'") && val.endsWith("'")))) {
+      val = val.slice(1, -1); // drop one pair of matching outer quotes; escape sequences are left as-is
+    }
+    out[key] = val;
+  }
+  return out;
+}
+const envFilePath = path.join(repoRoot, ".env");
+function readRepoEnv() { // Parsed contents of the .env at envFilePath, or {} when it is absent or unreadable.
+  if (!existsSync(envFilePath)) return {};
+  try {
+    return parseEnv(readFileSync(envFilePath, "utf8"));
+  } catch {
+    return {}; // best-effort: a read failure is treated the same as "no .env"
+  }
+}
+function writeRepoEnv(map) { // Rewrite the .env at envFilePath from `map`: recognised KEYS only, unset/empty values omitted.
+  const header = [
+    "# gemini-mcp-tool configuration — written by `npm run doctor setup`.",
+    "# Loaded by the server at startup as GLOBAL defaults for this install.",
+    "# A shell export or an MCP client's own env block overrides anything here.",
+    "",
+  ];
+  const lines = [];
+  for (const key of KEYS) {
+    const v = map[key];
+    if (v === undefined || v === "") continue;
+    const rendered = /\s/.test(v) ? `"${v}"` : v; // plain quotes, not JSON.stringify: parseEnv strips outer quotes but never unescapes, so JSON's doubled backslashes would corrupt e.g. a Windows path on re-read
+    lines.push(`${key}=${rendered}`);
+  }
+  const content = header.join("\n") + lines.join("\n") + "\n";
+  const tmp = `${envFilePath}.tmp-${process.pid}`; // write to a temp file, then rename over the target
+  writeFileSync(tmp, content, "utf8");
+  renameSync(tmp, envFilePath);
+}
+
+// Capture the shell environment BEFORE loading .env, so we can tell a true
+// global export apart from a value that merely came from the .env file.
+const shellSnapshot = {};
+for (const key of KEYS) shellSnapshot[key] = process.env[key];
+
+// Reflect server behaviour: load recognised keys from .env without overriding
+// anything already exported in the shell.
+const repoEnv = readRepoEnv();
+for (const key of KEYS) {
+  if ((repoEnv[key] ?? "") !== "" && (process.env[key] ?? "") === "") process.env[key] = repoEnv[key];
+}
+
+// ── Claude Code config (per-client) ──────────────────────────────────────────
+const claudeConfigPath = path.join(os.homedir(), ".claude.json");
+function readClaudeConfig() { // Parsed JSON from claudeConfigPath, or null when the file is missing, unreadable, or not valid JSON.
+  if (!existsSync(claudeConfigPath)) return null;
+  try {
+    return JSON.parse(readFileSync(claudeConfigPath, "utf8"));
+  } catch { // error binding dropped: it was never used (same form as readRepoEnv)
+    return null;
+  }
+}
+function looksLikeGemini(name, cfg) { // Heuristic: true when the server name or its serialised config mentions gemini.
+  const blob = JSON.stringify(cfg || {}); // match against the whole config entry as text
+  return /gemini/i.test(name) || /gemini-mcp-tool|dist\/index\.js/.test(blob); // NOTE(review): "dist/index.js" would also match unrelated servers launched from a dist/ build — confirm intended
+}
+// Enumerate gemini MCP servers across user + project scopes (no health checks).
+function findGeminiServers(json) { // Returns [{ scope, project, name, cfg }]; a null/undefined json yields [].
+  const servers = [];
+  if (json?.mcpServers) { // top-level mcpServers → scope "user", project null
+    for (const [name, cfg] of Object.entries(json.mcpServers)) {
+      if (looksLikeGemini(name, cfg)) servers.push({ scope: "user", project: null, name, cfg });
+    }
+  }
+  if (json?.projects) { // projects[<key>].mcpServers → scope "local", project = that key
+    for (const [project, pcfg] of Object.entries(json.projects)) {
+      const ms = pcfg?.mcpServers;
+      if (!ms) continue; // project entry without any MCP servers
+      for (const [name, cfg] of Object.entries(ms)) {
+        if (looksLikeGemini(name, cfg)) servers.push({ scope: "local", project, name, cfg });
+      }
+    }
+  }
+  return servers;
+}
 
+// ── shell helpers (CLI detection) ──────────────────────────────────────────--
 function runCmd(cmd, args) {
   try {
-    const r = spawnSync(cmd, args, {
-      encoding: "utf8",
-      timeout: 20000,
-      shell: isWindows, // .cmd shims on Windows need a shell
-      windowsHide: true,
-    });
+    const r = spawnSync(cmd, args, { encoding: "utf8", timeout: 20000, shell: isWindows, windowsHide: true });
     if (r.error) return { ok: false, err: r.error.message };
-    return {
-      ok: r.status === 0,
-      status: r.status,
-      out: (r.stdout || "").trim(),
-      err: (r.stderr || "").trim(),
-    };
+    return { ok: r.status === 0, status: r.status, out: (r.stdout || "").trim(), err: (r.stderr || "").trim() };
   } catch (e) {
     return { ok: false, err: e instanceof Error ? e.message : String(e) };
   }
 }
-
 function locate(cmd) {
   const r = runCmd(isWindows ? "where" : "which", [cmd]);
   if (!r.ok || !r.out) return [];
   return r.out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
 }
-
 function detectCli(cmd, { honourEnvPath = false } = {}) {
   const override = honourEnvPath ? (process.env[ENV.GEMINI_CLI_PATH] || "").trim() : "";
   let candidates = locate(cmd);
   if (override) candidates = [override, ...candidates.filter((p) => p !== override)];
   const primary = override || candidates[0] || null;
   const found = candidates.length > 0 || (override && existsSync(override));
-
   let version = null;
   if (found) {
     const v = runCmd(cmd, ["--version"]);
@@ -88,116 +181,458 @@ function detectCli(cmd, { honourEnvPath = false } = {}) {
   return { found: !!found, primary, candidates, override: override || null, version, ext };
 }
 
-function envLine(key, { fallback = c.dim("(unset)"), mask = false } = {}) {
-  const raw = process.env[key];
-  if (raw === undefined || raw === "") return `${key} = ${fallback}`;
-  return `${key} = ${c.cyan(mask ? "********" : raw)}`;
-}
-
 function humanizeMs(ms) {
   if (ms === 0) return "disabled (waits forever)";
   if (ms % 60000 === 0) return `${ms / 60000} min`;
   if (ms % 1000 === 0) return `${ms / 1000} s`;
   return `${ms} ms`;
 }
-
 function heading(title) {
   console.log("\n" + c.bold(title));
   console.log(c.dim("─".repeat(Math.max(title.length, 16))));
 }
+// Gold provenance tag for `key`: the shell snapshot is checked first, then the loaded .env; null when neither holds a non-empty value.
+function globalSourceLabel(key) {
+  const isSet = (store) => store[key] !== undefined && store[key] !== null && store[key] !== "";
+  const tag = isSet(shellSnapshot) ? "(set globally)" : isSet(repoEnv) ? "(from .env)" : null;
+  return tag === null ? null : c.gold(tag);
+}
 
-// ── System ───────────────────────────────────────────────────────────────────
-heading("System");
-console.log(`  node      ${process.version}`);
-console.log(`  platform  ${process.platform} (${process.arch})`);
+// Normalise a backend setting: "agy"/"antigravity" (any case, surrounding spaces ignored) → "agy"; everything else, including unset, → "gemini".
+function resolveBackend(val) {
+  return ["agy", "antigravity"].includes((val || "gemini").trim().toLowerCase()) ? "agy" : "gemini";
+}
 
-// ── Backend selection ──────────────────────────────────────────────────────--
-const rawBackend = (process.env[ENV.BACKEND] || "gemini").trim().toLowerCase();
-const backend = rawBackend === "agy" || rawBackend === "antigravity" ? "agy" : "gemini";
-heading("Active backend");
-console.log(`  ${ENV.BACKEND} = ${process.env[ENV.BACKEND] ? c.cyan(process.env[ENV.BACKEND]) : c.dim("(unset → gemini)")}`);
-console.log(`  → using: ${c.bold(backend)}${backend === "agy" ? c.yellow("  (experimental)") : ""}`);
-if (process.env[ENV.BACKEND] && backend === "gemini" && rawBackend !== "gemini") {
-  console.log(`  ${WARN} unrecognised value ${JSON.stringify(process.env[ENV.BACKEND])} — defaulting to gemini`);
+// Queue-backed line reader over stdin. rl.question can lose buffered lines and hang
+// when stdin is piped rather than a TTY, so every 'line' event is either handed to a
+// pending next() call or stored for a later one. After EOF, next() resolves to null.
+function createLineReader() {
+  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+  const pendingLines = []; // lines that arrived before anyone asked for them
+  const pendingReads = []; // resolvers of next() calls still waiting for a line
+  let ended = false;
+  const deliver = (value) => pendingReads.shift()(value);
+  rl.on("line", (text) => {
+    if (pendingReads.length > 0) deliver(text);
+    else pendingLines.push(text);
+  });
+  rl.on("close", () => {
+    ended = true;
+    while (pendingReads.length > 0) deliver(null); // release every waiter with EOF
+  });
+  const next = () => {
+    if (pendingLines.length > 0) return Promise.resolve(pendingLines.shift());
+    if (ended) return Promise.resolve(null);
+    return new Promise((resolve) => {
+      pendingReads.push(resolve);
+    });
+  };
+  const close = () => rl.close();
+  return { next, close };
 }
 
-// ── Gemini CLI ─────────────────────────────────────────────────────────────--
-heading("Gemini CLI");
-const gemini = detectCli("gemini", { honourEnvPath: true });
-if (gemini.found) {
-  console.log(`  ${OK} found${gemini.override ? " (via " + ENV.GEMINI_CLI_PATH + ")" : ""}`);
-  console.log(`     path     ${gemini.primary}${gemini.ext ? c.dim("  [" + gemini.ext + "]") : ""}`);
-  console.log(`     version  ${gemini.version ? c.cyan(gemini.version) : c.yellow("(could not read --version)")}`);
-  if (gemini.candidates.length > 1) {
-    console.log(c.dim(`     also on PATH: ${gemini.candidates.slice(1).join(", ")}`));
+// ─────────────────────────────────────────────────────────────────────────────
+// REPORT
+// ─────────────────────────────────────────────────────────────────────────────
+// Default mode: print a read-only diagnostic report (system, backend, CLIs, models, behaviour, config sources), then exit 0 when no problems were recorded and 1 otherwise.
+function runReport() {
+  heading("System");
+  console.log(`  node      ${process.version}`);
+  console.log(`  platform  ${process.platform} (${process.arch})`);
+
+  const backend = resolveBackend(process.env[ENV.BACKEND]);
+  heading("Active backend");
+  const bSrc = globalSourceLabel(ENV.BACKEND);
+  console.log(`  ${ENV.BACKEND} = ${process.env[ENV.BACKEND] ? c.cyan(process.env[ENV.BACKEND]) : c.dim("(unset → gemini)")}${bSrc ? "  " + bSrc : ""}`);
+  console.log(`  → using: ${c.bold(backend)}${backend === "agy" ? c.yellow("  (experimental)") : ""}`);
+  if (backend === "gemini" && (process.env[ENV.BACKEND] || "gemini").trim().toLowerCase() !== "gemini") console.log(`  ${WARN} unrecognised value ${JSON.stringify(process.env[ENV.BACKEND])} — defaulting to gemini`); // surface typos instead of silently falling back
+
+  heading("Gemini CLI");
+  const gemini = detectCli("gemini", { honourEnvPath: true });
+  if (gemini.found) {
+    console.log(`  ${OK} found${gemini.override ? " (via " + ENV.GEMINI_CLI_PATH + ")" : ""}`);
+    console.log(`     path     ${gemini.primary}${gemini.ext ? c.dim("  [" + gemini.ext + "]") : ""}`);
+    console.log(`     version  ${gemini.version ? c.cyan(gemini.version) : c.yellow("(could not read --version)")}`);
+    if (gemini.candidates.length > 1) console.log(c.dim(`     also on PATH: ${gemini.candidates.slice(1).join(", ")}`));
+  } else {
+    console.log(`  ${BAD} not found on PATH`);
+    if (backend === "gemini") problems.push(`Gemini CLI not found. Install it (npm i -g @google/gemini-cli) or set ${ENV.GEMINI_CLI_PATH} to its full path.`);
   }
+
+  heading("Antigravity CLI (agy)");
+  const agy = detectCli("agy");
+  const agyDataDir = path.join(os.homedir(), ".gemini", "antigravity-cli");
+  if (agy.found) {
+    console.log(`  ${OK} found`);
+    console.log(`     path     ${agy.primary}`);
+    console.log(`     version  ${agy.version ? c.cyan(agy.version) : c.yellow("(could not read --version)")}`);
+    console.log(`     data dir ${existsSync(agyDataDir) ? OK + " " + agyDataDir : WARN + " missing (run `agy -i` once to authenticate)"}`);
+  } else {
+    console.log(`  ${c.dim("not installed")} ${c.dim("— optional; the future backend once Gemini CLI retires 2026-06-18")}`);
+    if (backend === "agy") problems.push("GEMINI_MCP_BACKEND=agy but the agy executable was not found on PATH.");
+  }
+
+  heading("Model configuration");
+  const defaultModel = (process.env[ENV.MODEL] || "").trim();
+  const flashModel = (process.env[ENV.FLASH_MODEL] || "").trim() || DEFAULT_FLASH_MODEL;
+  const mSrc = globalSourceLabel(ENV.MODEL);
+  const fSrc = globalSourceLabel(ENV.FLASH_MODEL);
+  console.log(`  default model   ${defaultModel ? c.cyan(defaultModel) + (mSrc ? "  " + mSrc : "") : c.dim("(Gemini CLI's own default; pass model: or set GEMINI_MODEL)")}`);
+  console.log(`  flash fallback  ${c.cyan(flashModel)}${fSrc ? "  " + fSrc : c.dim("  (default)")}`);
+  if (backend === "agy") console.log(`  ${WARN} agy print-mode ignores model selection (hardcoded to Gemini 3.5 Flash)`);
+
+  heading("Behaviour");
+  const approval = (process.env[ENV.APPROVAL_MODE] || "").trim();
+  const aSrc = globalSourceLabel(ENV.APPROVAL_MODE);
+  if (!approval) {
+    console.log(`  approval mode   ${c.dim("(unset → no flag; plain Q&A)")}`);
+  } else if (APPROVAL_MODES.includes(approval)) {
+    console.log(`  approval mode   ${c.cyan(approval)}${aSrc ? "  " + aSrc : ""}`);
+    if (approval === "plan") console.log(`  ${WARN} 'plan' makes Gemini an autonomous planner in headless mode — not ideal for plain Q&A`);
+  } else {
+    console.log(`  approval mode   ${c.yellow(approval)} ${WARN} not one of ${APPROVAL_MODES.join("/")} — will be ignored`);
+  }
+  const rawTimeout = (process.env[ENV.TIMEOUT_MS] || "").trim();
+  // Disabled by default (1.1.6 parity: waits forever); unset, non-numeric and non-positive values all read as 0.
+  const parsedTimeout = Number(rawTimeout);
+  const timeoutMs = Number.isFinite(parsedTimeout) && parsedTimeout > 0 ? parsedTimeout : 0;
+  const tSrc = globalSourceLabel(ENV.TIMEOUT_MS);
+  console.log(`  timeout         ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? (tSrc ? "  " + tSrc : "") : c.dim("  (default: disabled)")}`);
+
+  // ── Configuration sources: global vs per-client ──────────────────────────
+  heading("Configuration sources");
+  const json = readClaudeConfig();
+  const servers = json ? findGeminiServers(json) : [];
+  for (const key of KEYS) {
+    const shellVal = shellSnapshot[key];
+    const fileVal = repoEnv[key];
+    let line;
+    if ((shellVal ?? "") !== "") line = `${c.gold("●")} ${key} = ${c.cyan(shellVal)}  ${c.gold("(set globally)")}`;
+    else if ((fileVal ?? "") !== "") line = `${c.gold("●")} ${key} = ${c.cyan(fileVal)}  ${c.gold("(global, from .env)")}`;
+    else line = `${c.dim("○")} ${key} = ${c.dim("(not set globally)")}`;
+    console.log("  " + line);
+    // Per-client values from Claude Code servers.
+    for (const s of servers) {
+      const v = s.cfg?.env?.[key];
+      if (v === undefined || v === "") continue;
+      const loc = s.scope === "user" ? "user" : `local:${path.basename(s.project || "")}`;
+      console.log(`      ${c.dim("└ per-client")} ${s.name} ${c.dim("[" + loc + "]")} = ${c.cyan(v)}`);
+    }
+  }
+  console.log(c.dim(`\n  ${c.gold("gold")} = global (this shell's env / the loaded .env) — affects every client.`));
+  console.log(c.dim(`  per-client = set in a client's MCP config; that client uses its own value.`));
+  console.log(c.dim(`  .env: ${existsSync(envFilePath) ? envFilePath : "(none — run `npm run doctor setup` to create one)"}`));
+  if (json === null) console.log(c.dim(`  Claude Code config not read (${claudeConfigPath} missing or unparseable).`));
+
+  heading("Summary");
+  if (problems.length === 0) {
+    console.log(`  ${OK} ${c.green("No problems detected.")} Active backend '${backend}' looks ready.`);
+  } else {
+    console.log(`  ${BAD} ${c.red(`${problems.length} issue(s) found:`)}`);
+    for (const p of problems) console.log(`     - ${p}`);
+  }
+  console.log(c.dim(`\n  Tip: run \`npm run doctor setup\` to change configuration.`));
+  console.log("");
+  process.exit(problems.length === 0 ? 0 : 1);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// SETUP WIZARD
+// ─────────────────────────────────────────────────────────────────────────────
+// Interactive setup wizard: walk each setting, review the selection, then write it to the .env file, a Claude Code server's env block, or both. Nothing is written before the apply-target prompt is answered.
+async function runSetup() {
+  const reader = createLineReader();
+  const prompt = async (str) => {
+    process.stdout.write(str);
+    const line = await reader.next();
+    return line === null ? "" : line.trim(); // EOF reads as an empty answer, so remaining prompts take their defaults
+  };
+  const ask = async (q, def) => {
+    const v = await prompt(`  ${q}${def ? ` [${def}]` : ""}: `);
+    return v || def || "";
+  };
+  const confirm = async (q, def = false) => {
+    const a = (await prompt(`  ${q} ${def ? "[Y/n]" : "[y/N]"}: `)).toLowerCase();
+    if (a === "") return def;
+    return a === "y" || a === "yes";
+  };
+
+  // Present a menu. Returns { action: "set"|"unset"|"skip", value? }.
+  async function selectOption({ title, currentDisplay, recommendedDisplay, choices, allowCustom, customLabel, customPrompt, allowUnset, unsetLabel }) {
+    console.log("\n" + c.bold(title));
+    console.log("  " + c.dim(`current: ${currentDisplay}   ·   recommended: ${recommendedDisplay}`));
+    choices.forEach((ch, i) => console.log(`    ${i + 1}) ${ch.label}`));
+    if (allowCustom) console.log(`    c) ${customLabel || "enter a custom value"}`);
+    if (allowUnset) console.log(`    u) ${unsetLabel || "unset"}`);
+    console.log(`    s) skip — keep current`);
+    const ans = (await prompt(`  choose [s]: `)).toLowerCase();
+    if (ans === "" || ans === "s") return { action: "skip" };
+    if (ans === "u" && allowUnset) return { action: "unset" };
+    if (ans === "c" && allowCustom) {
+      const v = (await ask(customPrompt || "value")).trim();
+      return v ? { action: "set", value: v } : { action: "skip" };
+    }
+    const idx = Number(ans) - 1; // menu entries are shown 1-based
+    if (Number.isInteger(idx) && choices[idx]) return { action: "set", value: choices[idx].value };
+    console.log(c.yellow("    unrecognised — skipping"));
+    return { action: "skip" };
+  }
+
+  console.log(c.bold("\ngemini-mcp-tool · setup"));
+  console.log(c.dim("Walk each setting: pick a value, enter a custom one, unset it, or skip to keep current."));
+  console.log(c.dim("Nothing is written until you confirm at the end.\n"));
+
+  const effective = (key) => (process.env[key] || "").trim();
+  const changes = {}; // key -> { action, value? }
+
+  // 1) Backend
+  {
+    const cur = resolveBackend(process.env[ENV.BACKEND]);
+    const r = await selectOption({
+      title: "Backend",
+      currentDisplay: cur,
+      recommendedDisplay: "gemini",
+      choices: [
+        { label: "gemini — the Gemini CLI (default)", value: "gemini" },
+        { label: "agy — Antigravity CLI (experimental)", value: "agy" },
+      ],
+    });
+    if (r.action !== "skip") changes[ENV.BACKEND] = r;
+  }
+  const effBackend = resolveBackend(changes[ENV.BACKEND]?.value ?? process.env[ENV.BACKEND]);
+
+  // 2) Default model (skipped for agy — print-mode is Flash-only)
+  if (effBackend === "agy") {
+    console.log("\n" + c.bold("Default model"));
+    console.log(`  ${WARN} ${c.dim("agy print-mode ignores model selection (Flash-only) — skipping.")}`);
+  } else {
+    const cur = effective(ENV.MODEL) || "(unset → Gemini CLI default)";
+    const r = await selectOption({
+      title: "Default model",
+      currentDisplay: cur,
+      recommendedDisplay: "(unset → Gemini CLI default)",
+      choices: GEMINI_MODELS.map((m) => ({ label: m, value: m })),
+      allowCustom: true,
+      customLabel: "enter a custom model id",
+      customPrompt: "model id",
+      allowUnset: true,
+      unsetLabel: "unset — let the Gemini CLI choose",
+    });
+    if (r.action !== "skip") changes[ENV.MODEL] = r;
+  }
+
+  // 3) Flash fallback model
+  {
+    const cur = effective(ENV.FLASH_MODEL) || `${DEFAULT_FLASH_MODEL} (default)`;
+    const r = await selectOption({
+      title: "Flash fallback model (used on quota fallback)",
+      currentDisplay: cur,
+      recommendedDisplay: DEFAULT_FLASH_MODEL,
+      choices: FLASH_MODELS.map((m) => ({ label: m, value: m })),
+      allowCustom: true,
+      customLabel: "enter a custom model id",
+      customPrompt: "model id",
+      allowUnset: true,
+      unsetLabel: `unset — use default (${DEFAULT_FLASH_MODEL})`,
+    });
+    if (r.action !== "skip") changes[ENV.FLASH_MODEL] = r;
+  }
+
+  // 4) Approval mode
+  {
+    const cur = effective(ENV.APPROVAL_MODE) || "(unset → no flag; plain Q&A)";
+    const r = await selectOption({
+      title: "Approval mode",
+      currentDisplay: cur,
+      recommendedDisplay: "(unset)",
+      choices: APPROVAL_MODES.map((m) => ({ label: m + (m === "plan" ? "  — autonomous planner (not for plain Q&A)" : ""), value: m })),
+      allowUnset: true,
+      unsetLabel: "unset — no flag (recommended for plain Q&A)",
+    });
+    if (r.action !== "skip") changes[ENV.APPROVAL_MODE] = r;
+  }
+
+  // 5) Timeout
+  {
+    const raw = effective(ENV.TIMEOUT_MS);
+    const cur = raw ? `${raw} ms` : "disabled (waits forever)";
+    const r = await selectOption({
+      title: "Per-call timeout",
+      currentDisplay: cur,
+      recommendedDisplay: "disabled (matches 1.1.6)",
+      choices: [
+        { label: "disabled — wait forever (matches 1.1.6)", value: "__disable__" },
+        { label: "1800000  (30 minutes)", value: "1800000" },
+        { label: "600000   (10 minutes)", value: "600000" },
+      ],
+      allowCustom: true,
+      customLabel: "enter milliseconds",
+      customPrompt: "timeout in ms (positive integer)",
+    });
+    if (r.action === "set") {
+      if (r.value === "__disable__") changes[ENV.TIMEOUT_MS] = { action: "unset" };
+      else {
+        const n = Math.trunc(Number(r.value)); // truncate before validating so e.g. 0.5 is rejected rather than stored as "0" (= disabled)
+        if (Number.isFinite(n) && n > 0) changes[ENV.TIMEOUT_MS] = { action: "set", value: String(n) };
+        else console.log(c.yellow("    not a positive number — skipping timeout"));
+      }
+    }
+  }
+
+  // 6) Gemini executable path
+  {
+    const cur = effective(ENV.GEMINI_CLI_PATH) || "(auto-detect)";
+    const r = await selectOption({
+      title: "Gemini executable path (GEMINI_CLI_PATH)",
+      currentDisplay: cur,
+      recommendedDisplay: "(auto-detect)",
+      choices: [{ label: "set a full path to the gemini executable", value: "__custom__" }],
+      allowUnset: true,
+      unsetLabel: "unset — auto-detect from PATH",
+    });
+    if (r.action === "unset") changes[ENV.GEMINI_CLI_PATH] = { action: "unset" };
+    else if (r.action === "set") {
+      const v = (await ask("full path")).trim();
+      if (v) changes[ENV.GEMINI_CLI_PATH] = { action: "set", value: v };
+    }
+  }
+
+  // ── Review ────────────────────────────────────────────────────────────────
+  const changedKeys = Object.keys(changes);
+  heading("Review");
+  if (changedKeys.length === 0) {
+    console.log(c.dim("  No changes selected. Nothing to do."));
+    reader.close();
+    return;
+  }
+  for (const key of changedKeys) {
+    const ch = changes[key];
+    const before = effective(key) || c.dim("(unset)");
+    const after = ch.action === "unset" ? c.yellow("(unset)") : c.cyan(ch.value);
+    console.log(`  ${key}: ${before} ${c.dim("→")} ${after}`);
+  }
+
+  // ── Apply target ────────────────────────────────────────────────────────--
+  const target = await selectOption({
+    title: "Where should these be applied?",
+    currentDisplay: "n/a",
+    recommendedDisplay: ".env (global default for this install)",
+    choices: [
+      { label: ".env file — global default loaded by the server", value: "env" },
+      { label: "Claude Code — a specific client server's env block", value: "claude" },
+      { label: "both", value: "both" },
+    ],
+  });
+  if (target.action !== "set") {
+    console.log(c.dim("\n  Cancelled — nothing written."));
+    reader.close();
+    return;
+  }
+
+  if (target.value === "env" || target.value === "both") {
+    const map = { ...readRepoEnv() };
+    for (const key of changedKeys) {
+      if (changes[key].action === "unset") delete map[key];
+      else map[key] = changes[key].value;
+    }
+    writeRepoEnv(map);
+    console.log(`\n  ${OK} wrote ${c.cyan(envFilePath)}`);
+    if (changedKeys.some((k) => (shellSnapshot[k] ?? "") !== "")) {
+      console.log(`  ${WARN} ${c.dim("some keys are also exported in your shell — that export overrides .env.")}`);
+    }
+  }
+
+  if (target.value === "claude" || target.value === "both") {
+    await applyToClaudeCode(changes, changedKeys, { prompt, ask, confirm });
+  }
+  console.log("");
+  reader.close();
+}
+
+// Merge the reviewed changes into one Claude Code MCP server's env block: pick (or create) the server, preview the result, confirm, back up to .bak, then replace the config via a temp file.
+async function applyToClaudeCode(changes, changedKeys, { prompt, ask, confirm }) {
+  const json = readClaudeConfig();
+  if (!json) {
+    console.log(`\n  ${BAD} Claude Code config not found/parseable at ${claudeConfigPath} — skipping Claude Code.`);
+    return;
+  }
+  const servers = findGeminiServers(json);
+  console.log("\n" + c.bold("Claude Code — pick a server to update"));
+  servers.forEach((s, i) => {
+    const loc = s.scope === "user" ? "user" : `local:${s.project}`;
+    console.log(`    ${i + 1}) ${s.name}  ${c.dim("[" + loc + "]")}`);
+  });
+  console.log(`    n) enter a different name (create under this project if missing)`);
+  console.log(`    s) skip Claude Code`);
+  const ans = (await prompt(`  choose [s]: `)).toLowerCase();
+  let ref;
+  if (ans === "" || ans === "s") {
+    console.log(c.dim("  skipped Claude Code."));
+    return;
+  } else if (ans === "n") {
+    const name = (await ask("server name", "gemini-cli")).trim() || "gemini-cli";
+    // Search existing across scopes; else create under the current project (local).
+    const existing = servers.find((s) => s.name === name) ||
+      (json.mcpServers?.[name] && { scope: "user", project: null, name, cfg: json.mcpServers[name] }) ||
+      (json.projects?.[repoRoot]?.mcpServers?.[name] && { scope: "local", project: repoRoot, name, cfg: json.projects[repoRoot].mcpServers[name] });
+    if (existing) {
+      ref = existing;
+    } else {
+      console.log(c.dim(`  '${name}' not found — will create it under project ${repoRoot} (local scope) using \`npx -y gemini-mcp-tool\`.`));
+      if (!(await confirm("create it?", true))) return;
+      json.projects = json.projects || {};
+      json.projects[repoRoot] = json.projects[repoRoot] || {};
+      json.projects[repoRoot].mcpServers = json.projects[repoRoot].mcpServers || {};
+      json.projects[repoRoot].mcpServers[name] = { type: "stdio", command: "npx", args: ["-y", "gemini-mcp-tool"], env: {} };
+      ref = { scope: "local", project: repoRoot, name, cfg: json.projects[repoRoot].mcpServers[name] };
+    }
+  } else {
+    const idx = Number(ans) - 1; // the list above is numbered from 1
+    if (!Number.isInteger(idx) || !servers[idx]) {
+      console.log(c.yellow("  unrecognised — skipping Claude Code."));
+      return;
+    }
+    ref = servers[idx];
+  }
+
+  // Merge env into the chosen server.
+  const target = ref.scope === "user" ? json.mcpServers[ref.name] : json.projects[ref.project].mcpServers[ref.name];
+  target.env = target.env || {};
+  for (const key of changedKeys) {
+    if (changes[key].action === "unset") delete target.env[key];
+    else target.env[key] = changes[key].value;
+  }
+
+  const loc = ref.scope === "user" ? "user" : `local:${ref.project}`;
+  console.log(`\n  Resulting env for ${c.cyan(ref.name)} ${c.dim("[" + loc + "]")}:`);
+  const entries = Object.entries(target.env);
+  if (entries.length === 0) console.log(c.dim("    (empty)"));
+  for (const [k, v] of entries) console.log(`    ${k} = ${c.cyan(v)}`);
+  console.log(c.dim(`  Editing ${claudeConfigPath} (a ${c.bold("backup")} will be written to .bak first).`));
+  if (!(await confirm("write this change?", true))) {
+    console.log(c.dim("  not written."));
+    return;
+  }
+
+  try {
+    copyFileSync(claudeConfigPath, claudeConfigPath + ".bak");
+    const tmp = `${claudeConfigPath}.tmp-${process.pid}`;
+    writeFileSync(tmp, JSON.stringify(json, null, 2) + "\n", { encoding: "utf8", mode: 0o600 }); // owner-only: the temp file's mode becomes the config's mode after the rename, and env blocks may hold secrets
+    renameSync(tmp, claudeConfigPath); // swap the finished file in, so a failed write never leaves a half-written config
+    console.log(`  ${OK} updated ${c.cyan(ref.name)} in ${claudeConfigPath} ${c.dim("(backup: " + claudeConfigPath + ".bak)")}`);
+    console.log(`  ${WARN} ${c.dim("restart Claude Code to pick up the change (avoid editing while it's running).")}`);
+  } catch (e) {
+    console.log(`  ${BAD} failed to write config: ${e instanceof Error ? e.message : String(e)}`);
+  }
+}
+
+// ── dispatch ────────────────────────────────────────────────────────────────
+const mode = (process.argv[2] || "").toLowerCase();
+if (mode === "setup") {
+  runSetup().catch((e) => {
+    console.error(e instanceof Error ? e.message : String(e));
+    process.exit(1);
+  });
 } else {
-  console.log(`  ${BAD} not found on PATH`);
-  if (backend === "gemini") {
-    problems.push(
-      `Gemini CLI not found. Install it (npm i -g @google/gemini-cli) or set ${ENV.GEMINI_CLI_PATH} to its full path.`,
-    );
-  }
-}
-
-// ── Antigravity CLI (agy) ─────────────────────────────────────────────────--
-heading("Antigravity CLI (agy)");
-const agy = detectCli("agy");
-const agyDataDir = path.join(os.homedir(), ".gemini", "antigravity-cli");
-if (agy.found) {
-  console.log(`  ${OK} found`);
-  console.log(`     path     ${agy.primary}`);
-  console.log(`     version  ${agy.version ? c.cyan(agy.version) : c.yellow("(could not read --version)")}`);
-  console.log(`     data dir ${existsSync(agyDataDir) ? OK + " " + agyDataDir : WARN + " missing (run `agy -i` once to authenticate)"}`);
-} else {
-  console.log(`  ${c.dim("not installed")} ${c.dim("— optional; the future backend once Gemini CLI retires 2026-06-18")}`);
-  if (backend === "agy") {
-    problems.push("GEMINI_MCP_BACKEND=agy but the agy executable was not found on PATH.");
-  }
-}
-
-// ── Model configuration ───────────────────────────────────────────────────--
-heading("Model configuration");
-const defaultModel = (process.env[ENV.MODEL] || "").trim();
-const flashModel = (process.env[ENV.FLASH_MODEL] || "").trim() || DEFAULT_FLASH_MODEL;
-console.log(`  default model   ${defaultModel ? c.cyan(defaultModel) + c.dim("  (GEMINI_MODEL)") : c.dim("(Gemini CLI's own default; pass model: or set GEMINI_MODEL)")}`);
-console.log(`  flash fallback  ${c.cyan(flashModel)}${process.env[ENV.FLASH_MODEL] ? c.dim("  (GEMINI_FLASH_MODEL)") : c.dim("  (default)")}`);
-if (backend === "agy") {
-  console.log(`  ${WARN} agy print-mode ignores model selection (hardcoded to Gemini 3.5 Flash)`);
-}
-
-// ── Approval & timeout ─────────────────────────────────────────────────────--
-heading("Behaviour");
-const approval = (process.env[ENV.APPROVAL_MODE] || "").trim();
-if (!approval) {
-  console.log(`  approval mode   ${c.dim("(unset → no flag; plain Q&A)")}`);
-} else if (APPROVAL_MODES.includes(approval)) {
-  console.log(`  approval mode   ${c.cyan(approval)}`);
-  if (approval === "plan") console.log(`  ${WARN} 'plan' makes Gemini an autonomous planner in headless mode — not ideal for plain Q&A`);
-} else {
-  console.log(`  approval mode   ${c.yellow(approval)} ${WARN} not one of ${APPROVAL_MODES.join("/")} — will be ignored`);
-}
-const rawTimeout = (process.env[ENV.TIMEOUT_MS] || "").trim();
-let timeoutMs = 0; // disabled by default (1.1.6 parity: waits forever)
-if (rawTimeout) {
-  const n = Number(rawTimeout);
-  timeoutMs = Number.isFinite(n) && n > 0 ? n : 0;
-}
-console.log(`  timeout         ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? c.dim("  (GEMINI_MCP_TIMEOUT_MS)") : c.dim("  (default: disabled)")}`);
-
-// ── Environment variables ──────────────────────────────────────────────────--
-heading("Environment variables (this shell)");
-for (const key of Object.values(ENV)) console.log("  " + envLine(key));
-console.log(c.dim("\n  Note: your MCP client sets its own env for the server process — these are"));
-console.log(c.dim("  the values in the shell running this doctor, which may differ."));
-
-// ── Summary ────────────────────────────────────────────────────────────────--
-heading("Summary");
-if (problems.length === 0) {
-  console.log(`  ${OK} ${c.green("No problems detected.")} Active backend '${backend}' looks ready.`);
-} else {
-  console.log(`  ${BAD} ${c.red(`${problems.length} issue(s) found:`)}`);
-  for (const p of problems) console.log(`     - ${p}`);
+  runReport();
 }
-console.log("");
-process.exit(problems.length === 0 ? 0 : 1);
diff --git a/src/index.ts b/src/index.ts
index a1d10ee..32b17f4 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -18,16 +18,22 @@ import {
 } from "@modelcontextprotocol/sdk/types.js";
 import { readFileSync } from "node:fs";
 import { Logger } from "./utils/logger.js";
+import { loadEnvFile } from "./utils/envFile.js";
 import { PROTOCOL, ToolArguments } from "./constants.js";
 
-import { 
-  getToolDefinitions, 
-  getPromptDefinitions, 
-  executeTool, 
-  toolExists, 
-  getPromptMessage 
+import {
+  getToolDefinitions,
+  getPromptDefinitions,
+  executeTool,
+  toolExists,
+  getPromptMessage
 } from "./tools/index.js";
 
+// Load the optional .env (global per-install config from `npm run doctor setup`).
+// Static imports above are evaluated first, so this only reaches code that reads process.env at call
+// time, not at import time. No-op without a .env; never overrides env set by the shell or the MCP client.
+loadEnvFile();
+
 // Read the version from package.json at runtime so it never drifts from the
 // published version (it previously hardcoded an out-of-date "1.1.4").
 const pkg = JSON.parse(
diff --git a/src/utils/envFile.test.ts b/src/utils/envFile.test.ts
new file mode 100644
index 0000000..03d6e68
--- /dev/null
+++ b/src/utils/envFile.test.ts
@@ -0,0 +1,33 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { parseEnv } from "./envFile.js";
+
+test("parseEnv: basic KEY=VALUE pairs", () => {
+  const r = parseEnv("GEMINI_MODEL=gemini-2.5-pro\nGEMINI_MCP_TIMEOUT_MS=1800000");
+  assert.equal(r.GEMINI_MODEL, "gemini-2.5-pro");
+  assert.equal(r.GEMINI_MCP_TIMEOUT_MS, "1800000");
+});
+
+test("parseEnv: skips blanks and # comments", () => {
+  const r = parseEnv("# a comment\n\n  # indented comment\nGEMINI_MODEL=x\n"); // comment, empty line, indented comment, one assignment, trailing newline
+  assert.deepEqual(Object.keys(r), ["GEMINI_MODEL"]); // only the assignment may produce a key
+  assert.equal(r.GEMINI_MODEL, "x");
+});
+
+test("parseEnv: strips one layer of matching quotes and honours `export`", () => {
+  const r = parseEnv(`export GEMINI_MODEL="gemini 2.5"\nGEMINI_CLI_PATH='/a/b c/gemini'`); // one double-quoted and one single-quoted value, both containing spaces
+  assert.equal(r.GEMINI_MODEL, "gemini 2.5");
+  assert.equal(r.GEMINI_CLI_PATH, "/a/b c/gemini");
+});
+
+test("parseEnv: keeps '=' inside values and trims surrounding whitespace", () => {
+  const r = parseEnv("  GEMINI_MODEL = a=b=c  \nGEMINI_FLASH_MODEL=  flash  "); // only the first '=' splits key from value
+  assert.equal(r.GEMINI_MODEL, "a=b=c");
+  assert.equal(r.GEMINI_FLASH_MODEL, "flash");
+});
+
+test("parseEnv: ignores malformed lines without '='", () => {
+  const r = parseEnv("NOT_AN_ASSIGNMENT\nGEMINI_MODEL=ok"); // a bad line must not stop later lines from parsing
+  assert.equal(r.NOT_AN_ASSIGNMENT, undefined);
+  assert.equal(r.GEMINI_MODEL, "ok");
+});
diff --git a/src/utils/envFile.ts b/src/utils/envFile.ts
new file mode 100644
index 0000000..ecf5535
--- /dev/null
+++ b/src/utils/envFile.ts
@@ -0,0 +1,75 @@
+import { readFileSync, existsSync } from "node:fs";
+import * as path from "node:path";
+import { fileURLToPath } from "node:url";
+import { ENV } from "../constants.js";
+import { Logger } from "./logger.js";
+
+// Only these recognised keys are imported from a .env — never arbitrary keys —
+// so an unrelated .env sitting in the launch directory can't inject variables
+// into the server process.
+const KNOWN_KEYS: ReadonlySet<string> = new Set(Object.values(ENV));
+
+/**
+ * Parse dotenv-style text into a key → value map. Blank lines, `#` comments and lines with
+ * no '=' or no key are skipped; a leading `export` and one layer of matching quotes are stripped.
+ */
+export function parseEnv(content: string): Record<string, string> {
+  const out: Record<string, string> = {};
+  for (const rawLine of content.split(/\r?\n/)) {
+    const line = rawLine.trim();
+    if (!line || line.startsWith("#")) continue;
+    const eq = line.indexOf("="); // only the first '=' separates key from value
+    if (eq === -1) continue;
+    // Accept any whitespace after `export` (a tab as well as a space).
+    const key = line.slice(0, eq).trim().replace(/^export\s+/, "");
+    if (!key) continue; // "=value" or "export =value" has no name to assign to
+    let val = line.slice(eq + 1).trim();
+    const quote = val[0];
+    // Strip a single layer of matching surrounding quotes.
+    if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) val = val.slice(1, -1);
+    out[key] = val;
+  }
+  return out;
+}
+
+// Ordered .env locations: the package root first, then the launch directory when it differs.
+// NOTE(review): a .env in the launch directory is controlled by whoever owns that directory — confirm no recognised key (e.g. an executable path) is unsafe to take from it.
+function candidatePaths(): string[] {
+  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
+  const root = path.resolve(moduleDir, "..", ".."); // e.g. dist/utils/envFile.js → package root is two levels up
+  const launchDir = path.resolve(process.cwd());
+  const fromRoot = path.join(root, ".env");
+  return launchDir === root ? [fromRoot] : [fromRoot, path.join(process.cwd(), ".env")];
+}
+
+/**
+ * Import recognised config keys (the values of {@link ENV}) from the first readable
+ * `.env` among the candidate locations into `process.env`. A key that already holds
+ * a non-empty value is left alone, so a shell export or the MCP client's own `env`
+ * block always takes precedence over the file.
+ *
+ * The file is the "global" (per-install) config written by `npm run doctor setup`.
+ * With no `.env` present nothing happens, so such installs behave as before (1.1.6 parity).
+ */
+export function loadEnvFile(): void {
+  for (const file of candidatePaths()) {
+    if (!existsSync(file)) continue;
+    let settings: Record<string, string>;
+    try {
+      settings = parseEnv(readFileSync(file, "utf8"));
+    } catch (e) {
+      // Unreadable file: warn and fall through to the next candidate.
+      Logger.warn(`Could not read ${file}: ${e instanceof Error ? e.message : String(e)}`);
+      continue;
+    }
+    // Keep only recognised keys whose current value is unset or empty (shell/client env wins over .env).
+    const pending = Object.entries(settings).filter(
+      ([name]) => KNOWN_KEYS.has(name) && (process.env[name] === undefined || process.env[name] === ""),
+    );
+    for (const [name, value] of pending) {
+      process.env[name] = value;
+    }
+    if (pending.length > 0) Logger.debug(`Loaded ${pending.length} setting(s) from ${file}`);
+    return; // first existing .env wins
+  }
+}

From 32b855fc087c7b6b72dd7dc7b17e007e19c09839 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sun, 31 May 2026 18:27:10 -0700
Subject: [PATCH 8/8] test: categorized suite (unit/integration/e2e) + CI
 gating

Restructure into test/{unit,integration,e2e}: migrate the 7 colocated unit
tests; add changeMode pipeline / chunkCache / registry / brainstorm unit
coverage; hermetic integration tests (changeMode->fetch-chunk flow, registry
-> tool contract); and a live e2e suite driving the real gemini through the
MCP server (auto-skips without gemini). Category-aware runner, NODE_ENV=test
log muting, tsconfig.test.json typecheck, CI gating on Node 18/20/22.

Reference branch for the phased re-derivation off main.
---
 .github/workflows/ci.yml                      |  21 ++-
 package.json                                  |   7 +-
 scripts/run-tests.mjs                         |  65 ++++++-
 src/tools/brainstorm.tool.ts                  |   4 +-
 src/utils/logger.ts                           |  10 ++
 test/README.md                                |  85 +++++++++
 test/e2e/ask-gemini.e2e.test.ts               |  75 ++++++++
 test/e2e/fixtures/sentinel.txt                |   3 +
 test/e2e/harness.ts                           | 165 ++++++++++++++++++
 test/e2e/server.e2e.test.ts                   |  62 +++++++
 test/integration/changeMode-pipeline.test.ts  |  77 ++++++++
 test/integration/tool-contract.test.ts        |  51 ++++++
 {src => test/unit}/backends/agy.test.ts       |   2 +-
 {src => test/unit}/backends/gemini.test.ts    |   2 +-
 {src => test/unit}/backends/index.test.ts     |   2 +-
 test/unit/tools/brainstorm.test.ts            |  61 +++++++
 test/unit/tools/registry.test.ts              |  63 +++++++
 test/unit/utils/changeModeChunker.test.ts     |  73 ++++++++
 test/unit/utils/changeModeParser.test.ts      |  89 ++++++++++
 test/unit/utils/changeModeTranslator.test.ts  |  72 ++++++++
 test/unit/utils/chunkCache.test.ts            |  96 ++++++++++
 .../unit}/utils/commandExecutor.test.ts       |   2 +-
 {src => test/unit}/utils/envFile.test.ts      |   2 +-
 .../unit}/utils/geminiExecutor.test.ts        |   2 +-
 .../unit}/utils/timeoutManager.test.ts        |   2 +-
 tsconfig.test.json                            |   8 +
 26 files changed, 1073 insertions(+), 28 deletions(-)
 create mode 100644 test/README.md
 create mode 100644 test/e2e/ask-gemini.e2e.test.ts
 create mode 100644 test/e2e/fixtures/sentinel.txt
 create mode 100644 test/e2e/harness.ts
 create mode 100644 test/e2e/server.e2e.test.ts
 create mode 100644 test/integration/changeMode-pipeline.test.ts
 create mode 100644 test/integration/tool-contract.test.ts
 rename {src => test/unit}/backends/agy.test.ts (92%)
 rename {src => test/unit}/backends/gemini.test.ts (98%)
 rename {src => test/unit}/backends/index.test.ts (92%)
 create mode 100644 test/unit/tools/brainstorm.test.ts
 create mode 100644 test/unit/tools/registry.test.ts
 create mode 100644 test/unit/utils/changeModeChunker.test.ts
 create mode 100644 test/unit/utils/changeModeParser.test.ts
 create mode 100644 test/unit/utils/changeModeTranslator.test.ts
 create mode 100644 test/unit/utils/chunkCache.test.ts
 rename {src => test/unit}/utils/commandExecutor.test.ts (97%)
 rename {src => test/unit}/utils/envFile.test.ts (95%)
 rename {src => test/unit}/utils/geminiExecutor.test.ts (91%)
 rename {src => test/unit}/utils/timeoutManager.test.ts (90%)
 create mode 100644 tsconfig.test.json

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f865fbe..acfe29d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,22 +12,27 @@ jobs:
     
     strategy:
       matrix:
-        node-version: [16.x, 18.x, 20.x]
-    
+        # node:test (used by the suite) requires Node >= 18.
+        node-version: [18.x, 20.x, 22.x]
+
     steps:
     - uses: actions/checkout@v4
-    
+
     - name: Use Node.js ${{ matrix.node-version }}
       uses: actions/setup-node@v4
       with:
         node-version: ${{ matrix.node-version }}
-    
+
     - name: Install dependencies
       run: npm ci
-    
+
     - name: Build
       run: npm run build
-    
+
+    - name: Type-check (source + tests)
+      run: npm run lint
+
+    # Hermetic suite only (unit + integration). The e2e suite needs an
+    # authenticated gemini CLI and is run on demand via `npm run test:e2e`.
     - name: Run tests
-      run: npm test
-      continue-on-error: true
\ No newline at end of file
+      run: npm test
\ No newline at end of file
diff --git a/package.json b/package.json
index efa471a..c124fbf 100644
--- a/package.json
+++ b/package.json
@@ -13,8 +13,11 @@
     "start": "node dist/index.js",
     "dev": "tsc && node dist/index.js",
     "doctor": "node scripts/doctor.mjs",
-    "test": "node scripts/run-tests.mjs",
-    "lint": "tsc --noEmit",
+    "test": "node scripts/run-tests.mjs unit integration",
+    "test:unit": "node scripts/run-tests.mjs unit",
+    "test:integration": "node scripts/run-tests.mjs integration",
+    "test:e2e": "npm run build && node scripts/run-tests.mjs e2e",
+    "lint": "tsc -p tsconfig.test.json",
     "contribute": "tsx src/contribute.ts",
     "prepublishOnly": "echo '⚠️  Remember to test locally first!' && npm run build",
     "docs:dev": "vitepress dev docs",
diff --git a/scripts/run-tests.mjs b/scripts/run-tests.mjs
index d1a978f..f671b6e 100644
--- a/scripts/run-tests.mjs
+++ b/scripts/run-tests.mjs
@@ -1,16 +1,47 @@
 #!/usr/bin/env node
-// Discover and run every *.test.ts under src/ with the built-in node:test
-// runner, using the tsx loader so the TypeScript sources run directly.
+// Category-aware test runner. Discovers *.test.ts under the selected category
+// folders (test/unit, test/integration, test/e2e) and runs them with the
+// built-in node:test runner via the tsx loader, so the TypeScript sources run
+// directly.
+//
+// Usage:
+//   node scripts/run-tests.mjs                  # default: unit + integration (hermetic)
+//   node scripts/run-tests.mjs unit             # one category
+//   node scripts/run-tests.mjs integration e2e  # several
+//   node scripts/run-tests.mjs all              # unit + integration + e2e
+//
+// Categories:
+//   unit         pure, single-module tests. No subprocess, no network, no real CLI.
+//   integration  several real modules wired together. Still hermetic — never the real gemini CLI.
+//   e2e          the real gemini CLI driven through the real MCP server over stdio. Opt-in (live).
 import { spawnSync } from "node:child_process";
-import { readdirSync, statSync } from "node:fs";
+import { readdirSync, statSync, existsSync } from "node:fs";
 import path from "node:path";
 import { fileURLToPath } from "node:url";
 
 const scriptDir = path.dirname(fileURLToPath(import.meta.url));
-const srcDir = path.join(scriptDir, "..", "src");
+const testDir = path.join(scriptDir, "..", "test");
+
+const KNOWN = ["unit", "integration", "e2e"];
+const DEFAULT = ["unit", "integration"]; // the hermetic suite `npm test` runs and CI gates on
+
+function resolveCategories(argv) {
+  const args = argv.slice(2).map((a) => a.toLowerCase());
+  if (args.length === 0) return DEFAULT;
+  if (args.includes("all")) return KNOWN;
+  const unknown = args.filter((a) => !KNOWN.includes(a));
+  if (unknown.length > 0) {
+    console.error(`Unknown test category: ${unknown.join(", ")}`);
+    console.error(`Valid categories: ${KNOWN.join(", ")}, all`);
+    process.exit(2);
+  }
+  // De-dupe while preserving the documented order.
+  return KNOWN.filter((c) => args.includes(c));
+}
 
 function findTests(dir) {
   const found = [];
+  if (!existsSync(dir)) return found;
   for (const entry of readdirSync(dir)) {
     const full = path.join(dir, entry);
     if (statSync(full).isDirectory()) found.push(...findTests(full));
@@ -19,21 +50,37 @@ function findTests(dir) {
   return found;
 }
 
-const tests = findTests(srcDir);
+const categories = resolveCategories(process.argv);
+const tests = categories.flatMap((c) => findTests(path.join(testDir, c)));
+
 if (tests.length === 0) {
-  console.log("No test files found.");
+  console.log(`No test files found for: ${categories.join(", ")}`);
   process.exit(0);
 }
 
+console.log(`Running ${tests.length} test file(s) [${categories.join(", ")}]`);
+
 // tsx is loaded via `--import` on Node >= 20.6, and the older `--loader` flag
-// below that (the engines floor is >=18, where `--import` may be unavailable).
+// below that.
 const [major, minor] = process.versions.node.split(".").map(Number);
 const supportsImport = major > 20 || (major === 20 && minor >= 6);
 const loaderArgs = supportsImport ? ["--import", "tsx"] : ["--loader", "tsx"];
 
+// Mute routine [GMCPT] logging for the hermetic categories so the reporter
+// output stays readable. The e2e suite keeps full server logs (its child
+// server process inherits this env), which is useful for debugging live calls.
+const env = { ...process.env };
+if (!categories.includes("e2e")) env.NODE_ENV = "test";
+
+// Run test files serially (--test-concurrency=1). The changeMode chunk cache is
+// a single shared on-disk dir (os.tmpdir()/gemini-mcp-chunks); files that touch
+// it (chunkCache, changeMode-pipeline) would otherwise race across parallel
+// worker processes. Serial e2e also avoids hitting the gemini quota in parallel.
+// The hermetic suite is tiny, so the cost is negligible. (Flag available on the
+// Node 18.19+/20.10+/22 versions CI runs.)
 const result = spawnSync(
   process.execPath,
-  [...loaderArgs, "--test", ...tests],
-  { stdio: "inherit" },
+  [...loaderArgs, "--test", "--test-concurrency=1", ...tests],
+  { stdio: "inherit", env },
 );
 process.exit(result.status ?? 1);
diff --git a/src/tools/brainstorm.tool.ts b/src/tools/brainstorm.tool.ts
index e5680d9..fac8418 100644
--- a/src/tools/brainstorm.tool.ts
+++ b/src/tools/brainstorm.tool.ts
@@ -4,7 +4,7 @@ import { Logger } from '../utils/logger.js';
 import { executeGeminiCLI } from '../utils/geminiExecutor.js';
 import { type ApprovalMode } from '../constants.js';
 
-function buildBrainstormPrompt(config: {
+export function buildBrainstormPrompt(config: {
   prompt: string;
   methodology: string;
   domain?: string;
@@ -67,7 +67,7 @@ Begin brainstorming session:`;
 /**
  * Returns methodology-specific instructions for structured brainstorming
  */
-function getMethodologyInstructions(methodology: string, domain?: string): string {
+export function getMethodologyInstructions(methodology: string, domain?: string): string {
   const methodologies: Record<string, string> = {
     'divergent': `**Divergent Thinking Approach:**
 - Generate maximum quantity of ideas without self-censoring
diff --git a/src/utils/logger.ts b/src/utils/logger.ts
index e5be641..35269f2 100644
--- a/src/utils/logger.ts
+++ b/src/utils/logger.ts
@@ -6,11 +6,20 @@ export class Logger {
     return `${LOG_PREFIX} ${message}` + "\n";
   }
 
+  // Routine logging is muted when NODE_ENV=test so the test reporter output
+  // stays readable; errors are never muted. Production never sets NODE_ENV=test,
+  // so default (1.1.6-parity) behaviour is unchanged.
+  private static get muted(): boolean {
+    return process.env.NODE_ENV === "test";
+  }
+
   static log(message: string, ...args: any[]): void {
+    if (this.muted) return;
     console.warn(this.formatMessage(message), ...args);
   }
 
   static warn(message: string, ...args: any[]): void {
+    if (this.muted) return;
     console.warn(this.formatMessage(message), ...args);
   }
 
@@ -19,6 +28,7 @@ export class Logger {
   }
 
   static debug(message: string, ...args: any[]): void {
+    if (this.muted) return;
     console.warn(this.formatMessage(message), ...args);
   }
 
diff --git a/test/README.md b/test/README.md
new file mode 100644
index 0000000..392d407
--- /dev/null
+++ b/test/README.md
@@ -0,0 +1,85 @@
+# Tests
+
+The suite is split into three categories by **how much of the real world they touch**. Each lives in its own folder and runs with the built-in [`node:test`](https://nodejs.org/api/test.html) runner via the `tsx` loader (no extra test framework).
+
+| Category | Folder | Touches the real gemini CLI? | Runs in CI? | Command |
+|---|---|---|---|---|
+| **unit** | `test/unit/` | No | Yes (gates merges) | `npm run test:unit` |
+| **integration** | `test/integration/` | No | Yes (gates merges) | `npm run test:integration` |
+| **e2e** | `test/e2e/` | **Yes — the real CLI** | No (opt-in, local) | `npm run test:e2e` |
+
+```bash
+npm test              # unit + integration (the hermetic, CI-gating suite)
+npm run test:unit
+npm run test:integration
+npm run test:e2e      # builds, then drives the REAL gemini CLI through the MCP server
+node scripts/run-tests.mjs all   # everything (unit + integration + e2e)
+```
+
+## What goes where
+
+### `unit/` — pure, single-module
+Fast, deterministic tests of one module's logic. **No subprocess, no network, no real CLI.**
+Mirrors `src/` (`test/unit/utils/...`, `test/unit/backends/...`, `test/unit/tools/...`).
+Examples: argument builders, model/approval/timeout resolution, the changeMode
+parser/chunker/translator, the chunk cache, the registry's schema/prompt helpers,
+brainstorm prompt construction.
+
+### `integration/` — several real modules wired together
+Still **hermetic** — it never invokes the real gemini CLI. The "Gemini output" is a
+fixture string fed into the real downstream pipeline. Covers the cross-module flows a
+user actually hits:
+- the full **changeMode pipeline**: response string → parse → validate → chunk → cache →
+  `fetch-chunk` retrieval of later chunks;
+- the **registry → tool contract**: argument validation surfaced as friendly errors, and
+  every tool guard/error branch that resolves *without* calling Gemini.
+
+> Integration tests must **not** spawn the gemini CLI. Anything that needs a real model
+> response belongs in `e2e/`.
+
+### `e2e/` — the real product, end to end
+Spawns the **built MCP server** (`dist/index.js`) over stdio and connects with the MCP
+SDK client — exactly how Claude / mcpjam do. Tool calls exercise the whole path:
+protocol → registry → tool → backend → spawned **gemini** CLI. This is the automated
+replacement for manual mcpjam testing.
+
+- Gemini-dependent tests **auto-skip** when the `gemini` CLI is not on `PATH`, so the
+  suite degrades gracefully. The checks that do not need gemini (the `ping`, `timeout-test`,
+  and `fetch-chunk` tools, plus `tools/list` and `prompts/list`) always run.
+- `npm run test:e2e` builds first, so it tests exactly what ships.
+- Live model calls are slow and use your gemini quota; the model is pinned to
+  `gemini-2.5-flash` and each test has a generous timeout.
+- Every E2E MCP response is printed as a `node:test` diagnostic by default, so
+  passing results still show the exact raw response that each assertion checked.
+- Shared setup (spawning/closing the server, `gemini` detection, reading tool text) lives
+  in `test/e2e/harness.ts`.
+
+## Adding a test
+
+1. Pick the category by the table above. If it needs a real model answer, it's `e2e`.
+2. Create `test/<category>/<area>.test.ts` (unit tests mirror `src/`, e.g. `test/unit/utils/thing.test.ts`; e2e files are named `*.e2e.test.ts`).
+3. Use `node:test` + `node:assert/strict`:
+
+   ```ts
+   import { test } from "node:test";
+   import assert from "node:assert/strict";
+   import { thing } from "../../../src/utils/thing.js"; // unit: 3 levels up to src/
+
+   test("does the thing", () => {
+     assert.equal(thing(1), 2);
+   });
+   ```
+
+   For e2e, drive the server via the harness:
+
+   ```ts
+   import { startServer, textOf, GEMINI_SKIP } from "./harness.js";
+   // ...callTool, then assert on textOf(result)
+   ```
+
+4. Keep `unit`/`integration` hermetic. Run `npm test` (and `npm run lint` to type-check).
+
+## Notes
+- The runner sets `NODE_ENV=test` for hermetic runs (`npm test`, `test:unit`, `test:integration`),
+  which mutes routine `[GMCPT]` logging (errors still print). Any run that includes e2e keeps full logs.
+- `npm run lint` type-checks `src/` **and** `test/` via `tsconfig.test.json`.
diff --git a/test/e2e/ask-gemini.e2e.test.ts b/test/e2e/ask-gemini.e2e.test.ts
new file mode 100644
index 0000000..313c770
--- /dev/null
+++ b/test/e2e/ask-gemini.e2e.test.ts
@@ -0,0 +1,75 @@
+import { test, before, after } from "node:test";
+import assert from "node:assert/strict";
+import { callGemini, callTool, startServer, textOf, GEMINI_SKIP, type ServerHandle } from "./harness.js";
+
+// LIVE tests: these drive the real gemini CLI through the real MCP server. They
+// auto-skip when gemini is not on PATH, so the suite degrades gracefully. Real
+// model calls are slow, hence the generous per-test timeout. Model is pinned to
+// flash for speed and to spare the pro daily quota.
+const LIVE = { skip: GEMINI_SKIP, timeout: 120_000 } as const;
+const MODEL = "gemini-2.5-flash";
+
+let server: ServerHandle;
+
+before(async () => {
+  server = await startServer();
+});
+after(async () => {
+  await server?.close();
+});
+
+test("ask-gemini answers a deterministic factual question", LIVE, async (t) => {
+  const { isError, text } = await callGemini(t, server, {
+    name: "ask-gemini",
+    arguments: { prompt: "What is 2 + 2? Reply with only the number.", model: MODEL },
+  });
+  assert.equal(isError, false, text);
+  assert.match(text, /Gemini response:/); // the tool's wrapper is always present
+  assert.match(text, /\b4\b/); // ...and the model actually answered
+});
+
+test("ask-gemini echoes the session id so a follow-up can resume it", LIVE, async (t) => {
+  // Unique per run: gemini persists sessions to disk, so a fixed id collides
+  // ("Session ID already exists") on the next run. The [session: …] marker is
+  // added by the tool itself, so asserting on this exact id is deterministic.
+  const sessionId = `e2e-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
+  const res = await callTool(t, server, {
+    name: "ask-gemini",
+    arguments: { prompt: "Reply with the single word: ok", model: MODEL, sessionId },
+  });
+  const text = textOf(res);
+  assert.equal(res.isError ?? false, false, text);
+  assert.ok(text.includes(`[session: ${sessionId}]`), text);
+});
+
+test("ask-gemini inlines an in-project @file reference", LIVE, async (t) => {
+  const { isError, text } = await callGemini(t, server, {
+    name: "ask-gemini",
+    arguments: {
+      prompt:
+        "@test/e2e/fixtures/sentinel.txt Reply with only the sentinel token that appears in this file.",
+      model: MODEL,
+    },
+  });
+  assert.equal(isError, false, text);
+  assert.match(text, /BANANA_SENTINEL_42/);
+});
+
+test("Help returns the gemini CLI help text", LIVE, async (t) => {
+  const res = await callTool(t, server, { name: "Help", arguments: {} });
+  const text = textOf(res);
+  assert.equal(res.isError ?? false, false, text);
+  assert.match(text, /usage|--model|gemini/i);
+});
+
+// brainstorm generates free-form ideas: the slowest call, and nondeterministic
+// (flash can even return empty). Its prompt construction is unit-tested, and its
+// integration path is identical to ask-gemini (proven above), so here we only
+// verify the live round-trip succeeds end-to-end. Larger timeout, single attempt.
+test("brainstorm completes a real round-trip through gemini", { skip: GEMINI_SKIP, timeout: 180_000 }, async (t) => {
+  const res = await callTool(t, server, {
+    name: "brainstorm",
+    arguments: { prompt: "one quick way to speed up CI", model: MODEL, ideaCount: 1, includeAnalysis: false },
+  });
+  assert.equal(res.isError ?? false, false, textOf(res));
+});
diff --git a/test/e2e/fixtures/sentinel.txt b/test/e2e/fixtures/sentinel.txt
new file mode 100644
index 0000000..107d5f6
--- /dev/null
+++ b/test/e2e/fixtures/sentinel.txt
@@ -0,0 +1,3 @@
+This fixture is read by the e2e @file test.
+The sentinel token is BANANA_SENTINEL_42.
+If you can read this line, the @file inlining worked.
diff --git a/test/e2e/harness.ts b/test/e2e/harness.ts
new file mode 100644
index 0000000..556a9fc
--- /dev/null
+++ b/test/e2e/harness.ts
@@ -0,0 +1,165 @@
+// Shared harness for the live e2e suite. Spawns the REAL MCP server (the built
+// dist/index.js) over stdio and connects with the MCP SDK client — the same way
+// a real client (Claude, mcpjam, etc.) does. Tool calls therefore exercise the
+// entire product: protocol -> registry -> tool -> backend -> spawned gemini CLI.
+//
+// This file is intentionally not named *.test.ts so the runner does not execute
+// it directly.
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
+import { execSync } from "node:child_process";
+import { existsSync } from "node:fs";
+import path from "node:path";
+import type { TestContext } from "node:test";
+import { inspect } from "node:util";
+import { fileURLToPath } from "node:url";
+
+const here = path.dirname(fileURLToPath(import.meta.url));
+export const REPO_ROOT = path.resolve(here, "..", "..");
+const SERVER_ENTRY = path.join(REPO_ROOT, "dist", "index.js");
+
+/** True when the real gemini CLI is installed and resolvable on PATH. */
+export function hasGemini(): boolean {
+  try {
+    execSync("gemini --version", { stdio: "ignore" });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/** Skip reason for gemini-dependent tests, or false when gemini is available. */
+export const GEMINI_SKIP: string | false = hasGemini()
+  ? false
+  : "gemini CLI not on PATH — run `npm i -g @google/gemini-cli` and authenticate";
+
+export interface ServerHandle {
+  client: Client;
+  close: () => Promise<void>;
+}
+
+/** Start the built MCP server and return a connected client. */
+export async function startServer(extraEnv: Record<string, string> = {}): Promise<ServerHandle> {
+  if (!existsSync(SERVER_ENTRY)) {
+    throw new Error(
+      `Server entry not found at ${SERVER_ENTRY}. Run \`npm run build\` first ` +
+        `(\`npm run test:e2e\` does this for you).`,
+    );
+  }
+
+  // Pass the parent environment through (PATH so gemini resolves, HOME so the
+  // gemini auth/config is found), plus any per-test overrides.
+  const env: Record<string, string> = {};
+  for (const [k, v] of Object.entries(process.env)) {
+    if (typeof v === "string") env[k] = v;
+  }
+  Object.assign(env, extraEnv);
+
+  const transport = new StdioClientTransport({
+    command: process.execPath,
+    args: [SERVER_ENTRY],
+    env,
+    stderr: "inherit", // surface server logs/errors during e2e runs
+  });
+  const client = new Client({ name: "gmcpt-e2e", version: "0.0.0" }, { capabilities: {} });
+  await client.connect(transport);
+
+  return { client, close: () => transport.close() };
+}
+
+function jsonReplacer(_key: string, value: unknown): unknown {
+  if (value instanceof Error) {
+    const raw: Record<string, unknown> = {
+      name: value.name,
+      message: value.message,
+    };
+    for (const key of Object.getOwnPropertyNames(value)) {
+      if (key !== "stack") raw[key] = (value as unknown as Record<string, unknown>)[key];
+    }
+    return raw;
+  }
+  return value;
+}
+
+function formatRaw(value: unknown): string {
+  try {
+    const json = JSON.stringify(value, jsonReplacer, 2);
+    if (json !== undefined) return json;
+  } catch {
+    // Fall back to inspect for unexpected non-JSON SDK objects.
+  }
+  return inspect(value, {
+    depth: null,
+    maxArrayLength: null,
+    maxStringLength: null,
+    breakLength: 100,
+  });
+}
+
+export function rawResponse(t: TestContext, label: string, value: unknown): void {
+  t.diagnostic(`${label} raw response:\n${formatRaw(value)}`);
+}
+
+export async function listTools(t: TestContext, server: ServerHandle) {
+  const result = await server.client.listTools();
+  rawResponse(t, "tools/list", result);
+  return result;
+}
+
+export async function listPrompts(t: TestContext, server: ServerHandle) {
+  const result = await server.client.listPrompts();
+  rawResponse(t, "prompts/list", result);
+  return result;
+}
+
+export async function callTool(
+  t: TestContext,
+  server: ServerHandle,
+  params: Parameters<Client["callTool"]>[0],
+) {
+  const result = await server.client.callTool(params);
+  rawResponse(t, `tools/call ${params.name}`, result);
+  return result;
+}
+
+/**
+ * Call a tool whose assertions depend on the live MODEL output, retrying on a
+ * transient empty/errored response (the model occasionally returns nothing).
+ * This verifies we eventually get a *valid* response without masking a real,
+ * persistent failure. Each attempt's raw response is printed. Use plain
+ * `callTool` for tools whose checks are deterministic (e.g. the session marker,
+ * `gemini --help`) so they aren't retried needlessly.
+ */
+export async function callGemini(
+  t: TestContext,
+  server: ServerHandle,
+  params: Parameters<Client["callTool"]>[0],
+  retries = 2,
+): Promise<{ isError: boolean; text: string }> {
+  let isError = false;
+  let text = "";
+  for (let attempt = 1; attempt <= retries + 1; attempt++) {
+    const res = await server.client.callTool(params);
+    isError = (res as { isError?: boolean }).isError ?? false;
+    text = textOf(res);
+    rawResponse(t, `tools/call ${params.name}${attempt > 1 ? ` (attempt ${attempt})` : ""}`, res);
+    if (!isError && text.trim().length > 0) break;
+    if (attempt <= retries) {
+      t.diagnostic(`${params.name}: empty/errored response — retrying (${attempt}/${retries})`);
+    }
+  }
+  return { isError, text };
+}
+
+/**
+ * Concatenate the text parts of a tool result. Typed as `unknown` because the
+ * SDK's callTool return is a union (the back-compat shape has no `content`);
+ * we narrow structurally here.
+ */
+export function textOf(result: unknown): string {
+  const content = (result as { content?: Array<{ type?: string; text?: string }> }).content ?? [];
+  return content
+    .filter((c) => c?.type === "text" && typeof c.text === "string")
+    .map((c) => c.text as string)
+    .join("\n");
+}
diff --git a/test/e2e/server.e2e.test.ts b/test/e2e/server.e2e.test.ts
new file mode 100644
index 0000000..2824d98
--- /dev/null
+++ b/test/e2e/server.e2e.test.ts
@@ -0,0 +1,62 @@
+import { test, before, after } from "node:test";
+import assert from "node:assert/strict";
+import { callTool, listPrompts, listTools, rawResponse, startServer, textOf, type ServerHandle } from "./harness.js";
+
+// Server + protocol + the tools that do NOT need the gemini CLI. These run
+// anywhere the project is built, with no gemini install or network.
+let server: ServerHandle;
+
+before(async () => {
+  server = await startServer();
+});
+after(async () => {
+  await server?.close();
+});
+
+test("lists every registered tool with a valid input schema", async (t) => {
+  const { tools } = await listTools(t, server);
+  const names = tools.map((t) => t.name);
+  for (const expected of ["ask-gemini", "brainstorm", "fetch-chunk", "ping", "Help", "timeout-test"]) {
+    assert.ok(names.includes(expected), `tools/list is missing "${expected}" (got: ${names.join(", ")})`);
+  }
+  const ask = tools.find((t) => t.name === "ask-gemini");
+  assert.ok(ask);
+  assert.equal(ask!.inputSchema.type, "object");
+});
+
+test("lists prompts derived from the registry", async (t) => {
+  const { prompts } = await listPrompts(t, server);
+  assert.ok(prompts.map((p) => p.name).includes("ask-gemini"));
+});
+
+test("ping echoes a message back over the full MCP round-trip", async (t) => {
+  const res = await callTool(t, server, { name: "ping", arguments: { prompt: "hello-e2e" } });
+  assert.equal(res.isError ?? false, false);
+  assert.match(textOf(res), /hello-e2e/);
+});
+
+test("timeout-test runs and reports completion", async (t) => {
+  const res = await callTool(t, server, { name: "timeout-test", arguments: { duration: 50 } });
+  assert.equal(res.isError ?? false, false);
+  assert.match(textOf(res), /Timeout test completed successfully/);
+});
+
+test("fetch-chunk returns a clean cache-miss message for an unknown key", async (t) => {
+  const res = await callTool(t, server, {
+    name: "fetch-chunk",
+    arguments: { cacheKey: "00000000", chunkIndex: 1 },
+  });
+  assert.equal(res.isError ?? false, false);
+  assert.match(textOf(res), /Cache miss/);
+});
+
+test("an unknown tool name is reported as an error", async (t) => {
+  await assert.rejects(async () => {
+    try {
+      await server.client.callTool({ name: "not-a-real-tool", arguments: {} });
+    } catch (error) {
+      rawResponse(t, "tools/call not-a-real-tool thrown response", error);
+      throw error;
+    }
+  });
+});
diff --git a/test/integration/changeMode-pipeline.test.ts b/test/integration/changeMode-pipeline.test.ts
new file mode 100644
index 0000000..faf5ca7
--- /dev/null
+++ b/test/integration/changeMode-pipeline.test.ts
@@ -0,0 +1,77 @@
+import { test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+// This wires the full changeMode path that the ask-gemini tool drives for large
+// edits: a Gemini-style response string -> parse -> validate -> chunk -> cache,
+// then the fetch-chunk tool retrieving subsequent chunks. No CLI is involved —
+// the "Gemini output" is a fixture string, so the test is hermetic.
+import { processChangeModeOutput } from "../../src/utils/geminiExecutor.js";
+import { fetchChunkTool } from "../../src/tools/fetch-chunk.tool.js";
+import { clearCache } from "../../src/utils/chunkCache.js";
+
+const FENCE = "```";
+
+function block(file: string, line: number, oldCode: string, newCode: string): string {
+  return [`**FILE: ${file}:${line}**`, FENCE, "OLD:", oldCode, "NEW:", newCode, FENCE].join("\n");
+}
+
+// Four edits with large bodies (~6 KB each) exceed the 20 KB chunk budget,
+// forcing the response to be split and cached.
+function bigMultiEditResponse(): string {
+  const big = "a".repeat(6000);
+  return [
+    block("src/one.ts", 10, big, "one"),
+    block("src/two.ts", 20, big, "two"),
+    block("src/three.ts", 30, big, "three"),
+    block("src/four.ts", 40, big, "four"),
+  ].join("\n\n");
+}
+
+beforeEach(() => clearCache());
+afterEach(() => clearCache());
+
+test("a single-edit response renders one chunk with no continuation", async () => {
+  const out = await processChangeModeOutput(block("src/a.ts", 1, "const x = 1;", "const x = 2;"), undefined, undefined, "prompt-a");
+  assert.match(out, /CHANGEMODE OUTPUT/);
+  assert.ok(out.includes("const x = 2;"));
+  assert.doesNotMatch(out, /Chunk 1 of/); // single chunk => no chunk header
+});
+
+test("a large multi-edit response chunks, caches, and advertises fetch-chunk", async () => {
+  const first = await processChangeModeOutput(bigMultiEditResponse(), undefined, undefined, "prompt-big");
+  assert.match(first, /Chunk 1 of 2/);
+
+  // The continuation must surface a real 8-char cache key.
+  const m = first.match(/cacheKey="([a-f0-9]{8})"/);
+  assert.ok(m, "expected a fetch-chunk cacheKey in the first chunk");
+  const cacheKey = m![1];
+
+  // The fetch-chunk tool retrieves the next chunk from that key.
+  const second = await fetchChunkTool.execute({ cacheKey, chunkIndex: 2 });
+  assert.match(second, /Chunk 2 of 2/);
+
+  // ...and chunk 1 is still retrievable.
+  const again = await fetchChunkTool.execute({ cacheKey, chunkIndex: 1 });
+  assert.match(again, /Chunk 1 of 2/);
+});
+
+test("fetch-chunk reports an out-of-range index", async () => {
+  const first = await processChangeModeOutput(bigMultiEditResponse(), undefined, undefined, "prompt-range");
+  const cacheKey = first.match(/cacheKey="([a-f0-9]{8})"/)![1];
+  const out = await fetchChunkTool.execute({ cacheKey, chunkIndex: 99 });
+  assert.match(out, /Invalid chunk index/);
+});
+
+test("fetch-chunk reports a cache miss for an unknown (but well-formed) key", async () => {
+  const out = await fetchChunkTool.execute({ cacheKey: "00000000", chunkIndex: 1 });
+  assert.match(out, /Cache miss/);
+});
+
+test("fetch-chunk rejects a malformed cache key before touching the cache", async () => {
+  const out = await fetchChunkTool.execute({ cacheKey: "../../etc/passwd", chunkIndex: 1 });
+  assert.match(out, /Invalid cacheKey format/);
+});
+
+test("a response with no OLD/NEW edits yields a clear message", async () => {
+  const out = await processChangeModeOutput("Gemini replied with prose and no edits.", undefined, undefined, "prompt-none");
+  assert.match(out, /No edits found/);
+});
diff --git a/test/integration/tool-contract.test.ts b/test/integration/tool-contract.test.ts
new file mode 100644
index 0000000..f9df500
--- /dev/null
+++ b/test/integration/tool-contract.test.ts
@@ -0,0 +1,51 @@
+import { test, beforeEach } from "node:test";
+import assert from "node:assert/strict";
+// Drives the registry -> tool boundary for every path that resolves WITHOUT
+// invoking the Gemini CLI: argument validation, and the guard/error branches
+// inside the tools. (The happy path that actually calls Gemini is covered by
+// the e2e suite.) These must never spawn a subprocess.
+import { executeTool } from "../../src/tools/index.js";
+import { clearCache } from "../../src/utils/chunkCache.js";
+
+beforeEach(() => clearCache());
+
+test("executeTool surfaces zod validation as a friendly error", async () => {
+  // ask-gemini requires a non-empty prompt; the error names the offending field.
+  await assert.rejects(() => executeTool("ask-gemini", {}), /Invalid arguments for ask-gemini.*prompt/s);
+});
+
+test("executeTool throws for an unknown tool", async () => {
+  await assert.rejects(() => executeTool("no-such-tool", {}), /Unknown tool/);
+});
+
+test("fetch-chunk via the registry returns a cache-miss message (no spawn)", async () => {
+  const out = await executeTool("fetch-chunk", { cacheKey: "deadbeef", chunkIndex: 1 });
+  assert.match(out, /Cache miss/);
+});
+
+test("fetch-chunk via the registry rejects a malformed cache key (no spawn)", async () => {
+  const out = await executeTool("fetch-chunk", { cacheKey: "not-a-key", chunkIndex: 1 });
+  assert.match(out, /Invalid cacheKey format/);
+});
+
+test("ask-gemini rejects a malformed chunkCacheKey before calling Gemini", async () => {
+  const out = await executeTool("ask-gemini", {
+    prompt: "x",
+    changeMode: true,
+    chunkIndex: 1,
+    chunkCacheKey: "bad!key!",
+  });
+  assert.match(out, /Invalid chunkCacheKey format/);
+});
+
+test("ask-gemini changeMode continuation with a missing cache reports no edits (no spawn)", async () => {
+  // Well-formed key, but nothing cached -> the continuation path returns the
+  // "no edits found" message rather than shelling out to Gemini.
+  const out = await executeTool("ask-gemini", {
+    prompt: "x",
+    changeMode: true,
+    chunkIndex: 1,
+    chunkCacheKey: "deadbeef",
+  });
+  assert.match(out, /No edits found/);
+});
diff --git a/src/backends/agy.test.ts b/test/unit/backends/agy.test.ts
similarity index 92%
rename from src/backends/agy.test.ts
rename to test/unit/backends/agy.test.ts
index 3e9418c..8703b10 100644
--- a/src/backends/agy.test.ts
+++ b/test/unit/backends/agy.test.ts
@@ -1,6 +1,6 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
-import { buildAgyArgs } from "./agy.js";
+import { buildAgyArgs } from "../../../src/backends/agy.js";
 
 test("buildAgyArgs maps prompt, sessions, sandbox, and yolo", () => {
   assert.deepEqual(buildAgyArgs("hi", {}), ["-p", "hi"]);
diff --git a/src/backends/gemini.test.ts b/test/unit/backends/gemini.test.ts
similarity index 98%
rename from src/backends/gemini.test.ts
rename to test/unit/backends/gemini.test.ts
index 1a068f0..325a40b 100644
--- a/src/backends/gemini.test.ts
+++ b/test/unit/backends/gemini.test.ts
@@ -1,6 +1,6 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
-import { resolveApprovalMode, buildGeminiArgs, resolveModel } from "./gemini.js";
+import { resolveApprovalMode, buildGeminiArgs, resolveModel } from "../../../src/backends/gemini.js";
 
 const ENV_KEY = "GEMINI_MCP_APPROVAL_MODE";
 
diff --git a/src/backends/index.test.ts b/test/unit/backends/index.test.ts
similarity index 92%
rename from src/backends/index.test.ts
rename to test/unit/backends/index.test.ts
index e5c3e4f..e853237 100644
--- a/src/backends/index.test.ts
+++ b/test/unit/backends/index.test.ts
@@ -1,6 +1,6 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
-import { getBackend } from "./index.js";
+import { getBackend } from "../../../src/backends/index.js";
 
 test("getBackend defaults to gemini", () => {
   assert.equal(getBackend({}).name, "gemini");
diff --git a/test/unit/tools/brainstorm.test.ts b/test/unit/tools/brainstorm.test.ts
new file mode 100644
index 0000000..6abe47a
--- /dev/null
+++ b/test/unit/tools/brainstorm.test.ts
@@ -0,0 +1,61 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+  buildBrainstormPrompt,
+  getMethodologyInstructions,
+} from "../../../src/tools/brainstorm.tool.js";
+
+test("getMethodologyInstructions returns the requested framework", () => {
+  assert.match(getMethodologyInstructions("scamper"), /SCAMPER/); // each regex is a marker expected in that framework's text
+  assert.match(getMethodologyInstructions("scamper"), /Substitute/);
+  assert.match(getMethodologyInstructions("divergent"), /Divergent Thinking/);
+  assert.match(getMethodologyInstructions("design-thinking"), /Empathize/);
+});
+
+test("getMethodologyInstructions falls back to the auto framework for unknown methodologies", () => {
+  const out = getMethodologyInstructions("not-a-real-methodology"); // unknown name must not throw
+  assert.match(out, /AI-Optimized Approach/); // marker this test treats as identifying the "auto" framework
+});
+
+test("getMethodologyInstructions weaves the domain into the auto framework", () => {
+  assert.match(getMethodologyInstructions("auto", "fintech"), /fintech/); // second argument is the domain
+});
+
+test("buildBrainstormPrompt embeds the challenge, idea count, and chosen framework", () => {
+  const prompt = buildBrainstormPrompt({
+    prompt: "How do we reduce churn?",
+    methodology: "scamper",
+    ideaCount: 7,
+    includeAnalysis: true,
+  });
+  assert.match(prompt, /# BRAINSTORMING SESSION/);
+  assert.ok(prompt.includes("How do we reduce churn?")); // includes() avoids regex-escaping the "?"
+  assert.match(prompt, /Generate 7 distinct/); // ideaCount is interpolated into the instruction
+  assert.match(prompt, /SCAMPER/);
+  assert.match(prompt, /## Analysis Framework/); // analysis requested
+});
+
+test("buildBrainstormPrompt omits the analysis framework when not requested", () => {
+  const prompt = buildBrainstormPrompt({
+    prompt: "ideas",
+    methodology: "divergent",
+    ideaCount: 5,
+    includeAnalysis: false, // the setting under test
+  });
+  assert.doesNotMatch(prompt, /## Analysis Framework/); // same heading the includeAnalysis: true case expects
+});
+
+test("buildBrainstormPrompt injects optional domain, constraints, and context", () => {
+  const prompt = buildBrainstormPrompt({
+    prompt: "ideas",
+    methodology: "auto",
+    domain: "healthcare",
+    constraints: "HIPAA compliant",
+    existingContext: "prior pilot failed",
+    ideaCount: 3,
+    includeAnalysis: false,
+  });
+  assert.ok(prompt.includes("healthcare")); // each optional field must appear verbatim in the prompt
+  assert.ok(prompt.includes("HIPAA compliant"));
+  assert.ok(prompt.includes("prior pilot failed"));
+});
diff --git a/test/unit/tools/registry.test.ts b/test/unit/tools/registry.test.ts
new file mode 100644
index 0000000..e15287f
--- /dev/null
+++ b/test/unit/tools/registry.test.ts
@@ -0,0 +1,63 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+// Importing the tools index registers every tool in the shared registry.
+import {
+  getToolDefinitions,
+  getPromptDefinitions,
+  getPromptMessage,
+  toolExists,
+} from "../../../src/tools/index.js";
+
+test("every registered tool exposes a valid JSON-schema definition", () => {
+  const defs = getToolDefinitions();
+  assert.ok(defs.length >= 6); // ask-gemini, ping, Help, brainstorm, fetch-chunk, timeout-test
+  for (const def of defs) {
+    assert.equal(typeof def.name, "string");
+    assert.equal(typeof def.description, "string");
+    assert.equal(def.inputSchema.type, "object");
+    assert.equal(typeof def.inputSchema.properties, "object");
+    assert.ok(Array.isArray(def.inputSchema.required)); // must be an array for every tool, not undefined
+  }
+});
+
+test("ask-gemini requires a prompt; ping's prompt is optional", () => {
+  const defs = getToolDefinitions();
+  const ask = defs.find((d) => d.name === "ask-gemini");
+  const ping = defs.find((d) => d.name === "ping");
+  assert.ok(ask && ping); // both tools must be registered before their schemas are inspected
+  assert.ok((ask!.inputSchema.properties as any).prompt); // the prompt property is declared
+  assert.ok((ask!.inputSchema.required as string[]).includes("prompt"));
+  assert.ok(!(ping!.inputSchema.required as string[]).includes("prompt"));
+});
+
+test("toolExists reflects the registry", () => {
+  assert.equal(toolExists("ask-gemini"), true);
+  assert.equal(toolExists("fetch-chunk"), true);
+  assert.equal(toolExists("does-not-exist"), false); // unknown names report false rather than throwing
+});
+
+test("getPromptDefinitions lists tools that declare a prompt", () => {
+  const prompts = getPromptDefinitions();
+  const names = prompts.map((p) => p.name); // prompt definitions are identified by tool name
+  assert.ok(names.includes("ask-gemini"));
+  assert.ok(names.includes("brainstorm"));
+  const ask = prompts.find((p) => p.name === "ask-gemini");
+  assert.equal(typeof ask!.description, "string"); // "!" is safe: includes("ask-gemini") passed above
+});
+
+test("getPromptMessage formats prompt text, boolean flags, and key/value params", () => {
+  const msg = getPromptMessage("ask-gemini", {
+    prompt: "explain this",
+    model: "gemini-2.5-flash",
+    sandbox: true,
+    changeMode: false, // false values are omitted
+  });
+  assert.match(msg, /^Use the ask-gemini tool: explain this/); // message must start with the tool reference and prompt
+  assert.ok(msg.includes("(model: gemini-2.5-flash)")); // string value rendered as (key: value)
+  assert.ok(msg.includes("[sandbox]")); // boolean true rendered as a flag
+  assert.ok(!msg.includes("changeMode")); // false omitted
+});
+
+test("getPromptMessage handles a bare tool reference with no params", () => {
+  assert.equal(getPromptMessage("Help", {}), "Use the Help tool"); // exact match: no colon or suffix without params
+});
diff --git a/test/unit/utils/changeModeChunker.test.ts b/test/unit/utils/changeModeChunker.test.ts
new file mode 100644
index 0000000..f21da95
--- /dev/null
+++ b/test/unit/utils/changeModeChunker.test.ts
@@ -0,0 +1,73 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { chunkChangeModeEdits, summarizeChunking } from "../../../src/utils/changeModeChunker.js";
+import type { ChangeModeEdit } from "../../../src/utils/changeModeParser.js";
+
+function edit(filename: string, oldCode = "x", newCode = "y"): ChangeModeEdit { // fixture factory for one edit starting at line 1
+  const lines = (s: string) => (s === "" ? 0 : s.split("\n").length); // line count; "" counts as 0 lines
+  return {
+    filename,
+    oldStartLine: 1,
+    oldEndLine: Math.max(1, lines(oldCode)), // clamped so the end line is never before the start line
+    oldCode,
+    newStartLine: 1,
+    newEndLine: Math.max(1, lines(newCode)), // same clamp for the replacement text
+    newCode,
+  };
+}
+
+test("chunkChangeModeEdits returns one empty chunk for no edits", () => {
+  const chunks = chunkChangeModeEdits([]);
+  assert.equal(chunks.length, 1); // empty input still yields one chunk, not an empty array
+  assert.equal(chunks[0].edits.length, 0);
+  assert.equal(chunks[0].totalChunks, 1);
+  assert.equal(chunks[0].hasMore, false);
+});
+
+test("chunkChangeModeEdits keeps small edits together in a single chunk (default budget)", () => {
+  const chunks = chunkChangeModeEdits([edit("a.ts"), edit("b.ts"), edit("c.ts")]); // no budget argument, so the default applies
+  assert.equal(chunks.length, 1);
+  assert.equal(chunks[0].edits.length, 3); // all three edits share the single chunk
+  assert.equal(chunks[0].totalChunks, 1);
+  assert.equal(chunks[0].hasMore, false);
+});
+
+test("chunkChangeModeEdits keeps edits to the same file grouped together", () => {
+  // Two edits to the same file fit easily under the default budget.
+  const chunks = chunkChangeModeEdits([edit("same.ts", "aaa"), edit("same.ts", "bbb")]);
+  assert.equal(chunks.length, 1);
+  assert.equal(chunks[0].edits.length, 2); // both edits land in the one chunk
+});
+
+test("chunkChangeModeEdits splits across chunks when the budget is exceeded", () => {
+  // Each edit is ~260 chars (250 overhead + filename*2 + code). A 300-char budget
+  // forces one edit per chunk across three distinct files.
+  const chunks = chunkChangeModeEdits([edit("a.ts"), edit("b.ts"), edit("c.ts")], 300);
+  assert.equal(chunks.length, 3);
+  assert.deepEqual(
+    chunks.map((c) => c.chunkIndex),
+    [1, 2, 3], // chunkIndex is 1-based
+  );
+  assert.deepEqual(
+    chunks.map((c) => c.hasMore),
+    [true, true, false], // only the final chunk reports hasMore === false
+  );
+  assert.deepEqual(
+    chunks.map((c) => c.totalChunks),
+    [3, 3, 3], // every chunk carries the overall total
+  );
+});
+
+test("chunkChangeModeEdits splits a single oversized file across chunks", () => {
+  const chunks = chunkChangeModeEdits([edit("big.ts", "aaa"), edit("big.ts", "bbb")], 300); // second argument is the size budget
+  assert.equal(chunks.length, 2); // the two same-file edits cannot share one chunk under this budget
+  assert.equal(chunks[0].edits[0].filename, "big.ts");
+  assert.equal(chunks[1].edits[0].filename, "big.ts");
+});
+
+test("summarizeChunking reports edit and chunk counts", () => {
+  const chunks = chunkChangeModeEdits([edit("a.ts"), edit("b.ts")], 300); // small budget, expected to give one edit per chunk
+  const summary = summarizeChunking(chunks);
+  assert.match(summary, /# edits: 2/);
+  assert.match(summary, /# chunks: 2/);
+});
diff --git a/test/unit/utils/changeModeParser.test.ts b/test/unit/utils/changeModeParser.test.ts
new file mode 100644
index 0000000..da73851
--- /dev/null
+++ b/test/unit/utils/changeModeParser.test.ts
@@ -0,0 +1,89 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+  parseChangeModeOutput,
+  validateChangeModeEdits,
+  type ChangeModeEdit,
+} from "../../../src/utils/changeModeParser.js";
+
+// The markdown fence is built as a plain string so the fixtures can be written
+// as template literals without colliding with the backtick delimiter.
+const FENCE = "```";
+
+function block(file: string, line: number, oldCode: string, newCode: string): string { // builds one FILE/OLD/NEW fixture block
+  return [`**FILE: ${file}:${line}**`, FENCE, "OLD:", oldCode, "NEW:", newCode, FENCE].join("\n"); // header line, then a fenced OLD:/NEW: body
+}
+
+test("parseChangeModeOutput parses a single markdown OLD/NEW block", () => {
+  const out = parseChangeModeOutput(
+    ["Here is the edit:", block("src/a.ts", 10, "const x = 1;", "const x = 2;")].join("\n\n"), // prose before the block must not produce an edit
+  );
+  assert.equal(out.length, 1);
+  const e = out[0];
+  assert.equal(e.filename, "src/a.ts");
+  assert.equal(e.oldStartLine, 10); // taken from the ":10" in the FILE header
+  assert.equal(e.oldEndLine, 10); // single line
+  assert.equal(e.oldCode, "const x = 1;");
+  assert.equal(e.newCode, "const x = 2;");
+});
+
+test("parseChangeModeOutput computes end lines from multi-line OLD/NEW content", () => {
+  const out = parseChangeModeOutput(block("src/b.ts", 20, "foo();\nbar();", "baz();"));
+  assert.equal(out.length, 1);
+  const e = out[0];
+  assert.equal(e.oldStartLine, 20);
+  assert.equal(e.oldEndLine, 21); // two old lines: 20..21
+  assert.equal(e.newStartLine, 20); // the new range starts at the same line as the old one
+  assert.equal(e.newEndLine, 20); // one new line
+  assert.equal(e.oldCode, "foo();\nbar();"); // inner newline preserved
+});
+
+test("parseChangeModeOutput parses multiple blocks in order", () => {
+  const out = parseChangeModeOutput(
+    [block("a.ts", 1, "a", "A"), block("b.ts", 2, "b", "B")].join("\n\n"), // blocks separated by a blank line
+  );
+  assert.equal(out.length, 2);
+  assert.deepEqual(
+    out.map((e) => e.filename),
+    ["a.ts", "b.ts"], // same order as in the input
+  );
+});
+
+test("parseChangeModeOutput returns [] for empty or non-matching input", () => {
+  assert.deepEqual(parseChangeModeOutput(""), []); // empty input must not throw
+  assert.deepEqual(parseChangeModeOutput("just some prose with no edits"), []);
+});
+
+test("validateChangeModeEdits accepts well-formed edits", () => {
+  const edits: ChangeModeEdit[] = [
+    {
+      filename: "a.ts",
+      oldStartLine: 1,
+      oldEndLine: 1,
+      oldCode: "a",
+      newStartLine: 1,
+      newEndLine: 1,
+      newCode: "A",
+    },
+  ];
+  assert.deepEqual(validateChangeModeEdits(edits), { valid: true, errors: [] }); // pins the exact result shape, including the empty errors array
+});
+
+test("validateChangeModeEdits flags missing filename, inverted ranges, and empty edits", () => {
+  const edits: ChangeModeEdit[] = [
+    {
+      filename: "", // missing filename
+      oldStartLine: 5,
+      oldEndLine: 1, // inverted
+      oldCode: "",
+      newStartLine: 1,
+      newEndLine: 1,
+      newCode: "", // empty edit
+    },
+  ];
+  const result = validateChangeModeEdits(edits);
+  assert.equal(result.valid, false);
+  assert.ok(result.errors.some((e) => /missing filename/i.test(e))); // all three problems must be reported for the single edit
+  assert.ok(result.errors.some((e) => /Invalid line range/i.test(e)));
+  assert.ok(result.errors.some((e) => /Empty edit/i.test(e)));
+});
diff --git a/test/unit/utils/changeModeTranslator.test.ts b/test/unit/utils/changeModeTranslator.test.ts
new file mode 100644
index 0000000..d53f0e7
--- /dev/null
+++ b/test/unit/utils/changeModeTranslator.test.ts
@@ -0,0 +1,72 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+  formatChangeModeResponse,
+  summarizeChangeModeEdits,
+} from "../../../src/utils/changeModeTranslator.js";
+import type { ChangeModeEdit } from "../../../src/utils/changeModeParser.js";
+
+function edit(filename: string, oldCode: string, newCode: string): ChangeModeEdit { // fixture factory; only filename and code vary
+  return {
+    filename,
+    oldStartLine: 1, // all line numbers are fixed at 1 regardless of the code passed in
+    oldEndLine: 1,
+    oldCode,
+    newStartLine: 1,
+    newEndLine: 1,
+    newCode,
+  };
+}
+
+test("formatChangeModeResponse renders a single-chunk response with the exact code", () => {
+  const out = formatChangeModeResponse([edit("a.ts", "OLD_CODE", "NEW_CODE")]); // no chunk info passed
+  assert.match(out, /CHANGEMODE OUTPUT/);
+  assert.match(out, /1 modification\b/); // singular
+  assert.match(out, /Replace this exact text:/);
+  assert.ok(out.includes("OLD_CODE")); // both code snippets must appear verbatim
+  assert.ok(out.includes("NEW_CODE"));
+  assert.match(out, /Apply these edits in order/);
+});
+
+test("formatChangeModeResponse pluralizes the modification count", () => {
+  const out = formatChangeModeResponse([edit("a.ts", "1", "1"), edit("b.ts", "2", "2")]);
+  assert.match(out, /2 modifications\b/); // plural form for two edits
+});
+
+test("formatChangeModeResponse emits chunk headers and a fetch-chunk continuation", () => {
+  const out = formatChangeModeResponse([edit("a.ts", "x", "y")], {
+    current: 1,
+    total: 3,
+    cacheKey: "abcd1234",
+  });
+  assert.match(out, /Chunk 1 of 3/);
+  assert.ok(out.includes('fetch-chunk cacheKey="abcd1234" chunkIndex=2')); // continuation points at the next chunk (current + 1)
+  assert.match(out, /2 more chunks/); // total - current = 2 remaining
+});
+
+test("formatChangeModeResponse omits the continuation on the final chunk", () => {
+  const out = formatChangeModeResponse([edit("a.ts", "x", "y")], {
+    current: 3, // last chunk: current === total
+    total: 3,
+    cacheKey: "abcd1234",
+  });
+  assert.match(out, /Chunk 3 of 3/); // the header is still present
+  assert.doesNotMatch(out, /fetch-chunk cacheKey/);
+});
+
+test("summarizeChangeModeEdits counts edits and affected files", () => {
+  const summary = summarizeChangeModeEdits([
+    edit("a.ts", "1", "1"),
+    edit("a.ts", "2", "2"),
+    edit("b.ts", "3", "3"),
+  ]);
+  assert.match(summary, /Total edits: 3/);
+  assert.match(summary, /Files affected: 2/); // a.ts is counted once despite having two edits
+  assert.match(summary, /- a\.ts: 2 edits/);
+  assert.match(summary, /- b\.ts: 1 edit\b/); // \b rejects "1 edits"
+});
+
+test("summarizeChangeModeEdits marks the partial (multi-chunk) view", () => {
+  const summary = summarizeChangeModeEdits([edit("a.ts", "1", "1")], true); // true requests the partial (multi-chunk) view
+  assert.match(summary, /across all chunks/);
+});
diff --git a/test/unit/utils/chunkCache.test.ts b/test/unit/utils/chunkCache.test.ts
new file mode 100644
index 0000000..f3e2ded
--- /dev/null
+++ b/test/unit/utils/chunkCache.test.ts
@@ -0,0 +1,96 @@
+import { test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import {
+  cacheChunks,
+  getChunks,
+  clearCache,
+  getCacheStats,
+} from "../../../src/utils/chunkCache.js";
+import type { EditChunk } from "../../../src/utils/changeModeChunker.js";
+
+// chunkCache persists to a shared scratch dir under os.tmpdir() (10-min TTL).
+// clearCache() isolates each test; it only touches that scratch dir.
+beforeEach(() => clearCache()); // start every test from an empty cache
+afterEach(() => clearCache()); // and leave nothing behind once a test finishes
+
+function chunk(n: number): EditChunk { // fixture factory: one chunk holding one edit, labelled by n
+  return {
+    edits: [
+      {
+        filename: `file${n}.ts`,
+        oldStartLine: 1,
+        oldEndLine: 1,
+        oldCode: `old${n}`,
+        newStartLine: 1,
+        newEndLine: 1,
+        newCode: `new${n}`,
+      },
+    ],
+    chunkIndex: n,
+    totalChunks: 1, // constant; not adjusted to how many chunks a test caches together
+    hasMore: false,
+    estimatedChars: 100, // constant; not computed from the edit above
+  };
+}
+
+test("cacheChunks returns an 8-char hex key and getChunks round-trips the data", () => {
+  const key = cacheChunks("a prompt", [chunk(1), chunk(2)]);
+  assert.match(key, /^[a-f0-9]{8}$/); // lowercase hex only
+
+  const got = getChunks(key);
+  assert.ok(got);
+  assert.equal(got!.length, 2);
+  assert.equal(got![0].edits[0].newCode, "new1"); // the first cached chunk is the one built by chunk(1)
+});
+
+test("cacheChunks is deterministic for the same prompt", () => {
+  const a = cacheChunks("identical", [chunk(1)]);
+  const b = cacheChunks("identical", [chunk(1)]);
+  assert.equal(a, b); // same prompt -> same key
+});
+
+test("getChunks rejects malformed keys (path traversal / wrong shape)", () => {
+  assert.equal(getChunks("../../etc/passwd"), null); // path traversal attempt
+  assert.equal(getChunks("ZZZZZZZZ"), null); // not hex
+  assert.equal(getChunks("abc"), null); // too short
+  assert.equal(getChunks("deadbeef99"), null); // too long
+});
+
+test("getChunks returns null for a valid-format key with no cached file", () => {
+  assert.equal(getChunks("00000000"), null); // eight hex characters, but the cache was cleared before this test
+});
+
+test("getChunks expires entries past the TTL and deletes the file", () => {
+  const key = cacheChunks("expire me", [chunk(1)]);
+  const { cacheDir } = getCacheStats();
+  const file = path.join(cacheDir, `${key}.json`); // the entry is expected at <cacheDir>/<key>.json
+
+  // Backdate the stored timestamp beyond the 10-minute TTL.
+  const data = JSON.parse(fs.readFileSync(file, "utf-8"));
+  data.timestamp = Date.now() - 11 * 60 * 1000; // 11 minutes ago, in ms
+  fs.writeFileSync(file, JSON.stringify(data));
+
+  assert.equal(getChunks(key), null);
+  assert.equal(fs.existsSync(file), false); // expired file is removed
+});
+
+test("the cache enforces a maximum file count (FIFO eviction)", () => {
+  const { maxSize } = getCacheStats();
+  for (let i = 0; i < maxSize + 5; i++) { // write 5 more entries than the cache may hold
+    cacheChunks(`prompt-${i}`, [chunk(i)]); // a different prompt on every iteration
+  }
+  assert.equal(getCacheStats().size, maxSize); // checks only the count, not which entries were evicted
+});
+
+test("getCacheStats reports the TTL and max size; clearCache empties the dir", () => {
+  const stats = getCacheStats();
+  assert.equal(stats.ttl, 10 * 60 * 1000); // 10 minutes in ms
+  assert.equal(stats.maxSize, 50); // pins the configured limit; update if the cache limit changes
+
+  cacheChunks("something", [chunk(1)]);
+  assert.ok(getCacheStats().size >= 1);
+  clearCache();
+  assert.equal(getCacheStats().size, 0);
+});
diff --git a/src/utils/commandExecutor.test.ts b/test/unit/utils/commandExecutor.test.ts
similarity index 97%
rename from src/utils/commandExecutor.test.ts
rename to test/unit/utils/commandExecutor.test.ts
index 5f510ca..35b3a1c 100644
--- a/src/utils/commandExecutor.test.ts
+++ b/test/unit/utils/commandExecutor.test.ts
@@ -4,7 +4,7 @@ import {
   quoteForCmd,
   resolveCommandForExecution,
   buildEnoentErrorMessage,
-} from "./commandExecutor.js";
+} from "../../../src/utils/commandExecutor.js";
 
 test("quoteForCmd wraps in double quotes and doubles embedded quotes", () => {
   assert.equal(quoteForCmd("hello"), '"hello"');
diff --git a/src/utils/envFile.test.ts b/test/unit/utils/envFile.test.ts
similarity index 95%
rename from src/utils/envFile.test.ts
rename to test/unit/utils/envFile.test.ts
index 03d6e68..62e3aca 100644
--- a/src/utils/envFile.test.ts
+++ b/test/unit/utils/envFile.test.ts
@@ -1,6 +1,6 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
-import { parseEnv } from "./envFile.js";
+import { parseEnv } from "../../../src/utils/envFile.js";
 
 test("parseEnv: basic KEY=VALUE pairs", () => {
   const r = parseEnv("GEMINI_MODEL=gemini-2.5-pro\nGEMINI_MCP_TIMEOUT_MS=1800000");
diff --git a/src/utils/geminiExecutor.test.ts b/test/unit/utils/geminiExecutor.test.ts
similarity index 91%
rename from src/utils/geminiExecutor.test.ts
rename to test/unit/utils/geminiExecutor.test.ts
index 2fd922c..dc52663 100644
--- a/src/utils/geminiExecutor.test.ts
+++ b/test/unit/utils/geminiExecutor.test.ts
@@ -1,6 +1,6 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
-import { assertSafeFileReferences } from "./geminiExecutor.js";
+import { assertSafeFileReferences } from "../../../src/utils/geminiExecutor.js";
 
 const root = process.cwd();
 
diff --git a/src/utils/timeoutManager.test.ts b/test/unit/utils/timeoutManager.test.ts
similarity index 90%
rename from src/utils/timeoutManager.test.ts
rename to test/unit/utils/timeoutManager.test.ts
index 3f565f7..418725d 100644
--- a/src/utils/timeoutManager.test.ts
+++ b/test/unit/utils/timeoutManager.test.ts
@@ -1,6 +1,6 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
-import { resolveTimeoutMs, RECOMMENDED_TIMEOUT_MS } from "./timeoutManager.js";
+import { resolveTimeoutMs, RECOMMENDED_TIMEOUT_MS } from "../../../src/utils/timeoutManager.js";
 
 test("resolveTimeoutMs: disabled by default when unset or blank (1.1.6 parity)", () => {
   assert.equal(resolveTimeoutMs({}), 0);
diff --git a/tsconfig.test.json b/tsconfig.test.json
new file mode 100644
index 0000000..15d6a9c
--- /dev/null
+++ b/tsconfig.test.json
@@ -0,0 +1,8 @@
+{
+  "extends": "./tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true,
+    "rootDir": "."
+  },
+  "include": ["src/**/*", "test/**/*"]
+}