From 855730e85033328343dfd294740d66995eb465de Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 13:55:52 -0700
Subject: [PATCH 1/8] =?UTF-8?q?feat:=20v1.2.0=20=E2=80=94=20pluggable=20ba?=
=?UTF-8?q?ckends,=20approval=20mode,=20native=20sessions,=20Windows=20rel?=
=?UTF-8?q?iability,=20timeouts,=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A feature release for the 1.2.0 line, on top of the 1.1.6 security patch. Hardens
cross-platform execution, adds an opt-in safety control and native multi-turn
sessions, makes the CLI backend pluggable ahead of the Gemini CLI retirement
(2026-06-18 -> Antigravity agy), and adds a real test suite.
- Backend abstraction (src/backends/): the Gemini CLI stays the default; add an
experimental Antigravity CLI (agy) backend behind GEMINI_MCP_BACKEND, with a
transcript-file fallback for agy's empty-stdout -p bug (Flash-only).
- Opt-in approval mode: approvalMode arg + GEMINI_MCP_APPROVAL_MODE env forward
gemini --approval-mode. Not forced by default — defaulting to 'plan' turns
headless gemini into an autonomous planner that breaks plain Q&A.
- Native multi-turn sessions: sessionId/resume forward gemini --session-id/--resume
and the active session id is surfaced in the response.
- Windows executable resolution: GEMINI_CLI_PATH, then 'where gemini' preferring
the .cmd shim; plus platform-aware ENOENT guidance.
- Per-command timeout (SIGTERM -> SIGKILL), GEMINI_MCP_TIMEOUT_MS (default 30m,
0 disables); implements the previously-empty timeoutManager.
- Fix Help tool: 'gemini --help' (was '-help', mis-parsed by yargs as -h -e -l -p).
- Read server version from package.json at runtime (was hardcoded, stale at 1.1.4);
engines >=18; prepare script for Git-checkout installs.
- Complex prompts (changeMode / @file) are sent on stdin instead of -p; windowsHide
suppresses the popup console window on Windows.
- node:test suite + tsconfig.build.json so tests are type-checked but not shipped in dist.
---
CHANGELOG.md | 23 ++++-
SECURITY-REPORT-2026-05-28.md | 92 +++++++++++++++++
package.json | 9 +-
scripts/run-tests.mjs | 33 ++++++
src/backends/agy.test.ts | 26 +++++
src/backends/agy.ts | 138 ++++++++++++++++++++++++++
src/backends/gemini.test.ts | 61 ++++++++++++
src/backends/gemini.ts | 87 ++++++++++++++++
src/backends/index.test.ts | 16 +++
src/backends/index.ts | 26 +++++
src/backends/types.ts | 29 ++++++
src/constants.ts | 31 +++++-
src/index.ts | 9 +-
src/tools/ask-gemini.tool.ts | 39 +++++---
src/tools/brainstorm.tool.ts | 9 +-
src/tools/simple-tools.ts | 5 +-
src/utils/commandExecutor.test.ts | 41 ++++++++
src/utils/commandExecutor.ts | 160 ++++++++++++++++++++++++------
src/utils/geminiExecutor.test.ts | 17 ++++
src/utils/geminiExecutor.ts | 87 ++++++----------
src/utils/timeoutManager.test.ts | 19 ++++
src/utils/timeoutManager.ts | 20 ++++
tsconfig.build.json | 4 +
23 files changed, 871 insertions(+), 110 deletions(-)
create mode 100644 SECURITY-REPORT-2026-05-28.md
create mode 100644 scripts/run-tests.mjs
create mode 100644 src/backends/agy.test.ts
create mode 100644 src/backends/agy.ts
create mode 100644 src/backends/gemini.test.ts
create mode 100644 src/backends/gemini.ts
create mode 100644 src/backends/index.test.ts
create mode 100644 src/backends/index.ts
create mode 100644 src/backends/types.ts
create mode 100644 src/utils/commandExecutor.test.ts
create mode 100644 src/utils/geminiExecutor.test.ts
create mode 100644 src/utils/timeoutManager.test.ts
create mode 100644 tsconfig.build.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3739f96..6d7ae52 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,28 @@
# Changelog
+## [1.2.0] - 2026-05-30
+First feature release after the 1.1.6 security patch. Hardens cross-platform execution, adds an opt-in safety control and native multi-turn sessions, makes the CLI backend pluggable (ahead of Gemini CLI's retirement), and adds a real test suite.
+
+### Added
+- **Approval mode** — optional `approvalMode` argument on `ask-gemini`/`brainstorm` (and `GEMINI_MCP_APPROVAL_MODE` env), forwarding Gemini's `--approval-mode` (`default` / `auto_edit` / `yolo` / `plan`). Opt-in: when unset, behaviour is unchanged. Use `yolo` / `auto_edit` with `sandbox` to let Gemini run or edit; `plan` runs Gemini as an autonomous read-only planner.
+- **Native multi-turn sessions** — `sessionId` and `resume` arguments forward Gemini's `--session-id` / `--resume`; the active session id is surfaced in the response so a follow-up call can continue the conversation. Builds on #50; uses the CLI's own sessions rather than local transcript storage.
+- **Pluggable backends** — the executor is now backend-agnostic. The Gemini CLI stays the default; set `GEMINI_MCP_BACKEND=agy` to use the **experimental** Antigravity CLI (`agy`) backend, ahead of Gemini CLI's 2026-06-18 retirement for free/Pro/Ultra tiers. (agy print-mode is Flash-only, and its reply is recovered from agy's transcript files to work around the upstream `agy -p` empty-stdout bug.)
+- **Per-command timeout** — a hung CLI call is now terminated (SIGTERM → SIGKILL). Configurable via `GEMINI_MCP_TIMEOUT_MS` (default 30 minutes; `0` disables).
+- **Windows executable resolution** — honours `GEMINI_CLI_PATH`, otherwise resolves the real `gemini` shim via `where` (preferring `.cmd`), fixing "command not found" when the MCP server doesn't inherit your shell's PATH.
+- **Test suite** — `node:test` coverage for the `@file` security guard, Windows quoting/resolution, approval-mode and session argument building, backend selection, and timeout parsing (`npm test`).
+
+### Changed
+- `engines.node` raised to `>=18`.
+- The server version is now read from `package.json` at runtime, instead of a hardcoded string that had drifted to `1.1.4`.
+- Installing from a Git checkout now builds automatically via a `prepare` script.
+
+### Fixed
+- The `Help` tool now invokes `gemini --help` instead of `-help`, which yargs mis-parsed as `-h -e -l -p`.
+- Clearer, platform-aware guidance when the executable is not found (ENOENT), including the `GEMINI_CLI_PATH` hint.
+- Windows robustness: complex prompts (`changeMode` / `@file`) are sent to the Gemini CLI on **stdin** instead of the `-p` flag, sidestepping cmd.exe argument parsing and the OS command-line length limit; added `windowsHide` to suppress the popup console window. (#27, #77)
+
## [1.1.6] - 2026-05-30
-_Emergency security patch — the CVE-2026-0755 fix only, ahead of the larger 1.2.0 release._
+_Emergency security patch — the CVE-2026-0755 fix only, ahead of this 1.2.0 release._
- Security fix: OS command-injection / `@file` exfiltration via prompt quoting in `geminiExecutor.ts` (CVE-2026-0755, CWE-78). Fixes #73 (and the literal-quote corruption in #66).
- Removed the broken double-quote wrapping from both the primary and fallback paths. With `spawn` running `shell: false`, those quotes were passed as literal characters — they provided no protection and corrupted `@file` references. Windows `.cmd` argument quoting is hardened separately (see below).
- Added `assertSafeFileReferences()`, which rejects any `@file` reference that resolves outside the project working directory (absolute paths, `~` home references, and `../` traversal), closing the arbitrary-file-read exfiltration vector while preserving legitimate in-project `@file` usage.
diff --git a/SECURITY-REPORT-2026-05-28.md b/SECURITY-REPORT-2026-05-28.md
new file mode 100644
index 0000000..8af60be
--- /dev/null
+++ b/SECURITY-REPORT-2026-05-28.md
@@ -0,0 +1,92 @@
+# Security Report — gemini-mcp-tool
+
+- **Date:** 2026-05-28
+- **Repository:** `jamubc/gemini-mcp-tool`
+- **Branch reviewed:** `security/cve-2026-0755` (PR #75)
+- **Scope:** All hand-written source under `src/`, plus declared npm dependencies.
+- **Method:** Manual code review + sink analysis (`child_process` / `fs` / network / `eval`), `npm audit` with runtime-vs-dev tree attribution, and a cross-check of open GitHub issues.
+
+> No security issue was filed today (2026-05-28). The most recent security report is **#73 (CVE-2026-0755)**, which is fixed on this branch (PR #75).
+
+---
+
+## Executive summary
+
+| Area | Critical | High | Moderate | Low / Info |
+|--------------|:--------:|:----:|:--------:|:----------:|
+| Code | 1 (fixed)| 0 | 0 | 4 |
+| Dependencies | 0 | 8* | 15 | 2 |
+
+\* Only **2 of the 8 dependency HIGHs reach the published/runtime tree** (`@modelcontextprotocol/sdk`, and `tmp` via the unused `inquirer` dep). The other 6 HIGHs live exclusively in the docs/build toolchain (`vitepress`, `mermaid`, `archiver`) and are never installed for end users.
+
+---
+
+## Code findings
+
+### C1 — CVE-2026-0755: OS command-injection / `@file` exfiltration — **Critical — FIXED (PR #75)**
+`geminiExecutor.ts` wrapped any prompt containing `@` in literal `"` before passing it to `spawn` (`shell: false`), which injected literal quote characters and corrupted `@file` references, while leaving an arbitrary-file-read vector through the Gemini CLI's `@file` parser.
+
+**Fix (this branch):** removed the broken quoting from the primary and fallback paths; added `assertSafeFileReferences()` which rejects `@file` references that resolve outside the project working directory (absolute, `~`, and `../` traversal). The guard runs on the fully-processed prompt, so it also protects the `brainstorm` and `changeMode` code paths.
+
+### C2 — Windows `cmd.exe` variable expansion in prompts — **Low (Windows-only)**
+`commandExecutor.ts` uses `shell: true` on Windows and wraps whitespace/quote args in `"..."` (escaping `"`→`""`). `cmd.exe` still expands `%VAR%` **inside** double quotes, so a prompt containing e.g. `%USERNAME%` / `%PATH%` is substituted before reaching `gemini`. This is not a command-execution break-out, but it is a correctness + minor information-substitution issue. Unix is unaffected (`shell: false`).
+**Recommendation:** adopt the issue #62 approach — spawn `process.execPath` with the resolved `gemini.js` path and `shell: false` on Windows too — eliminating the shell (and the quoting fragility) entirely.
+
+### C3 — Verbose logging of full tool arguments / prompts — **Low / Informational**
+`logger.ts` logs raw args via `JSON.stringify` on every invocation (`Logger.toolInvocation`), and `Logger.debug` is wired to `console.warn`, so prompt bodies are written to stderr **regardless of any debug flag**. Prompts may contain pasted file contents or secrets; on shared hosts or captured MCP logs this is a disclosure risk.
+**Recommendation:** gate full-argument logging behind an explicit debug env var; avoid logging full prompt bodies at the default level.
+
+### C4 — Raw `error.message` returned to client — **Informational**
+`index.ts` returns `Error executing ${tool}: ${error.message}`. CLI/`fs` errors may embed absolute local paths. Low impact for a local stdio server; noted for completeness.
+
+### C5 — Unbounded lazy regex over model output — **Informational**
+`changeModeParser.ts` uses `[\s\S]*?` groups. Input is Gemini's *response* (model-controlled, not direct attacker network input), so ReDoS exposure is low. Acceptable today; revisit if these inputs ever become untrusted.
+
+### Positives observed
+- `commandExecutor.ts` uses `spawn` with `shell: false` on Unix and an args array — no shell injection.
+- #72 path-traversal hardening on `cacheKey` is solid: format regex (`/^[a-f0-9]{8}$/`) + `path.resolve` containment + removal of the silent `unlink` primitive.
+- All tool arguments are validated through `zod` before execution.
+- The server is **stdio-only** — there is no network listener by default.
+
+---
+
+## Dependency findings
+
+`npm audit`: **25 vulnerabilities (8 high, 15 moderate, 2 low)**. The published package ships only `dist/`, but its `dependencies` are installed transitively for every end user, so the runtime-vs-dev split below is what actually matters.
+
+### D1 — `@modelcontextprotocol/sdk@0.5.0` — **High — runtime, USED**
+- Advisories: ReDoS (high); "DNS-rebinding protection not enabled by default" (high).
+- **DNS rebinding does not apply** here: this server uses `StdioServerTransport`, not the Streamable-HTTP transport the advisory concerns.
+- ReDoS applies to SDK message handling; with a trusted local stdio client, exposure is limited but real.
+- `0.5.0` is far behind the current `1.x` line. **Upgrading is recommended but is a breaking API change** and will require edits to `index.ts`.
+
+### D2 — `inquirer@9.3.7` → `external-editor` → `tmp@0.0.33` — **High path traversal — runtime, UNUSED**
+- `inquirer`, `ai`, `chalk`, `d3-shape`, and `prismjs` are declared as runtime `dependencies` but are **not imported anywhere in `src/`**. They are still installed for every user, and `inquirer` drags in the HIGH `tmp` path-traversal advisory.
+- **Recommendation (high value, low effort):** remove these unused runtime deps. This eliminates the only runtime-tree HIGH besides the SDK and significantly shrinks install/attack surface. (Note: `package.json` references a `contribute` script at `src/contribute.ts` which does not exist in the tree — confirm nothing relies on these before removal.)
+
+### D3 — Docs/build toolchain HIGHs — **Not shipped, lower priority**
+All remaining HIGHs are confined to `devDependencies` and are not installed for end users or used by the running server:
+- `archiver` → `glob`, `minimatch`, `lodash`
+- `vitepress` → `rollup`, `vite`, `esbuild`, `preact`
+- `mermaid` → `dompurify`
+
+Patch opportunistically with `npm audit fix`, but these do not affect deployed MCP servers.
+
+---
+
+## Additional observations (full source-tree read)
+
+These do **not** affect the published npm package or the running MCP server (the docs site is built/deployed separately to GitHub Pages), but are noted for completeness:
+
+- **Docs site loads a third-party ad script.** `docs/.vitepress/theme/components/AdBanner.vue` injects `//cdn.carbonads.com/carbon.js` into the page. It is currently an inert placeholder (`serve=YOUR_CARBON_ID`), but any third-party script on the docs origin is a supply-chain/privacy consideration. *(Informational — docs site only.)*
+- **`v-html` in `CodeBlock.vue`.** Renders Prism-highlighted output via `v-html`. Input is build-time-authored doc content and Prism escapes HTML, so this is not an exploitable XSS today. *(Informational — docs site only.)*
+- **Dead / duplicate files.** `src/utils/timeoutManager.ts` is effectively empty (1 line) and imported nowhere; `src/scripts/deploy-wiki.sh` is a byte-for-byte duplicate of `scripts/deploy-wiki.sh`. Housekeeping, not security — safe to remove.
+
+## Prioritized recommendations
+
+1. **Merge PR #75** — CVE-2026-0755 fix. *(Critical — done, pending merge.)*
+2. **Remove unused runtime deps** (`ai`, `chalk`, `d3-shape`, `inquirer`, `prismjs`) — removes the `tmp` HIGH from the shipped tree. *(High, low effort.)*
+3. **Plan `@modelcontextprotocol/sdk` 0.5 → 1.x upgrade.** *(High, breaking — needs code changes.)*
+4. **Gate verbose prompt/argument logging** behind a debug flag. *(Low.)*
+5. **Windows:** drop `shell: true` in favor of the node + `gemini.js` approach (issue #62) to remove `%VAR%` expansion and quoting fragility. *(Low.)*
+6. **`npm audit fix`** for the docs/build toolchain. *(Low.)*
diff --git a/package.json b/package.json
index cb1f096..6e5aa3c 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "gemini-mcp-tool",
- "version": "1.1.6",
+ "version": "1.2.0",
"description": "MCP server for Gemini CLI integration",
"type": "module",
"main": "dist/index.js",
@@ -8,10 +8,11 @@
"gemini-mcp": "dist/index.js"
},
"scripts": {
- "build": "tsc",
+ "build": "tsc -p tsconfig.build.json",
+ "prepare": "npm run build",
"start": "node dist/index.js",
"dev": "tsc && node dist/index.js",
- "test": "echo \"No tests yet\" && exit 0",
+ "test": "node scripts/run-tests.mjs",
"lint": "tsc --noEmit",
"contribute": "tsx src/contribute.ts",
"prepublishOnly": "echo '⚠️ Remember to test locally first!' && npm run build",
@@ -38,7 +39,7 @@
},
"homepage": "https://github.com/jamubc/gemini-mcp-tool#readme",
"engines": {
- "node": ">=16.0.0"
+ "node": ">=18.0.0"
},
"files": [
"dist/",
diff --git a/scripts/run-tests.mjs b/scripts/run-tests.mjs
new file mode 100644
index 0000000..5ba7268
--- /dev/null
+++ b/scripts/run-tests.mjs
@@ -0,0 +1,33 @@
+#!/usr/bin/env node
+// Discover and run every *.test.ts under src/ with the built-in node:test
+// runner, using the tsx loader so the TypeScript sources run directly.
+import { spawnSync } from "node:child_process";
+import { readdirSync, statSync } from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+const scriptDir = path.dirname(fileURLToPath(import.meta.url));
+const srcDir = path.join(scriptDir, "..", "src");
+
+function findTests(dir) {
+ const found = [];
+ for (const entry of readdirSync(dir)) {
+ const full = path.join(dir, entry);
+ if (statSync(full).isDirectory()) found.push(...findTests(full));
+ else if (entry.endsWith(".test.ts")) found.push(full);
+ }
+ return found;
+}
+
+const tests = findTests(srcDir);
+if (tests.length === 0) {
+ console.log("No test files found.");
+ process.exit(0);
+}
+
+const result = spawnSync(
+ process.execPath,
+ ["--import", "tsx", "--test", ...tests],
+ { stdio: "inherit" },
+);
+process.exit(result.status ?? 1);
diff --git a/src/backends/agy.test.ts b/src/backends/agy.test.ts
new file mode 100644
index 0000000..3e9418c
--- /dev/null
+++ b/src/backends/agy.test.ts
@@ -0,0 +1,26 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { buildAgyArgs } from "./agy.js";
+
+test("buildAgyArgs maps prompt, sessions, sandbox, and yolo", () => {
+  // Each row pairs run options with the exact argv expected; the prompt always ends the list.
+  const promptArgs = ["-p", "hi"];
+  const table: Array<[Parameters<typeof buildAgyArgs>[1], string[]]> = [
+    // No options: only the prompt is passed.
+    [{}, promptArgs],
+    // resume "latest" continues the most recent conversation...
+    [{ resume: "latest" }, ["--continue", ...promptArgs]],
+    // ...while any other resume value names a specific conversation.
+    [{ resume: "conv-1" }, ["--conversation", "conv-1", ...promptArgs]],
+    // sessionId is forwarded as a conversation id as well.
+    [{ sessionId: "conv-2" }, ["--conversation", "conv-2", ...promptArgs]],
+    // sandbox plus yolo approval adds both flags ahead of the prompt.
+    [
+      { sandbox: true, approvalMode: "yolo" },
+      ["--sandbox", "--dangerously-skip-permissions", ...promptArgs],
+    ],
+  ];
+  for (const [options, expectedArgs] of table) {
+    assert.deepEqual(buildAgyArgs("hi", options), expectedArgs);
+  }
+});
diff --git a/src/backends/agy.ts b/src/backends/agy.ts
new file mode 100644
index 0000000..f0c1d26
--- /dev/null
+++ b/src/backends/agy.ts
@@ -0,0 +1,138 @@
+import { readFileSync } from "fs";
+import os from "os";
+import path from "path";
+import { Logger } from "../utils/logger.js";
+import { CLI, APPROVAL_MODES } from "../constants.js";
+import { executeCommand } from "../utils/commandExecutor.js";
+import type { Backend, BackendRunOptions } from "./types.js";
+
+/**
+ * EXPERIMENTAL Antigravity CLI (`agy`) backend — opt in with GEMINI_MCP_BACKEND=agy.
+ *
+ * agy is gemini-cli's successor (Gemini CLI is retired 2026-06-18 for free/Pro/
+ * Ultra tiers). Two caveats drive this implementation:
+ * 1. Print-mode (`agy -p`) is broken in 1.0.x — it returns exit 0 but writes
+ * nothing to stdout. We therefore recover the reply from agy's own transcript
+ * on disk when stdout is empty (matching the community MCP bridge).
+ * 2. Print-mode is hardcoded to Gemini 3.5 Flash; `model` is ignored.
+ */
+
+const AGY_BASE = path.join(os.homedir(), ".gemini", "antigravity-cli"); // <home>/.gemini/antigravity-cli
+const LAST_CONVERSATIONS = path.join(AGY_BASE, "cache", "last_conversations.json"); // read by conversationIdForCwd
+const transcriptPath = (id: string) =>
+  path.join(AGY_BASE, "brain", id, ".system_generated", "logs", "transcript.jsonl"); // transcript file for conversation `id`
+
+interface TranscriptEntry {
+  source?: string; // readTranscriptResponse keeps entries whose source is "MODEL"
+  type?: string; // values checked in this file: "USER_INPUT", "PLANNER_RESPONSE"
+  status?: string; // a reply is only used when this is "DONE"
+  content?: string; // the text returned to the caller
+}
+
+/** Look up the agy conversation id recorded for `cwd`; undefined when the cache file is unreadable or has no entry. */
+function conversationIdForCwd(cwd: string): string | undefined {
+  try {
+    const map = JSON.parse(readFileSync(LAST_CONVERSATIONS, "utf8")) as Record<string, string>;
+    return map[cwd] ?? map[path.resolve(cwd)]; // try the path as given, then its resolved absolute form
+  } catch (e) {
+    Logger.warn(`agy: could not read last_conversations.json: ${(e as Error).message}`);
+    return undefined;
+  }
+}
+
+/**
+ * Return the model reply (or replies) after the last user input in conversation `id`'s transcript; throws if unreadable or empty.
+ */
+export function readTranscriptResponse(id: string): string {
+  let lines: string[];
+  try {
+    lines = readFileSync(transcriptPath(id), "utf8").split(/\r?\n/).filter(Boolean);
+  } catch (e) {
+    throw new Error(
+      `agy: response transcript not found for conversation ${id}: ${(e as Error).message}`,
+    );
+  }
+
+  // One JSON value per line. Lines that fail to parse, or that parse to a non-object
+  // such as `null`, are skipped so the property reads below cannot throw.
+  const entries: TranscriptEntry[] = [];
+  for (const line of lines) {
+    try {
+      const parsed: unknown = JSON.parse(line);
+      if (parsed !== null && typeof parsed === "object") entries.push(parsed as TranscriptEntry);
+    } catch {
+      /* skip malformed lines */
+    }
+  }
+
+  // Take the model planner responses that follow the last user input; when there
+  // is no user input (index -1) the whole transcript is scanned.
+  const lastUserIdx = entries.map((e) => e.type).lastIndexOf("USER_INPUT");
+  const replies = entries
+    .slice(lastUserIdx + 1)
+    .filter(
+      (e) =>
+        e.source === "MODEL" &&
+        e.type === "PLANNER_RESPONSE" &&
+        e.status === "DONE" &&
+        typeof e.content === "string",
+    )
+    .map((e) => e.content as string);
+
+  const text = replies.join("\n\n").trim();
+  if (!text) {
+    throw new Error(`agy: no model response found in transcript for conversation ${id}`);
+  }
+  return text;
+}
+
+/** Build agy's argv from the backend-neutral run options; the prompt always goes last as `-p`. */
+export function buildAgyArgs(prompt: string, opts: BackendRunOptions): string[] {
+  // A resume target wins over sessionId; "latest" means the most recent conversation.
+  const conversation = opts.resume || opts.sessionId;
+  const sessionArgs =
+    opts.resume === "latest" ? ["--continue"] : conversation ? ["--conversation", conversation] : [];
+  return [
+    ...sessionArgs,
+    ...(opts.sandbox ? ["--sandbox"] : []),
+    // agy has no graded approval modes; only "skip all prompts" maps cleanly.
+    ...(opts.approvalMode === APPROVAL_MODES.YOLO ? ["--dangerously-skip-permissions"] : []),
+    "-p",
+    prompt,
+  ];
+}
+
+// Serialize agy calls: each run rewrites last_conversations.json, so concurrent
+// runs would read each other's conversation ids back.
+let agyQueue: Promise<unknown> = Promise.resolve();
+
+export const agyBackend: Backend = {
+  name: "agy",
+  supportsModelSelection: false, // print-mode is hardcoded to Gemini 3.5 Flash
+  run(prompt, opts) {
+    const task = agyQueue.then(async () => { // starts only after every earlier queued call has settled
+      Logger.warn(
+        "[experimental] agy backend: print-mode is Flash-only and recovers output from transcript files.",
+      );
+      const cwd = process.cwd();
+      const args = buildAgyArgs(prompt, opts);
+      const stdout = await executeCommand(CLI.COMMANDS.AGY, args, opts.onProgress);
+      if (stdout && stdout.trim()) return stdout.trim(); // future agy may fix -p stdout
+
+      const id = conversationIdForCwd(cwd);
+      if (!id) {
+        throw new Error(
+          `agy: produced no stdout and no conversation id was found for ${cwd}. ` +
+            "Run `agy -i` once to authenticate, then retry.",
+        );
+      }
+      return readTranscriptResponse(id);
+    });
+    // Keep the chain alive regardless of this call's outcome.
+    agyQueue = task.then(
+      () => undefined,
+      () => undefined,
+    );
+    return task;
+  },
+};
diff --git a/src/backends/gemini.test.ts b/src/backends/gemini.test.ts
new file mode 100644
index 0000000..75749a2
--- /dev/null
+++ b/src/backends/gemini.test.ts
@@ -0,0 +1,61 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { resolveApprovalMode, buildGeminiArgs } from "./gemini.js";
+
+const ENV_KEY = "GEMINI_MCP_APPROVAL_MODE";
+
+function withEnv(value: string | undefined, fn: () => void): void {
+  const prev = process.env[ENV_KEY]; // saved so the previous value can be put back afterwards
+  if (value === undefined) delete process.env[ENV_KEY]; // undefined means "run with the variable unset"
+  else process.env[ENV_KEY] = value;
+  try {
+    fn();
+  } finally {
+    if (prev === undefined) delete process.env[ENV_KEY]; // restored even when fn throws
+    else process.env[ENV_KEY] = prev;
+  }
+}
+
+test("resolveApprovalMode is opt-in (undefined unless set) and rejects unknown values", () => {
+  withEnv(undefined, () => { // env var unset: only the explicit argument can select a mode
+    assert.equal(resolveApprovalMode(), undefined);
+    assert.equal(resolveApprovalMode("bogus"), undefined); // not one of the known modes
+    assert.equal(resolveApprovalMode("yolo"), "yolo");
+    assert.equal(resolveApprovalMode("plan"), "plan");
+  });
+});
+
+test("resolveApprovalMode reads the env var, but the arg overrides it", () => {
+  withEnv("auto_edit", () => {
+    assert.equal(resolveApprovalMode(), "auto_edit"); // no argument: the env value is used
+    assert.equal(resolveApprovalMode("plan"), "plan"); // explicit argument wins over the env value
+  });
+});
+
+test("buildGeminiArgs forces no approval mode by default", () => {
+  withEnv(undefined, () => { // env var unset, and no approvalMode option is passed below
+    assert.deepEqual(buildGeminiArgs("gemini-2.5-flash", { sandbox: true }), [
+      "-m",
+      "gemini-2.5-flash",
+      "-s",
+    ]);
+    assert.deepEqual(buildGeminiArgs(undefined, { resume: "abc" }), ["--resume", "abc"]);
+    assert.deepEqual(buildGeminiArgs(undefined, { sessionId: "xyz" }), [
+      "--session-id",
+      "xyz",
+    ]);
+  });
+});
+
+test("buildGeminiArgs adds the approval flag only when requested; resume beats sessionId", () => {
+  withEnv(undefined, () => {
+    assert.deepEqual(buildGeminiArgs(undefined, { approvalMode: "yolo" }), [
+      "--approval-mode",
+      "yolo",
+    ]);
+    assert.deepEqual(
+      buildGeminiArgs(undefined, { approvalMode: "plan", resume: "r1", sessionId: "s1" }),
+      ["--approval-mode", "plan", "--resume", "r1"], // sessionId "s1" is dropped because resume is set
+    );
+  });
+});
diff --git a/src/backends/gemini.ts b/src/backends/gemini.ts
new file mode 100644
index 0000000..cc7ea9c
--- /dev/null
+++ b/src/backends/gemini.ts
@@ -0,0 +1,87 @@
+import { executeCommand } from "../utils/commandExecutor.js";
+import { Logger } from "../utils/logger.js";
+import {
+ CLI,
+ MODELS,
+ ERROR_MESSAGES,
+ APPROVAL_MODES,
+ ENV,
+ type ApprovalMode,
+} from "../constants.js";
+import type { Backend, BackendRunOptions } from "./types.js";
+
+const VALID_APPROVAL_MODES = Object.values(APPROVAL_MODES) as string[];
+
+/**
+ * Pick the approval mode to forward: the explicit `arg` wins, otherwise the
+ * GEMINI_MCP_APPROVAL_MODE environment variable is consulted. The feature is
+ * OPT-IN — with neither set the result is undefined and no flag is passed, so
+ * plain Q&A keeps working as before. ("plan" is deliberately NOT the default:
+ * in headless `-p` mode it turns Gemini into an autonomous planner that ignores
+ * simple questions and can error out.) Unrecognised values yield undefined.
+ */
+export function resolveApprovalMode(arg?: string): ApprovalMode | undefined {
+  const requested = arg || process.env[ENV.APPROVAL_MODE];
+  const isKnown = requested !== undefined && VALID_APPROVAL_MODES.includes(requested);
+  return isKnown ? (requested as ApprovalMode) : undefined;
+}
+
+/** Assemble the Gemini CLI flags for a run; the prompt is added by the caller (it may go on stdin). */
+export function buildGeminiArgs(model: string | undefined, opts: BackendRunOptions): string[] {
+  const approval = resolveApprovalMode(opts.approvalMode);
+  // Native sessions: resuming takes precedence over starting/identifying one by id.
+  let session: string[] = [];
+  if (opts.resume) session = [CLI.FLAGS.RESUME, opts.resume];
+  else if (opts.sessionId) session = [CLI.FLAGS.SESSION_ID, opts.sessionId];
+  // Flag order: model, sandbox, approval mode, then session.
+  return [
+    ...(model ? [CLI.FLAGS.MODEL, model] : []),
+    ...(opts.sandbox ? [CLI.FLAGS.SANDBOX] : []),
+    ...(approval ? [CLI.FLAGS.APPROVAL_MODE, approval] : []),
+    ...session,
+  ];
+}
+
+/**
+ * Run the Gemini CLI once for `model`, sending the prompt via `-p` or, when `opts.useStdin` is set, on stdin.
+ */
+async function runOnce(prompt: string, model: string | undefined, opts: BackendRunOptions): Promise<string> {
+  const args = buildGeminiArgs(model, opts);
+  if (!opts.useStdin) args.push(CLI.FLAGS.PROMPT, prompt);
+  // The prompt is handed to executeCommand as its fourth argument only for stdin delivery.
+  return executeCommand(
+    CLI.COMMANDS.GEMINI,
+    args,
+    opts.onProgress,
+    opts.useStdin ? prompt : undefined,
+  );
+}
+
+export const geminiBackend: Backend = {
+  name: "gemini",
+  supportsModelSelection: true,
+  async run(prompt, opts) {
+    const model = opts.model;
+    try {
+      return await runOnce(prompt, model, opts);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      // Only a quota error on a non-flash model is retried (once, on flash); anything else propagates.
+      const canFallBack = message.includes(ERROR_MESSAGES.QUOTA_EXCEEDED) && model !== MODELS.FLASH;
+      if (!canFallBack) throw error;
+
+      Logger.warn(`${ERROR_MESSAGES.QUOTA_EXCEEDED}. Falling back to ${MODELS.FLASH}.`);
+      try {
+        const result = await runOnce(prompt, MODELS.FLASH, opts);
+        Logger.warn(`Successfully executed with ${MODELS.FLASH} fallback.`);
+        return result;
+      } catch (fallbackError) {
+        const detail =
+          fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
+        throw new Error(
+          `${MODELS.PRO} quota exceeded, ${MODELS.FLASH} fallback also failed: ${detail}`,
+        );
+      }
+    }
+  },
+};
diff --git a/src/backends/index.test.ts b/src/backends/index.test.ts
new file mode 100644
index 0000000..e5c3e4f
--- /dev/null
+++ b/src/backends/index.test.ts
@@ -0,0 +1,16 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { getBackend } from "./index.js";
+
+test("getBackend defaults to gemini", () => {
+  assert.equal(getBackend({}).name, "gemini"); // variable absent
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "" }).name, "gemini"); // variable empty
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "gemini" }).name, "gemini");
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "unknown" }).name, "gemini"); // unrecognised names fall back
+});
+
+test("getBackend selects agy when requested (case-insensitive, incl. 'antigravity')", () => {
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "agy" }).name, "agy");
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "AGY" }).name, "agy"); // upper case is accepted
+  assert.equal(getBackend({ GEMINI_MCP_BACKEND: "antigravity" }).name, "agy"); // long alias, same backend
+});
diff --git a/src/backends/index.ts b/src/backends/index.ts
new file mode 100644
index 0000000..25bf440
--- /dev/null
+++ b/src/backends/index.ts
@@ -0,0 +1,26 @@
+import { ENV } from "../constants.js";
+import type { Backend } from "./types.js";
+import { geminiBackend } from "./gemini.js";
+import { agyBackend } from "./agy.js";
+
+export type { Backend, BackendRunOptions } from "./types.js";
+export { geminiBackend } from "./gemini.js";
+export { agyBackend } from "./agy.js";
+
+/**
+ * Select the active backend from the GEMINI_MCP_BACKEND entry of `env`.
+ * "agy" or "antigravity" picks the experimental Antigravity CLI backend; an
+ * unset, empty, or unrecognised value falls back to the Gemini CLI.
+ */
+export function getBackend(env: NodeJS.ProcessEnv = process.env): Backend {
+  // Names are compared case-insensitively with surrounding whitespace removed.
+  const requested = (env[ENV.BACKEND] || "gemini").trim().toLowerCase();
+
+  // Only the opt-in aliases are listed here: every other name resolves to
+  // the default backend.
+  const optIn = new Map<string, Backend>([
+    ["agy", agyBackend],
+    ["antigravity", agyBackend],
+  ]);
+  return optIn.get(requested) ?? geminiBackend;
+}
diff --git a/src/backends/types.ts b/src/backends/types.ts
new file mode 100644
index 0000000..a395e17
--- /dev/null
+++ b/src/backends/types.ts
@@ -0,0 +1,29 @@
+import type { ApprovalMode } from "../constants.js";
+
+/**
+ * Options a backend understands. Backends interpret these in their own terms
+ * (e.g. the gemini backend maps `resume` to `--resume`, the agy backend to
+ * `--conversation`/`--continue`); unsupported options are ignored.
+ */
+export interface BackendRunOptions {
+  model?: string; // see Backend.supportsModelSelection for whether this is honoured
+  sandbox?: boolean; // gemini: -s, agy: --sandbox
+  approvalMode?: ApprovalMode; // gemini: --approval-mode; agy maps only "yolo"
+  sessionId?: string; // gemini: --session-id, agy: --conversation
+  resume?: string; // when set, both backends use it in preference to sessionId
+  /**
+   * Deliver the prompt on stdin rather than as a flag argument. Used for
+   * changeMode / `@file` prompts to dodge cmd.exe parsing and the OS
+   * command-line length limit.
+   */
+  useStdin?: boolean;
+  onProgress?: (newOutput: string) => void; // forwarded to executeCommand by both backends
+}
+
+/** A pluggable CLI backend that turns a prompt into model output. */
+export interface Backend {
+  readonly name: string; // "gemini" or "agy" for the two backends in this directory
+  /** Whether `model` selection is honoured (agy print-mode is Flash-only). */
+  readonly supportsModelSelection: boolean;
+  run(prompt: string, options: BackendRunOptions): Promise<string>; // resolves with the backend's output text
+}
diff --git a/src/constants.ts b/src/constants.ts
index 184cac2..087ea0f 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -62,14 +62,18 @@ export const CLI = {
// Command names
COMMANDS: {
GEMINI: "gemini",
+ AGY: "agy", // Antigravity CLI — experimental backend (gemini-cli's successor)
ECHO: "echo",
},
- // Command flags
+ // Command flags (Gemini CLI)
FLAGS: {
MODEL: "-m",
SANDBOX: "-s",
PROMPT: "-p",
- HELP: "-help",
+ HELP: "--help", // was "-help" — yargs parsed that as -h -e -l -p (the help bug)
+ APPROVAL_MODE: "--approval-mode",
+ SESSION_ID: "--session-id",
+ RESUME: "--resume",
},
// Default values
DEFAULTS: {
@@ -79,6 +83,26 @@ export const CLI = {
},
} as const;
+// Gemini CLI approval modes (`gemini --approval-mode <mode>`, confirmed in v0.43).
+// Opt-in only — when unset, no mode is forced (preserves plain Q&A behaviour).
+// plan = autonomous read-only planner · auto_edit = auto-approve edit tools ·
+// yolo = auto-approve all tools.
+export const APPROVAL_MODES = {
+ DEFAULT: "default",
+ AUTO_EDIT: "auto_edit",
+ YOLO: "yolo",
+ PLAN: "plan",
+} as const;
+export type ApprovalMode = (typeof APPROVAL_MODES)[keyof typeof APPROVAL_MODES];
+
+// Environment variables that configure the server.
+export const ENV = {
+ BACKEND: "GEMINI_MCP_BACKEND", // "gemini" (default) | "agy"
+ APPROVAL_MODE: "GEMINI_MCP_APPROVAL_MODE", // overridden per-call by the approvalMode arg
+ GEMINI_CLI_PATH: "GEMINI_CLI_PATH", // explicit path to the gemini executable (Windows shim resolution)
+ TIMEOUT_MS: "GEMINI_MCP_TIMEOUT_MS", // per-call command timeout in milliseconds
+} as const;
+
// (merged PromptArguments and ToolArguments)
export interface ToolArguments {
@@ -88,6 +112,9 @@ export interface ToolArguments {
changeMode?: boolean | string;
chunkIndex?: number | string; // Which chunk to return (1-based)
chunkCacheKey?: string; // Optional cache key for continuation
+ approvalMode?: string; // Gemini approval mode: default | auto_edit | yolo | plan
+ sessionId?: string; // Start/identify a session (gemini --session-id, agy --conversation)
+ resume?: string; // Resume a prior session id or "latest" (gemini --resume, agy --continue)
message?: string; // For Ping tool -- Un-used.
// --> new tool
diff --git a/src/index.ts b/src/index.ts
index 46c6118..a1d10ee 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -16,6 +16,7 @@ import {
GetPromptResult,
CallToolResult,
} from "@modelcontextprotocol/sdk/types.js";
+import { readFileSync } from "node:fs";
import { Logger } from "./utils/logger.js";
import { PROTOCOL, ToolArguments } from "./constants.js";
@@ -27,10 +28,16 @@ import {
getPromptMessage
} from "./tools/index.js";
+// Read the version from package.json at runtime so it never drifts from the
+// published version (it previously hardcoded an out-of-date "1.1.4").
+const pkg = JSON.parse(
+ readFileSync(new URL("../package.json", import.meta.url), "utf8"),
+) as { version: string };
+
const server = new Server(
{
name: "gemini-cli-mcp",
- version: "1.1.4",
+ version: pkg.version,
},{
capabilities: {
tools: {},
diff --git a/src/tools/ask-gemini.tool.ts b/src/tools/ask-gemini.tool.ts
index b6fee71..bfdc917 100644
--- a/src/tools/ask-gemini.tool.ts
+++ b/src/tools/ask-gemini.tool.ts
@@ -1,9 +1,10 @@
import { z } from 'zod';
import { UnifiedTool } from './registry.js';
import { executeGeminiCLI, processChangeModeOutput } from '../utils/geminiExecutor.js';
-import {
- ERROR_MESSAGES,
- STATUS_MESSAGES
+import {
+ ERROR_MESSAGES,
+ STATUS_MESSAGES,
+ type ApprovalMode,
} from '../constants.js';
const askGeminiArgsSchema = z.object({
@@ -13,6 +14,9 @@ const askGeminiArgsSchema = z.object({
changeMode: z.boolean().default(false).describe("Enable structured change mode - formats prompts to prevent tool errors and returns structured edit suggestions that Claude can apply directly"),
chunkIndex: z.union([z.number(), z.string()]).optional().describe("Which chunk to return (1-based)"),
chunkCacheKey: z.string().optional().describe("Optional cache key for continuation"),
+ approvalMode: z.enum(['default', 'auto_edit', 'yolo', 'plan']).optional().describe("Optional Gemini approval mode. If omitted, no mode is forced (best for plain Q&A/analysis). 'yolo'/'auto_edit' let Gemini run or edit (use with sandbox); 'plan' makes Gemini an autonomous read-only planner."),
+ sessionId: z.string().optional().describe("Start or identify a conversation session by id, so a later call can resume it (gemini --session-id)."),
+ resume: z.string().optional().describe("Resume a prior session by id, or 'latest' for the most recent, to continue a multi-turn conversation (gemini --resume)."),
});
export const askGeminiTool: UnifiedTool = {
@@ -24,8 +28,8 @@ export const askGeminiTool: UnifiedTool = {
},
category: 'gemini',
execute: async (args, onProgress) => {
- const { prompt, model, sandbox, changeMode, chunkIndex, chunkCacheKey } = args; if (!prompt?.trim()) { throw new Error(ERROR_MESSAGES.NO_PROMPT_PROVIDED); }
-
+ const { prompt, model, sandbox, changeMode, chunkIndex, chunkCacheKey, approvalMode, sessionId, resume } = args; if (!prompt?.trim()) { throw new Error(ERROR_MESSAGES.NO_PROMPT_PROVIDED); }
+
if (changeMode && chunkIndex && chunkCacheKey) {
// Security: validate cacheKey format before any cache access
if (typeof chunkCacheKey !== 'string' || !/^[a-f0-9]{8}$/.test(chunkCacheKey)) {
@@ -38,15 +42,17 @@ export const askGeminiTool: UnifiedTool = {
prompt as string
);
}
-
- const result = await executeGeminiCLI(
- prompt as string,
- model as string | undefined,
- !!sandbox,
- !!changeMode,
- onProgress
- );
-
+
+ const result = await executeGeminiCLI(prompt as string, {
+ model: model as string | undefined,
+ sandbox: !!sandbox,
+ changeMode: !!changeMode,
+ approvalMode: approvalMode as ApprovalMode | undefined,
+ sessionId: sessionId as string | undefined,
+ resume: resume as string | undefined,
+ onProgress,
+ });
+
if (changeMode) {
return processChangeModeOutput(
result,
@@ -55,6 +61,9 @@ export const askGeminiTool: UnifiedTool = {
prompt as string
);
}
- return `${STATUS_MESSAGES.GEMINI_RESPONSE}\n${result}`; // changeMode false
+ // Surface the active session id so the caller can resume the conversation.
+ const activeSession = (resume as string | undefined) || (sessionId as string | undefined);
+ const sessionNote = activeSession ? `\n\n[session: ${activeSession}]` : '';
+ return `${STATUS_MESSAGES.GEMINI_RESPONSE}\n${result}${sessionNote}`; // changeMode false
}
};
\ No newline at end of file
diff --git a/src/tools/brainstorm.tool.ts b/src/tools/brainstorm.tool.ts
index 0970ade..e5680d9 100644
--- a/src/tools/brainstorm.tool.ts
+++ b/src/tools/brainstorm.tool.ts
@@ -2,6 +2,7 @@ import { z } from 'zod';
import { UnifiedTool } from './registry.js';
import { Logger } from '../utils/logger.js';
import { executeGeminiCLI } from '../utils/geminiExecutor.js';
+import { type ApprovalMode } from '../constants.js';
function buildBrainstormPrompt(config: {
prompt: string;
@@ -118,6 +119,7 @@ ${domain ? `Given the ${domain} domain, I'll apply the most effective combinatio
const brainstormArgsSchema = z.object({
prompt: z.string().min(1).describe("Primary brainstorming challenge or question to explore"),
model: z.string().optional().describe("Optional model to use (e.g., 'gemini-2.5-flash'). If not specified, uses the default model (gemini-2.5-pro)."),
+ approvalMode: z.enum(['default', 'auto_edit', 'yolo', 'plan']).optional().describe("Optional Gemini approval mode. If omitted, no mode is forced."),
methodology: z.enum(['divergent', 'convergent', 'scamper', 'design-thinking', 'lateral', 'auto']).default('auto').describe("Brainstorming framework: 'divergent' (generate many ideas), 'convergent' (refine existing), 'scamper' (systematic triggers), 'design-thinking' (human-centered), 'lateral' (unexpected connections), 'auto' (AI selects best)"),
domain: z.string().optional().describe("Domain context for specialized brainstorming (e.g., 'software', 'business', 'creative', 'research', 'product', 'marketing')"),
constraints: z.string().optional().describe("Known limitations, requirements, or boundaries (budget, time, technical, legal, etc.)"),
@@ -138,6 +140,7 @@ export const brainstormTool: UnifiedTool = {
const {
prompt,
model,
+ approvalMode,
methodology = 'auto',
domain,
constraints,
@@ -166,6 +169,10 @@ export const brainstormTool: UnifiedTool = {
onProgress?.(`Generating ${ideaCount} ideas via ${methodology} methodology...`);
// Execute with Gemini
- return await executeGeminiCLI(enhancedPrompt, model as string | undefined, false, false, onProgress);
+ return await executeGeminiCLI(enhancedPrompt, {
+ model: model as string | undefined,
+ approvalMode: approvalMode as ApprovalMode | undefined,
+ onProgress,
+ });
}
};
\ No newline at end of file
diff --git a/src/tools/simple-tools.ts b/src/tools/simple-tools.ts
index 64af593..df272b9 100644
--- a/src/tools/simple-tools.ts
+++ b/src/tools/simple-tools.ts
@@ -1,6 +1,7 @@
import { z } from 'zod';
import { UnifiedTool } from './registry.js';
import { executeCommand } from '../utils/commandExecutor.js';
+import { CLI } from '../constants.js';
const pingArgsSchema = z.object({
prompt: z.string().default('').describe("Message to echo "),
@@ -16,7 +17,7 @@ export const pingTool: UnifiedTool = {
category: 'simple',
execute: async (args, onProgress) => {
const message = args.prompt || args.message || "Pong!";
- return executeCommand("echo", [message as string], onProgress);
+ return executeCommand(CLI.COMMANDS.ECHO, [message as string], onProgress);
}
};
@@ -31,6 +32,6 @@ export const helpTool: UnifiedTool = {
},
category: 'simple',
execute: async (args, onProgress) => {
- return executeCommand("gemini", ["-help"], onProgress);
+ return executeCommand(CLI.COMMANDS.GEMINI, [CLI.FLAGS.HELP], onProgress);
}
};
\ No newline at end of file
diff --git a/src/utils/commandExecutor.test.ts b/src/utils/commandExecutor.test.ts
new file mode 100644
index 0000000..5f510ca
--- /dev/null
+++ b/src/utils/commandExecutor.test.ts
@@ -0,0 +1,41 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+ quoteForCmd,
+ resolveCommandForExecution,
+ buildEnoentErrorMessage,
+} from "./commandExecutor.js";
+
+test("quoteForCmd wraps in double quotes and doubles embedded quotes", () => {
+ assert.equal(quoteForCmd("hello"), '"hello"');
+ assert.equal(quoteForCmd("a&calc"), '"a&calc"'); // cmd metachar made inert by quoting
+ assert.equal(quoteForCmd('a"b'), '"a""b"');
+});
+
+test("quoteForCmd doubles a trailing backslash so it can't escape the closing quote", () => {
+ assert.equal(quoteForCmd("path\\"), '"path\\\\"');
+});
+
+test("resolveCommandForExecution is a no-op off Windows", () => {
+ if (process.platform !== "win32") {
+ assert.equal(resolveCommandForExecution("gemini"), "gemini");
+ assert.equal(resolveCommandForExecution("echo"), "echo");
+ } else {
+ // On Windows it should at least never return an empty string.
+ assert.ok(resolveCommandForExecution("gemini").length > 0);
+ }
+});
+
+test("buildEnoentErrorMessage gives gemini-specific, platform-aware guidance", () => {
+ const msg = buildEnoentErrorMessage("gemini");
+ assert.match(msg, /Could not find the "gemini"/);
+ assert.match(msg, /GEMINI_CLI_PATH/);
+ assert.match(msg, /@google\/gemini-cli/);
+ assert.match(msg, process.platform === "win32" ? /where gemini/ : /which gemini/);
+});
+
+test("buildEnoentErrorMessage omits the gemini install hint for other commands", () => {
+ const msg = buildEnoentErrorMessage("agy");
+ assert.match(msg, /Could not find the "agy"/);
+ assert.doesNotMatch(msg, /@google\/gemini-cli/);
+});
diff --git a/src/utils/commandExecutor.ts b/src/utils/commandExecutor.ts
index edf90c7..f31e42f 100644
--- a/src/utils/commandExecutor.ts
+++ b/src/utils/commandExecutor.ts
@@ -1,24 +1,86 @@
-import { spawn } from "child_process";
+import { spawn, execSync } from "child_process";
import { Logger } from "./logger.js";
+import { CLI, ENV } from "../constants.js";
+import { resolveTimeoutMs } from "./timeoutManager.js";
// Quote a single argument for cmd.exe (used by spawn's shell:true on Windows).
// Embedded quotes are doubled and backslash runs before a quote (or the closing
// quote) are doubled so they don't escape it, per CommandLineToArgvW rules. Note
// cmd still expands %VAR%/!VAR! inside quotes — an env read at worst, not RCE.
-function quoteForCmd(arg: string): string {
+export function quoteForCmd(arg: string): string {
const body = String(arg).replace(/(\\*)"/g, '$1$1""').replace(/(\\+)$/, '$1$1');
return `"${body}"`;
}
+// Windows-only: find the real executable for the gemini command. The MCP server
+// often runs without the user's interactive PATH, so we (1) honour an explicit
+// GEMINI_CLI_PATH override, then (2) ask `where` and prefer the `.cmd` shim that
+// Node can actually launch (over .ps1/.bat/.exe). Falls back to "gemini.cmd".
+// Resolution is cached per command for the life of the process.
+const resolveCache = new Map<string, string>();
+export function resolveCommandForExecution(command: string): string {
+ if (process.platform !== "win32" || command !== CLI.COMMANDS.GEMINI) return command;
+
+ const cached = resolveCache.get(command);
+ if (cached) return cached;
+
+ let resolved: string = command;
+ const override = process.env[ENV.GEMINI_CLI_PATH]?.trim();
+ if (override) {
+ resolved = override;
+ } else {
+ try {
+ const out = execSync(`where ${command}`, {
+ encoding: "utf8",
+ stdio: ["ignore", "pipe", "ignore"],
+ });
+ const candidates = out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
+ const byExt = (ext: string) => candidates.find((c) => c.toLowerCase().endsWith(ext));
+ resolved =
+ byExt(".cmd") || byExt(".ps1") || byExt(".bat") || byExt(".exe") ||
+ candidates[0] || `${command}.cmd`;
+ } catch {
+ resolved = `${command}.cmd`;
+ }
+ }
+
+ resolveCache.set(command, resolved);
+ return resolved;
+}
+
+// Actionable guidance when the executable can't be found (ENOENT). The most
+// common cause is the MCP server not inheriting the user's interactive PATH.
+export function buildEnoentErrorMessage(command: string): string {
+ const isWindows = process.platform === "win32";
+ const lines = [
+ `Could not find the "${command}" executable.`,
+ `The MCP server runs in its own process and may not inherit your shell's PATH.`,
+ `• Verify it is installed and resolvable: \`${isWindows ? "where" : "which"} ${command}\`.`,
+ ];
+ if (command === CLI.COMMANDS.GEMINI) {
+ lines.push(
+ `• Install it: \`npm install -g @google/gemini-cli\`.`,
+ isWindows
+ ? `• Or set ${ENV.GEMINI_CLI_PATH} to the full path of the gemini shim (e.g. C:\\path\\to\\gemini.cmd).`
+ : `• Or set ${ENV.GEMINI_CLI_PATH} to the full path of the gemini executable.`,
+ );
+ }
+ return lines.join("\n");
+}
+
export async function executeCommand(
command: string,
args: string[],
- onProgress?: (newOutput: string) => void
+ onProgress?: (newOutput: string) => void,
+ stdinData?: string,
): Promise<string> {
return new Promise((resolve, reject) => {
const startTime = Date.now();
Logger.commandExecution(command, args, startTime);
+ const isWindows = process.platform === "win32";
+ const resolvedCommand = resolveCommandForExecution(command);
+
// Windows quirk: Node 22+ blocks spawning `.cmd` / `.bat` shims without
// `shell: true` (CVE-2024-27980). But shell:true routes the command through
// cmd.exe, which re-parses the joined line — so EVERY argument must be
@@ -26,23 +88,61 @@ export async function executeCommand(
// trigger command injection even in tokens without spaces (e.g. a prompt
// `a&calc`); wrapping each arg in double quotes makes them inert. This is a
// no-op on macOS / Linux, where shell:false passes argv directly.
- const isWindows = process.platform === "win32";
const safeArgs = isWindows ? args.map(quoteForCmd) : args;
+ // A resolved full path may contain spaces; quote it for cmd.exe. A bare
+ // command name (no whitespace) passes through unchanged to preserve the
+ // exact, already-tested shim-launch behaviour.
+ const spawnCommand =
+ isWindows && /\s/.test(resolvedCommand) ? `"${resolvedCommand}"` : resolvedCommand;
- const childProcess = spawn(command, safeArgs, {
+ // Complex prompts arrive on stdin (see geminiExecutor) to bypass cmd.exe
+ // parsing and the OS command-line length limit; only open stdin then.
+ // windowsHide suppresses the popup console window on Windows (no-op elsewhere).
+ const childProcess = spawn(spawnCommand, safeArgs, {
env: process.env,
shell: isWindows,
- stdio: ["ignore", "pipe", "pipe"],
+ windowsHide: true,
+ stdio: [stdinData !== undefined ? "pipe" : "ignore", "pipe", "pipe"],
});
+ if (stdinData !== undefined && childProcess.stdin) {
+ childProcess.stdin.write(stdinData);
+ childProcess.stdin.end();
+ }
+
let stdout = "";
let stderr = "";
let isResolved = false;
let lastReportedLength = 0;
-
- childProcess.stdout.on("data", (data) => {
+
+ // Release a genuinely hung child after the configured timeout (default 30m;
+ // GEMINI_MCP_TIMEOUT_MS overrides, 0 disables). SIGTERM first, then SIGKILL.
+ const timeoutMs = resolveTimeoutMs();
+ let timeoutHandle: NodeJS.Timeout | undefined;
+ const clearTimer = () => {
+ if (timeoutHandle) {
+ clearTimeout(timeoutHandle);
+ timeoutHandle = undefined;
+ }
+ };
+ if (timeoutMs > 0) {
+ timeoutHandle = setTimeout(() => {
+ if (isResolved) return;
+ isResolved = true;
+ Logger.error(`Command timed out after ${timeoutMs}ms; terminating: ${command}`);
+ try { childProcess.kill("SIGTERM"); } catch { /* already gone */ }
+ const sigkill = setTimeout(() => {
+ try { childProcess.kill("SIGKILL"); } catch { /* already gone */ }
+ }, 2000);
+ sigkill.unref?.();
+ reject(new Error(`Command timed out after ${timeoutMs}ms: ${command}`));
+ }, timeoutMs);
+ timeoutHandle.unref?.();
+ }
+
+ childProcess.stdout?.on("data", (data) => {
stdout += data.toString();
-
+
// Report new content if callback provided
if (onProgress && stdout.length > lastReportedLength) {
const newContent = stdout.substring(lastReportedLength);
@@ -51,9 +151,8 @@ export async function executeCommand(
}
});
-
// CLI level errors
- childProcess.stderr.on("data", (data) => {
+ childProcess.stderr?.on("data", (data) => {
stderr += data.toString();
// find RESOURCE_EXHAUSTED when gemini-2.5-pro quota is exceeded
if (stderr.includes("RESOURCE_EXHAUSTED")) {
@@ -78,27 +177,32 @@ export async function executeCommand(
}
});
childProcess.on("error", (error) => {
- if (!isResolved) {
- isResolved = true;
- Logger.error(`Process error:`, error);
+ if (isResolved) return;
+ isResolved = true;
+ clearTimer();
+ Logger.error(`Process error:`, error);
+ const code = (error as NodeJS.ErrnoException).code;
+ if (code === "ENOENT") {
+ reject(new Error(buildEnoentErrorMessage(command)));
+ } else {
reject(new Error(`Failed to spawn command: ${error.message}`));
}
});
childProcess.on("close", (code) => {
- if (!isResolved) {
- isResolved = true;
- if (code === 0) {
- Logger.commandComplete(startTime, code, stdout.length);
- resolve(stdout.trim());
- } else {
- Logger.commandComplete(startTime, code);
- Logger.error(`Failed with exit code ${code}`);
- const errorMessage = stderr.trim() || "Unknown error";
- reject(
- new Error(`Command failed with exit code ${code}: ${errorMessage}`),
- );
- }
+ if (isResolved) return;
+ isResolved = true;
+ clearTimer();
+ if (code === 0) {
+ Logger.commandComplete(startTime, code, stdout.length);
+ resolve(stdout.trim());
+ } else {
+ Logger.commandComplete(startTime, code);
+ Logger.error(`Failed with exit code ${code}`);
+ const errorMessage = stderr.trim() || "Unknown error";
+ reject(
+ new Error(`Command failed with exit code ${code}: ${errorMessage}`),
+ );
}
});
});
-}
\ No newline at end of file
+}
diff --git a/src/utils/geminiExecutor.test.ts b/src/utils/geminiExecutor.test.ts
new file mode 100644
index 0000000..2fd922c
--- /dev/null
+++ b/src/utils/geminiExecutor.test.ts
@@ -0,0 +1,17 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { assertSafeFileReferences } from "./geminiExecutor.js";
+
+const root = process.cwd();
+
+test("assertSafeFileReferences allows in-project @file references", () => {
+ assert.doesNotThrow(() => assertSafeFileReferences("explain @src/index.ts", root));
+ assert.doesNotThrow(() => assertSafeFileReferences("no references at all", root));
+ assert.doesNotThrow(() => assertSafeFileReferences("@package.json summarise", root));
+});
+
+test("assertSafeFileReferences rejects traversal, home, and absolute references", () => {
+ assert.throws(() => assertSafeFileReferences("@../secret.txt", root), /outside the project directory/);
+ assert.throws(() => assertSafeFileReferences("@~/.ssh/id_rsa", root), /outside the project directory/);
+ assert.throws(() => assertSafeFileReferences("@/etc/passwd", root), /outside the project directory/);
+});
diff --git a/src/utils/geminiExecutor.ts b/src/utils/geminiExecutor.ts
index 6cae0aa..e754934 100644
--- a/src/utils/geminiExecutor.ts
+++ b/src/utils/geminiExecutor.ts
@@ -1,12 +1,7 @@
import * as path from 'path';
-import { executeCommand } from './commandExecutor.js';
import { Logger } from './logger.js';
-import {
- ERROR_MESSAGES,
- STATUS_MESSAGES,
- MODELS,
- CLI
-} from '../constants.js';
+import type { ApprovalMode } from '../constants.js';
+import { getBackend } from '../backends/index.js';
import { parseChangeModeOutput, validateChangeModeEdits } from './changeModeParser.js';
import { formatChangeModeResponse, summarizeChangeModeEdits } from './changeModeTranslator.js';
@@ -43,13 +38,21 @@ export function assertSafeFileReferences(prompt: string, root: string = process.
}
}
+export interface ExecuteGeminiOptions {
+ model?: string;
+ sandbox?: boolean;
+ changeMode?: boolean;
+ approvalMode?: ApprovalMode;
+ sessionId?: string;
+ resume?: string;
+ onProgress?: (newOutput: string) => void;
+}
+
export async function executeGeminiCLI(
prompt: string,
- model?: string,
- sandbox?: boolean,
- changeMode?: boolean,
- onProgress?: (newOutput: string) => void
+ options: ExecuteGeminiOptions = {},
): Promise<string> {
+ const { model, sandbox, changeMode, approvalMode, sessionId, resume, onProgress } = options;
let prompt_processed = prompt;
if (changeMode) {
@@ -118,48 +121,25 @@ ${prompt_processed}
prompt_processed = changeModeInstructions;
}
- // Block @file references that escape the project root before the prompt
- // reaches the Gemini CLI's file-inlining parser (CVE-2026-0755).
+ // Security: block @file refs that escape the project root before the prompt
+ // reaches any CLI that inlines file contents (CVE-2026-0755).
assertSafeFileReferences(prompt_processed);
- const args = [];
- if (model) { args.push(CLI.FLAGS.MODEL, model); }
- if (sandbox) { args.push(CLI.FLAGS.SANDBOX); }
-
- // spawn runs with shell: false (and cmd.exe-safe quoting on Windows is
- // handled in commandExecutor), so the prompt is passed verbatim as a single
- // argv entry. No manual quoting here — wrapping in `"` only injects literal
- // quote characters and corrupts @file references (#66, CVE-2026-0755).
- args.push(CLI.FLAGS.PROMPT, prompt_processed);
-
- try {
- return await executeCommand(CLI.COMMANDS.GEMINI, args, onProgress);
- } catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error);
- if (errorMessage.includes(ERROR_MESSAGES.QUOTA_EXCEEDED) && model !== MODELS.FLASH) {
- Logger.warn(`${ERROR_MESSAGES.QUOTA_EXCEEDED}. Falling back to ${MODELS.FLASH}.`);
- await sendStatusMessage(STATUS_MESSAGES.FLASH_RETRY);
- const fallbackArgs = [];
- fallbackArgs.push(CLI.FLAGS.MODEL, MODELS.FLASH);
- if (sandbox) {
- fallbackArgs.push(CLI.FLAGS.SANDBOX);
- }
-
- // Pass the prompt verbatim here too (see note in the primary path).
- fallbackArgs.push(CLI.FLAGS.PROMPT, prompt_processed);
- try {
- const result = await executeCommand(CLI.COMMANDS.GEMINI, fallbackArgs, onProgress);
- Logger.warn(`Successfully executed with ${MODELS.FLASH} fallback.`);
- await sendStatusMessage(STATUS_MESSAGES.FLASH_SUCCESS);
- return result;
- } catch (fallbackError) {
- const fallbackErrorMessage = fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
- throw new Error(`${MODELS.PRO} quota exceeded, ${MODELS.FLASH} fallback also failed: ${fallbackErrorMessage}`);
- }
- } else {
- throw error;
- }
- }
+ // changeMode and @file prompts go on stdin (gemini backend) to keep large
+ // prompts under the OS command-line length limit and away from cmd.exe
+ // parsing on Windows; simple prompts use -p. The selected backend
+ // (gemini by default, agy when GEMINI_MCP_BACKEND=agy) handles the rest.
+ const useStdin = !!changeMode || prompt_processed.includes('@');
+
+ return getBackend().run(prompt_processed, {
+ model,
+ sandbox,
+ approvalMode,
+ sessionId,
+ resume,
+ useStdin,
+ onProgress,
+ });
}
export async function processChangeModeOutput(
@@ -229,9 +209,4 @@ export async function processChangeModeOutput(
Logger.debug(`ChangeMode: Parsed ${edits.length} edits, ${chunks.length} chunks, returning chunk ${returnChunkIndex}`);
return result;
-}
-
-// Placeholder
-async function sendStatusMessage(message: string): Promise<void> {
- Logger.debug(`Status: ${message}`);
}
\ No newline at end of file
diff --git a/src/utils/timeoutManager.test.ts b/src/utils/timeoutManager.test.ts
new file mode 100644
index 0000000..f2f2f21
--- /dev/null
+++ b/src/utils/timeoutManager.test.ts
@@ -0,0 +1,19 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { resolveTimeoutMs, DEFAULT_COMMAND_TIMEOUT_MS } from "./timeoutManager.js";
+
+test("resolveTimeoutMs: default when unset or blank", () => {
+ assert.equal(resolveTimeoutMs({}), DEFAULT_COMMAND_TIMEOUT_MS);
+ assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "" }), DEFAULT_COMMAND_TIMEOUT_MS);
+ assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: " " }), DEFAULT_COMMAND_TIMEOUT_MS);
+});
+
+test("resolveTimeoutMs: honours a positive override", () => {
+ assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "5000" }), 5000);
+});
+
+test("resolveTimeoutMs: 0, negative, or invalid disables the timeout (returns 0)", () => {
+ assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "0" }), 0);
+ assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "-1" }), 0);
+ assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "abc" }), 0);
+});
diff --git a/src/utils/timeoutManager.ts b/src/utils/timeoutManager.ts
index e69de29..2764359 100644
--- a/src/utils/timeoutManager.ts
+++ b/src/utils/timeoutManager.ts
@@ -0,0 +1,20 @@
+import { ENV } from "../constants.js";
+
+// Default per-command timeout. Large-codebase analyses can legitimately run for
+// many minutes (see STATUS_MESSAGES), so this is deliberately generous — it
+// exists to release a genuinely hung child process, not to cap normal work.
+// Override with GEMINI_MCP_TIMEOUT_MS (milliseconds); set it to 0 to disable.
+export const DEFAULT_COMMAND_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
+
+/**
+ * Resolve the per-command timeout in milliseconds from the environment, falling
+ * back to {@link DEFAULT_COMMAND_TIMEOUT_MS}. A value of 0 — or any negative /
+ * non-numeric value — disables the timeout and returns 0.
+ */
+export function resolveTimeoutMs(env: NodeJS.ProcessEnv = process.env): number {
+ const raw = env[ENV.TIMEOUT_MS];
+ if (raw === undefined || raw.trim() === "") return DEFAULT_COMMAND_TIMEOUT_MS;
+ const parsed = Number(raw);
+ if (!Number.isFinite(parsed) || parsed <= 0) return 0; // disabled / invalid
+ return parsed;
+}
diff --git a/tsconfig.build.json b/tsconfig.build.json
new file mode 100644
index 0000000..50d32b9
--- /dev/null
+++ b/tsconfig.build.json
@@ -0,0 +1,4 @@
+{
+ "extends": "./tsconfig.json",
+ "exclude": ["node_modules", "dist", "scripts", "src/**/*.test.ts"]
+}
From 4c9b9b3ff197461f6431bcc15e4ab829e385cb31 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 14:15:08 -0700
Subject: [PATCH 2/8] fix: address PR #78 review feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- commandExecutor: add an 'error' listener on child stdin so an EPIPE (child exited before reading) is logged instead of crashing the server (gemini-code-assist).
- commandExecutor: on Windows, terminate timed-out processes with 'taskkill /pid <pid> /T /F' — with shell:true, kill() only hit cmd.exe and orphaned the real gemini/agy child (gemini-code-assist).
- resolveCommandForExecution: prefer .cmd/.exe/.bat and stop preferring .ps1, which cmd.exe (shell:true) can't launch directly (Copilot).
- run-tests.mjs: feature-detect the tsx loader — '--import tsx' on Node >=20.6, '--loader tsx' below (the >=18 floor lacks --import) (Copilot).
- ask-gemini: don't emit 'latest' as a [session: ...] id and clarify it's the requested id, not one parsed from the CLI (Copilot).
---
docs/.vitepress/config.js | 1 +
docs/api.md | 103 +++++++++++++++++-
docs/concepts/configuration.md | 169 ++++++++++++++++++++++++++++++
docs/concepts/how-it-works.md | 46 ++++++--
docs/concepts/models.md | 20 ++--
docs/getting-started.md | 32 +++++-
docs/index.md | 5 +-
docs/installation.md | 2 +-
docs/resources/faq.md | 15 ++-
docs/resources/roadmap.md | 59 ++++++++---
docs/resources/troubleshooting.md | 13 ++-
docs/usage/commands.md | 96 +++++++++++------
scripts/run-tests.mjs | 8 +-
src/tools/ask-gemini.tool.ts | 10 +-
src/utils/commandExecutor.ts | 29 +++--
15 files changed, 520 insertions(+), 88 deletions(-)
create mode 100644 docs/concepts/configuration.md
diff --git a/docs/.vitepress/config.js b/docs/.vitepress/config.js
index 00a0a60..c8def64 100644
--- a/docs/.vitepress/config.js
+++ b/docs/.vitepress/config.js
@@ -39,6 +39,7 @@ export default withMermaid(
collapsed: false,
items: [
{ text: 'How It Works', link: '/concepts/how-it-works' },
+ { text: 'Configuration', link: '/concepts/configuration' },
{ text: 'File Analysis (@)', link: '/concepts/file-analysis' },
{ text: 'Model Selection', link: '/concepts/models' },
{ text: 'Sandbox Mode', link: '/concepts/sandbox' }
diff --git a/docs/api.md b/docs/api.md
index 1f469bd..f9341f1 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -1,3 +1,102 @@
-# API
+# API Reference
-Stay tuned.
\ No newline at end of file
+## Tools
+
+The MCP server exposes the following tools over stdio transport.
+
+### ask-gemini
+
+The primary tool for sending prompts to Gemini.
+
+**Arguments:**
+
+```typescript
+{
+ prompt: string; // Required. Use @ to include files.
+ model?: string; // e.g. "gemini-2.5-flash"
+ sandbox?: boolean; // default false
+ changeMode?: boolean; // default false — structured edits
+ approvalMode?: "default" | "auto_edit" | "yolo" | "plan";
+ sessionId?: string; // tag a session
+ resume?: string; // resume by id or "latest"
+ chunkIndex?: number; // 1-based chunk (changeMode)
+ chunkCacheKey?: string; // hex cache key (changeMode)
+}
+```
+
+### brainstorm
+
+Structured ideation with methodology frameworks.
+
+**Arguments:**
+
+```typescript
+{
+ prompt: string; // Required. The challenge to brainstorm.
+ model?: string;
+ approvalMode?: "default" | "auto_edit" | "yolo" | "plan";
+ methodology?: "divergent" | "convergent" | "scamper"
+ | "design-thinking" | "lateral" | "auto";
+ domain?: string; // e.g. "software", "business"
+ constraints?: string;
+ existingContext?: string;
+ ideaCount?: number; // default 12
+ includeAnalysis?: boolean; // default true
+}
+```
+
+### ping
+
+Echo test. Returns the input message.
+
+```typescript
+{ prompt?: string; } // defaults to "Pong!"
+```
+
+### Help
+
+Returns `gemini --help` output.
+
+```typescript
+{} // no arguments
+```
+
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `GEMINI_MCP_BACKEND` | `gemini` | Backend: `gemini` or `agy` (experimental) |
+| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
+| `GEMINI_MCP_TIMEOUT_MS` | `1800000` | Per-call timeout in ms; `0` disables |
+| `GEMINI_CLI_PATH` | *(auto)* | Full path to the gemini executable (Windows) |
+
+## Transport
+
+The server uses **stdio** transport (MCP standard). It reads JSON-RPC from stdin and writes responses to stdout. No HTTP server, no ports.
+
+```json
+{
+ "mcpServers": {
+ "gemini-cli": {
+ "command": "npx",
+ "args": ["-y", "gemini-mcp-tool"]
+ }
+ }
+}
+```
+
+## Backends
+
+The `Backend` interface is:
+
+```typescript
+interface Backend {
+ readonly name: string;
+ readonly supportsModelSelection: boolean;
+ run(prompt: string, options: BackendRunOptions): Promise<string>;
+}
+```
+
+Two implementations ship:
+- **`geminiBackend`** — default, full feature support
+- **`agyBackend`** — experimental, Flash-only, transcript-file recovery
\ No newline at end of file
diff --git a/docs/concepts/configuration.md b/docs/concepts/configuration.md
new file mode 100644
index 0000000..220729f
--- /dev/null
+++ b/docs/concepts/configuration.md
@@ -0,0 +1,169 @@
+# Configuration
+
+All configuration is done via environment variables in your MCP client config. No config files to manage.
+
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
+| `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` |
+| `GEMINI_MCP_TIMEOUT_MS` | `1800000` (30 min) | Per-call timeout; `0` disables |
+| `GEMINI_CLI_PATH` | *(auto-detect)* | Explicit path to the gemini executable |
+
+### Setting Environment Variables
+
+#### Claude Code
+```bash
+claude mcp add gemini-cli -e GEMINI_MCP_APPROVAL_MODE=plan -- npx -y gemini-mcp-tool
+```
+
+#### Claude Desktop / Other Clients
+```json
+{
+ "mcpServers": {
+ "gemini-cli": {
+ "command": "npx",
+ "args": ["-y", "gemini-mcp-tool"],
+ "env": {
+ "GEMINI_MCP_APPROVAL_MODE": "plan",
+ "GEMINI_MCP_TIMEOUT_MS": "1800000"
+ }
+ }
+ }
+}
+```
+
+---
+
+## Approval Mode
+
+Controls how much autonomy Gemini has when processing a request. Maps directly to `gemini --approval-mode`.
+
+| Mode | Behaviour | Use Case |
+|------|-----------|----------|
+| *(unset)* | No flag passed — Gemini behaves as normal Q&A | Default; best for analysis and questions |
+| `default` | Gemini's own default mode | Same as unset |
+| `plan` | Read-only autonomous planner | "Gemini reads, Claude edits" |
+| `auto_edit` | Auto-approve file edits, prompt for other tools | Combine with `sandbox` for safe edits |
+| `yolo` | Auto-approve everything | CI scripts, fully trusted operations |
+
+::: warning
+In headless mode (`-p`), `plan` turns Gemini into an autonomous planner that may ignore simple questions. Leave unset for plain Q&A.
+:::
+
+### Per-call Override
+
+The `approvalMode` tool argument overrides the environment variable:
+
+```
+Ask gemini to review this codebase with approvalMode: "plan"
+```
+
+---
+
+## Backends
+
+The MCP server can use different CLI backends to talk to Google's models.
+
+### Gemini CLI (default)
+
+The standard `gemini` command. Supports model selection, approval modes, sandbox, and native sessions.
+
+```json
+{
+ "env": {
+ "GEMINI_MCP_BACKEND": "gemini"
+ }
+}
+```
+
+### Antigravity CLI (experimental)
+
+Google's Antigravity CLI (`agy`) is the successor to `gemini` (the Gemini CLI is being retired on June 18, 2026 for free/Pro/Ultra tiers). Set `GEMINI_MCP_BACKEND=agy` to opt in.
+
+```json
+{
+ "env": {
+ "GEMINI_MCP_BACKEND": "agy"
+ }
+}
+```
+
+**Caveats:**
+- Print mode (`agy -p`) is hardcoded to **Gemini 3.5 Flash** — model selection is ignored
+- The `agy -p` stdout bug (exit 0, empty output) is worked around by reading agy's transcript files on disk
+- Only `yolo` maps to agy's `--dangerously-skip-permissions`; graded approval modes are not supported
+- Calls are serialised to avoid transcript id collisions
+
+::: tip
+You don't need to do anything today. Gemini CLI still works for headless/automation use. This backend is here so you're ready when the transition happens.
+:::
+
+---
+
+## Timeout
+
+A per-call timeout protects against hung CLI processes. If the timeout fires, the child is sent `SIGTERM`, then `SIGKILL` after 2 seconds; on Windows the whole process tree is terminated with `taskkill /T /F` instead.
+
+| Value | Behaviour |
+|-------|-----------|
+| `1800000` (default) | 30-minute timeout |
+| Any positive number | Timeout in milliseconds |
+| `0` | Disabled — wait forever |
+
+```json
+{
+ "env": {
+ "GEMINI_MCP_TIMEOUT_MS": "600000"
+ }
+}
+```
+
+::: tip
+Large codebase analyses can legitimately run for many minutes. The 30-minute default is deliberately generous — it exists to release genuinely hung processes, not to cap normal work.
+:::
+
+---
+
+## Native Sessions
+
+Multi-turn conversations use the Gemini CLI's own session system — no local transcript storage.
+
+### Starting a session
+Pass `sessionId` to tag a conversation:
+```
+ask-gemini with sessionId: "my-review" — review the auth module
+```
+
+### Resuming a session
+Pass `resume` with the session id (or `"latest"`) to continue:
+```
+ask-gemini with resume: "my-review" — now suggest improvements
+```
+
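+The response includes a `[session: <id>]` footer echoing the session id you supplied, so you can track which session is active (it is omitted when you resume with `"latest"`, which is a selector rather than an id).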
+
+::: info
+Sessions are backed by `gemini --session-id` / `--resume` on the Gemini backend, and `agy --conversation` / `--continue` on the agy backend.
+:::
+
+---
+
+## Windows Executable Resolution
+
+On Windows, the MCP server often runs without your interactive PATH. The tool resolves the `gemini` command by:
+
+1. Checking `GEMINI_CLI_PATH` (if set)
+2. Running `where gemini` and preferring the `.cmd` shim (then `.exe`, then `.bat`, then the first match)
+3. Falling back to `gemini.cmd`
+
+If you get "command not found" errors on Windows, set `GEMINI_CLI_PATH` to the full path:
+
+```json
+{
+ "env": {
+ "GEMINI_CLI_PATH": "C:\\Users\\you\\AppData\\Roaming\\npm\\gemini.cmd"
+ }
+}
+```
diff --git a/docs/concepts/how-it-works.md b/docs/concepts/how-it-works.md
index f9bf5bd..98620e2 100644
--- a/docs/concepts/how-it-works.md
+++ b/docs/concepts/how-it-works.md
@@ -27,26 +27,58 @@ flowchart LR
subgraph main
direction TB
A[You] --> |"ask gemini..."| B([**Claude**])
- B -..-> |"invokes 'ask-gemini'"| C["Gemini-MCP-Tool"]
- C --> |"spawn!"| D[Gemini-CLI]
- D e1@-.-> |"response"| C
+ B -.-> |"invokes 'ask-gemini'"| C["Gemini-MCP-Tool"]
+ C --> |"dispatch"| D{"Backend"}
+ D --> |"default"| E[Gemini-CLI]
+ D -.-> |"experimental"| F["agy"]
+ E e1@-.-> |"response"| C
+ F -.-> |"transcript"| C
C -.-> |"response"| B
B -.-> |"summary response"| A
e1@{ animate: true }
end
subgraph Project
- B --> |"edits"| E["`**@*Files***`"]
- D -.-> |"reads"| E
+ B --> |"edits"| G["`**@*Files***`"]
+ E -.-> |"reads"| G
end
classDef userNode fill:#1a237e,stroke:#fff,color:#fff,stroke-width:2px
classDef claudeNode fill:#e64100,stroke:#fff,color:#fff,stroke-width:2px
classDef geminiNode fill:#4285f4,stroke:#fff,color:#fff,stroke-width:2px
classDef mcpNode fill:#37474f,stroke:#fff,color:#fff,stroke-width:2px
classDef dataNode fill:#1b5e20,stroke:#fff,color:#fff,stroke-width:2px
+ classDef dispatchNode fill:#6a1b9a,stroke:#fff,color:#fff,stroke-width:2px
+ classDef agyNode fill:#f57f17,stroke:#fff,color:#fff,stroke-width:2px
class A userNode
class B claudeNode
class C mcpNode
- class D geminiNode
- class E dataNode
+ class D dispatchNode
+ class E geminiNode
+ class F agyNode
+ class G dataNode
```
+
+## Architecture
+
+Starting with v1.2.0, the MCP server uses a **pluggable backend** architecture:
+
+1. **Your MCP client** (Claude Code, Claude Desktop, etc.) sends a tool call via stdio
+2. **gemini-mcp-tool** validates arguments, applies security guards (`@file` containment, approval mode), and routes the prompt through the selected backend
+3. **The backend** (Gemini CLI by default, Antigravity CLI when opted in) spawns the CLI, handles stdin/stdout, and returns the model response
+4. **The MCP server** formats the response and sends it back to your client
+
+### Key Components
+
+| Component | What it does |
+|-----------|-------------|
+| `commandExecutor` | Spawns CLI processes with Windows quoting, timeout/kill, ENOENT guidance |
+| `geminiExecutor` | Security guards, changeMode templating, backend dispatch |
+| `backends/gemini` | Builds Gemini CLI args, handles quota fallback (Pro → Flash) |
+| `backends/agy` | Experimental Antigravity CLI with transcript-file recovery |
+| `timeoutManager` | Configurable per-call timeout (SIGTERM → SIGKILL) |
+
+### Security
+
+- **CVE-2026-0755**: `@file` references are checked to stay within the project directory before being sent to any CLI
+- **CWE-22**: `chunkCacheKey` is validated against a strict hex format
+- **Windows injection**: All arguments are quoted for `cmd.exe` even without whitespace, neutralising `& | < > ^ ( )` metacharacters
diff --git a/docs/concepts/models.md b/docs/concepts/models.md
index e7207db..bcc2828 100644
--- a/docs/concepts/models.md
+++ b/docs/concepts/models.md
@@ -27,19 +27,27 @@ You can also append with '-m' or ask specifically with
{
"mcpServers": {
"gemini-cli": {
- "command": "gemini-mcp",
- "env": {
- "GEMINI_MODEL": "gemini-1.5-flash"
- }
+ "command": "npx",
+ "args": ["-y", "gemini-mcp-tool"]
}
}
}
```
-### Per Request (Coming Soon)
+The model is selected per-request via natural language or the `model` tool argument.
+
+### Per Request
+```
+ask gemini using flash to review this file
```
-/gemini-cli:analyze --model=flash @file.js quick review
+or explicitly:
```
+ask-gemini with model: "gemini-2.5-flash" — review @index.ts
+```
+
+::: warning Antigravity CLI (agy) backend
+When using `GEMINI_MCP_BACKEND=agy`, model selection is ignored — print mode is hardcoded to **Gemini 3.5 Flash**.
+:::
## Model Comparison
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 266f160..33621f9 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -31,7 +31,7 @@
Before installing, ensure you have:
-- **[Node.js](https://nodejs.org/)** v16.0.0 or higher
+- **[Node.js](https://nodejs.org/)** v18.0.0 or higher
- **[Google Gemini CLI](https://github.com/google-gemini/gemini-cli)** installed and configured on your system
- **[Claude Desktop](https://claude.ai/download)** or **[Claude Code](https://www.anthropic.com/claude-code)** with MCP support
@@ -78,6 +78,27 @@ For Claude Desktop users, add this to your configuration file:
}
```
+### Optional Environment Variables
+
+You can pass environment variables to configure the server:
+
+```json
+{
+ "mcpServers": {
+ "gemini-cli": {
+ "command": "npx",
+ "args": ["-y", "gemini-mcp-tool"],
+ "env": {
+ "GEMINI_MCP_APPROVAL_MODE": "plan",
+ "GEMINI_MCP_TIMEOUT_MS": "1800000"
+ }
+ }
+ }
+}
+```
+
+See [Configuration](/concepts/configuration) for all available environment variables.
+
::: warning
You must restart Claude Desktop ***completely*** for changes to take effect.
:::
@@ -162,6 +183,13 @@ Type `/gemini-cli` and these commands will appear:
- `/gemini-cli:sandbox` - Safe code execution
- `/gemini-cli:help` - Show help information
- `/gemini-cli:ping` - Test connectivity
+- `/gemini-cli:brainstorm` - Structured brainstorming with methodology frameworks
+
+### New in v1.2.0
+- **Approval mode** — control Gemini's autonomy: `approvalMode: "plan"` (read-only) or `"yolo"` (auto-approve)
+- **Multi-turn sessions** — pass `sessionId` / `resume` to continue conversations across calls
+- **Pluggable backends** — set `GEMINI_MCP_BACKEND=agy` to use the experimental Antigravity CLI
+- **Per-call timeout** — configurable via `GEMINI_MCP_TIMEOUT_MS` (default 30 min)
## Need a Different Client?
@@ -186,7 +214,7 @@ npm install -g @google/gemini-cli
1. Check your configuration file path
2. Ensure JSON syntax is correct
3. Restart your MCP client completely
-4. Verify Gemini CLI works: `gemini -help`
+4. Verify Gemini CLI works: `gemini --help`
### Client-Specific Issues
diff --git a/docs/index.md b/docs/index.md
index 7134bc8..0addc2b 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -34,7 +34,10 @@ features:
details: Gemini-mcp-tool does not require any extra keys.
- icon: 🚦
title: Model Selection
- details: Choose from Gemini-2.5-Pro and Gemini-2.5-Flash, using natural language.
+ details: Choose from Gemini-2.5-Pro, Gemini-2.5-Flash, or let the agy backend use Gemini 3.5 Flash.
+ - icon: 🔧
+ title: Pluggable Backends
+ details: Gemini CLI by default, experimental Antigravity CLI (agy) opt-in — ready for the Gemini CLI retirement in June 2026.
---
diff --git a/docs/installation.md b/docs/installation.md
index ef17c46..90d4c20 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -4,7 +4,7 @@ Multiple ways to install Gemini MCP Tool, depending on your needs.
## Prerequisites
-- Node.js v16.0.0 or higher
+- Node.js v18.0.0 or higher
- Claude Desktop or Claude Code with MCP support
- Gemini CLI installed (`npm install -g @google/gemini-cli`)
diff --git a/docs/resources/faq.md b/docs/resources/faq.md
index a5004e9..6484d18 100644
--- a/docs/resources/faq.md
+++ b/docs/resources/faq.md
@@ -6,7 +6,7 @@
A bridge between Claude Desktop and Google's Gemini AI, enabling you to use Gemini's powerful capabilities directly within Claude.
### Does it support windows?
-Windows testing is underway, some users have reported success and other failures.
+Yes. v1.2.0 adds robust Windows executable resolution (`GEMINI_CLI_PATH`, `.cmd` shim detection), hardened `cmd.exe` argument quoting, and `windowsHide` to suppress console popups.
### Why use this instead of Gemini directly?
- Integrated into your existing AI workflow
@@ -32,7 +32,7 @@ Then, run "gemini" and complete auth.
Yes! It works with both Claude Desktop and Claude Code.
### What Node.js version do I need?
-Node.js v16.0.0 or higher.
+Node.js v18.0.0 or higher.
## Usage
@@ -87,7 +87,16 @@ Check your organization's policies and Google's Gemini API terms of service.
## Advanced
### Can I use this in CI/CD?
-Not recommended - designed for interactive development.
+Yes — set `GEMINI_MCP_APPROVAL_MODE=yolo` (or `plan` for read-only) to avoid interactive approval prompts. Combine with `GEMINI_MCP_TIMEOUT_MS` for a hard time limit.
+
+### What is approval mode?
+Approval mode controls how much autonomy Gemini has. By default, no mode is forced (plain Q&A). Set `approvalMode: "plan"` for a read-only planner, `"yolo"` to auto-approve everything, or `"auto_edit"` to auto-approve edits only. See [Configuration](/concepts/configuration).
+
+### What is the agy backend?
+Antigravity CLI (`agy`) is Google's successor to Gemini CLI. Set `GEMINI_MCP_BACKEND=agy` to try it. It's experimental — print mode is Flash-only and stdout is recovered from transcript files. See [Configuration](/concepts/configuration#backends).
+
+### Can I have multi-turn conversations?
+Yes — pass `sessionId` to start a named session, then `resume` with the same id (or `"latest"`) in a follow-up call. This uses Gemini's native `--session-id` / `--resume` flags.
diff --git a/docs/resources/roadmap.md b/docs/resources/roadmap.md
index e41b36a..f8d9dda 100644
--- a/docs/resources/roadmap.md
+++ b/docs/resources/roadmap.md
@@ -21,12 +21,18 @@ flowchart LR
Auto-Fallback"]
B --> C["v1.1.3
Claude Edits, Gemini Reads"]
+ C --> D["v1.1.5
+ Security Fixes"]
+ D --> E["v1.1.6
+ CVE-2026-0755"]
+ E --> F["v1.2.0
+ Backends + Sessions"]
classDef releasedNode fill:#1b5e20,stroke:#fff,color:#fff,stroke-width:2px
classDef currentNode fill:#e64100,stroke:#fff,color:#fff,stroke-width:2px
- class A,B releasedNode
- class C currentNode
+ class A,B,C,D,E releasedNode
+ class F currentNode
```
@@ -48,21 +54,40 @@ config:
timeline
title Gemini MCP Tool Evolution
- section June 2025
- v1.1.0 Release : Claude uses Gemini!
- : Sandbox Mode Testing
+ section 2025
+ v1.1.0-v1.1.3 : Claude uses Gemini!
+ : Sandbox Mode, Fallback
+ : Change Mode
+
+ section May 2026
+ v1.1.5-v1.1.6 : Security Patches
+ : CVE-2026-0755
+ : CWE-22 path traversal
- v1.1.1 Release : Bug Fixes
- : Enhanced Tool Descriptions
+ v1.2.0 Release : Pluggable Backends
+ : Approval Mode
+ : Native Sessions
+ : Per-call Timeout
+ : Windows Reliability
+ : Test Suite
- section July 2025
- v1.1.2 Release : Fallback System
-
- v1.1.3 Release : Claude Edits, Gemini Reads!
-
- Beta Testing : Beta Hooks System
- : Deterministic Routing
- : Streaming
- : Improved Caching
+ section Next
+ v1.3.0 Planned : Streaming output
+ : output-format support
+ : Full agy backend
```
-
\ No newline at end of file
+
+
+## What's Next
+
+### v1.3.0 (Planned)
+- **Streaming output** — `--output-format stream-json` for real-time progress
+- **Full agy backend** — once the `agy -p` stdout bug is fixed upstream
+- **ACP persistent process** — reuse a long-lived agy process for performance
+
+### Open PRs (separate merges)
+- **#65** — MCP SDK modernization + OAuth
+- **#44** — LRU cache for performance
+- **#46** — Tool annotations
+- **#50** — Native session-id resume (partially landed in 1.2.0)
+- **#35** — Gemini schema compatibility
\ No newline at end of file
diff --git a/docs/resources/troubleshooting.md b/docs/resources/troubleshooting.md
index 0a7c914..f55430c 100644
--- a/docs/resources/troubleshooting.md
+++ b/docs/resources/troubleshooting.md
@@ -134,9 +134,10 @@ claude mcp add gemini-cli -- npx gemini-mcp-tool
```
4. **For very large codebases, the tool prevents timeouts automatically**:
- - Progress updates keep the connection alive
- - Clear status messages show processing is active
- - No manual configuration needed
+ - Progress updates keep the connection alive
+ - Clear status messages show processing is active
+ - No manual configuration needed
+ - You can also configure the timeout via `GEMINI_MCP_TIMEOUT_MS` (default 30 min; set to `0` to disable)
@@ -147,7 +148,7 @@ claude mcp add gemini-cli -- npx gemini-mcp-tool
**Common causes**:
-1. **Node.js version compatibility** - Ensure Node.js ≥ v16.0.0
+1. **Node.js version compatibility** - Ensure Node.js ≥ v18.0.0
2. **Gemini CLI not installed** - Install with `npm install -g @google/gemini-cli`
3. **API key not configured** - Run `gemini config set api_key YOUR_API_KEY`
4. **PATH issues** - Restart terminal after installing Node.js/npm
@@ -260,6 +261,7 @@ echo $GOOGLE_GENERATIVE_AI_API_KEY
- Backup heartbeat every 20 seconds to ensure connection stays alive
- Clear status messages showing the tool is working
- Automatic completion notification when done
+- The per-call timeout is configurable via the `GEMINI_MCP_TIMEOUT_MS` env var (default 30 min; `0` disables)
**For very large codebases** (10,000+ files):
- Consider breaking analysis into smaller chunks
@@ -340,8 +342,9 @@ gemini "Hello"
### Windows 11
- **NPX flag issues**: Use `--yes` instead of `-y`
-- **Path problems**: Restart terminal after Node.js installation
+- **Path problems**: Restart terminal after Node.js installation, or set `GEMINI_CLI_PATH` to the full path of `gemini.cmd`
- **Connection issues**: Ensure Windows Defender isn't blocking Node.js
+- **"Command not found"**: The MCP server may not inherit your shell's PATH. Set `GEMINI_CLI_PATH` in your config `env` block.
### macOS
- **Permission issues**: Use `sudo` if npm install fails
diff --git a/docs/usage/commands.md b/docs/usage/commands.md
index f06e0c0..9414a86 100644
--- a/docs/usage/commands.md
+++ b/docs/usage/commands.md
@@ -1,52 +1,67 @@
# Commands Reference
-Complete list of available commands and their usage.
+Complete list of available tools and their arguments.
-## Slash Commands
+## Tools
-### `/gemini-cli:analyze`
-Analyze files or ask questions about code.
+### `ask-gemini`
-```
-/gemini-cli:analyze @file.js explain this code
-/gemini-cli:analyze @src/*.ts find security issues
-/gemini-cli:analyze how do I implement authentication?
-```
+The primary tool — send a prompt to Gemini and get a response.
-### `/gemini-cli:sandbox`
-Execute code in a safe environment.
+| Argument | Type | Default | Description |
+|----------|------|---------|-------------|
+| `prompt` | string | *(required)* | Your analysis request. Use `@` to include files |
+| `model` | string | `gemini-2.5-pro` | Model to use (e.g. `gemini-2.5-flash`) |
+| `sandbox` | boolean | `false` | Run in isolated sandbox (`-s` flag) |
+| `changeMode` | boolean | `false` | Structured edit mode for Claude to apply |
+| `approvalMode` | string | *(unset)* | `default` / `auto_edit` / `yolo` / `plan` |
+| `sessionId` | string | — | Start/tag a conversation session |
+| `resume` | string | — | Resume a prior session by id, or `"latest"` |
+| `chunkIndex` | number | — | Which chunk to return (1-based, for changeMode) |
+| `chunkCacheKey` | string | — | Cache key for continuation (changeMode) |
```
-/gemini-cli:sandbox create a Python fibonacci generator
-/gemini-cli:sandbox test this function: [code]
+/gemini-cli:ask-gemini @file.js explain this code
+/gemini-cli:ask-gemini @src/*.ts find security issues
```
-### `/gemini-cli:help`
-Show help information and available tools.
+### `brainstorm`
+
+Structured ideation with selectable methodology frameworks.
+
+| Argument | Type | Default | Description |
+|----------|------|---------|-------------|
+| `prompt` | string | *(required)* | Brainstorming challenge or question |
+| `model` | string | `gemini-2.5-pro` | Model to use |
+| `approvalMode` | string | *(unset)* | Gemini approval mode |
+| `methodology` | string | `auto` | `divergent` / `convergent` / `scamper` / `design-thinking` / `lateral` / `auto` |
+| `domain` | string | — | Domain context (e.g. `software`, `business`) |
+| `constraints` | string | — | Known limitations or boundaries |
+| `existingContext` | string | — | Background info to build upon |
+| `ideaCount` | number | `12` | Target number of ideas |
+| `includeAnalysis` | boolean | `true` | Include feasibility/impact scoring |
```
-/gemini-cli:help
-/gemini-cli:help analyze
+/gemini-cli:brainstorm how can we improve our onboarding flow?
```
-### `/gemini-cli:ping`
-Test connectivity with Gemini.
+### `Help`
+
+Show Gemini CLI help information.
```
-/gemini-cli:ping
-/gemini-cli:ping "Custom message"
+/gemini-cli:Help
```
-## Command Structure
+### `ping`
+
+Test connectivity with an echo.
```
-/gemini-cli:
[options]
+/gemini-cli:ping
+/gemini-cli:ping "Custom message"
```
-- **tool**: The action to perform (analyze, sandbox, help, ping)
-- **options**: Optional flags (coming soon)
-- **arguments**: Input text, files, or questions
-
## Natural Language Alternative
Instead of slash commands, you can use natural language:
@@ -54,6 +69,7 @@ Instead of slash commands, you can use natural language:
- "Use gemini to analyze index.js"
- "Ask gemini to create a test file"
- "Have gemini explain this error"
+- "Brainstorm ideas for the new feature using gemini"
## File Patterns
@@ -61,7 +77,6 @@ Instead of slash commands, you can use natural language:
```
@README.md
@src/index.js
-@test/unit.test.ts
```
### Multiple Files
@@ -82,21 +97,34 @@ Instead of slash commands, you can use natural language:
@test/unit/ # All files in test/unit
```
+::: danger Security
+`@file` references are restricted to the project directory. Paths like `@../secret.txt`, `@~/.ssh/id_rsa`, or `@/etc/passwd` are rejected (CVE-2026-0755).
+:::
+
## Advanced Usage
-### Combining Files and Questions
+### Approval Mode
+
+Control Gemini's autonomy per-call:
```
-/gemini-cli:analyze @package.json @src/index.js is the entry point configured correctly?
+ask gemini with approvalMode "plan" to review the architecture
+ask gemini with approvalMode "yolo" and sandbox to run this test suite
```
-### Complex Queries
+### Multi-turn Sessions
+
+Continue a conversation across multiple calls:
```
-/gemini-cli:analyze @src/**/*.js @test/**/*.test.js what's the test coverage?
+ask gemini with sessionId "review-1" to review the auth module
+ask gemini with resume "review-1" to now suggest improvements
+ask gemini with resume "latest" to continue where we left off
```
-### Code Generation
+### Change Mode
+
+Get structured edit suggestions that Claude can apply directly:
```
-/gemini-cli:analyze @models/user.js generate TypeScript types for this model
+ask gemini in changeMode to refactor @src/utils.js for readability
```
## Tips
diff --git a/scripts/run-tests.mjs b/scripts/run-tests.mjs
index 5ba7268..d1a978f 100644
--- a/scripts/run-tests.mjs
+++ b/scripts/run-tests.mjs
@@ -25,9 +25,15 @@ if (tests.length === 0) {
process.exit(0);
}
+// tsx is loaded via `--import` on Node >= 20.6, and the older `--loader` flag
+// below that (the engines floor is >=18, where `--import` may be unavailable).
+const [major, minor] = process.versions.node.split(".").map(Number);
+const supportsImport = major > 20 || (major === 20 && minor >= 6);
+const loaderArgs = supportsImport ? ["--import", "tsx"] : ["--loader", "tsx"];
+
const result = spawnSync(
process.execPath,
- ["--import", "tsx", "--test", ...tests],
+ [...loaderArgs, "--test", ...tests],
{ stdio: "inherit" },
);
process.exit(result.status ?? 1);
diff --git a/src/tools/ask-gemini.tool.ts b/src/tools/ask-gemini.tool.ts
index bfdc917..09db340 100644
--- a/src/tools/ask-gemini.tool.ts
+++ b/src/tools/ask-gemini.tool.ts
@@ -61,9 +61,13 @@ export const askGeminiTool: UnifiedTool = {
prompt as string
);
}
- // Surface the active session id so the caller can resume the conversation.
- const activeSession = (resume as string | undefined) || (sessionId as string | undefined);
- const sessionNote = activeSession ? `\n\n[session: ${activeSession}]` : '';
+ // Echo back the session id the caller supplied so follow-up calls can continue
+ // it. This is the requested id, not one parsed from the CLI; 'latest' is a
+ // resume selector (not an id), so it is not surfaced.
+ const requestedSession =
+ (typeof resume === 'string' && resume !== 'latest' ? resume : undefined) ||
+ (sessionId as string | undefined);
+ const sessionNote = requestedSession ? `\n\n[session: ${requestedSession}]` : '';
return `${STATUS_MESSAGES.GEMINI_RESPONSE}\n${result}${sessionNote}`; // changeMode false
}
};
\ No newline at end of file
diff --git a/src/utils/commandExecutor.ts b/src/utils/commandExecutor.ts
index f31e42f..0fe6e6f 100644
--- a/src/utils/commandExecutor.ts
+++ b/src/utils/commandExecutor.ts
@@ -36,8 +36,11 @@ export function resolveCommandForExecution(command: string): string {
});
const candidates = out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
const byExt = (ext: string) => candidates.find((c) => c.toLowerCase().endsWith(ext));
+ // Prefer extensions cmd.exe can launch directly (.cmd/.exe/.bat). A `.ps1`
+ // shim is NOT runnable via shell:true, so it is never preferred — only the
+ // raw first candidate is used as a last resort.
resolved =
- byExt(".cmd") || byExt(".ps1") || byExt(".bat") || byExt(".exe") ||
+ byExt(".cmd") || byExt(".exe") || byExt(".bat") ||
candidates[0] || `${command}.cmd`;
} catch {
resolved = `${command}.cmd`;
@@ -106,6 +109,12 @@ export async function executeCommand(
});
if (stdinData !== undefined && childProcess.stdin) {
+ // If the child has already exited/closed its stdin, write() emits EPIPE on
+ // the stream; without this listener that becomes an uncaught exception and
+ // crashes the (long-lived) MCP server.
+ childProcess.stdin.on("error", (err) => {
+ Logger.error(`stdin write failed: ${err instanceof Error ? err.message : String(err)}`);
+ });
childProcess.stdin.write(stdinData);
childProcess.stdin.end();
}
@@ -130,11 +139,19 @@ export async function executeCommand(
if (isResolved) return;
isResolved = true;
Logger.error(`Command timed out after ${timeoutMs}ms; terminating: ${command}`);
- try { childProcess.kill("SIGTERM"); } catch { /* already gone */ }
- const sigkill = setTimeout(() => {
- try { childProcess.kill("SIGKILL"); } catch { /* already gone */ }
- }, 2000);
- sigkill.unref?.();
+ if (isWindows && childProcess.pid) {
+ // With shell:true the child is cmd.exe; kill() would orphan the real
+ // gemini/agy process. taskkill /T terminates the whole process tree.
+ try {
+ execSync(`taskkill /pid ${childProcess.pid} /T /F`, { stdio: "ignore" });
+ } catch { /* already gone */ }
+ } else {
+ try { childProcess.kill("SIGTERM"); } catch { /* already gone */ }
+ const sigkill = setTimeout(() => {
+ try { childProcess.kill("SIGKILL"); } catch { /* already gone */ }
+ }, 2000);
+ sigkill.unref?.();
+ }
reject(new Error(`Command timed out after ${timeoutMs}ms: ${command}`));
}, timeoutMs);
timeoutHandle.unref?.();
From dc55a5700214faa33c0bc13d4dcaff5891641c84 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 14:21:05 -0700
Subject: [PATCH 3/8] docs: make version badge dynamic in theme layout
---
docs/.vitepress/theme/Layout.vue | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/docs/.vitepress/theme/Layout.vue b/docs/.vitepress/theme/Layout.vue
index 7185c30..0e09b6a 100644
--- a/docs/.vitepress/theme/Layout.vue
+++ b/docs/.vitepress/theme/Layout.vue
@@ -6,7 +6,7 @@
- 🏷️ 1.1.4
+ 🏷️ {{ version }}
@@ -36,6 +36,9 @@ import FundingHero from './components/FundingHero.vue'
import FundingEffects from './components/FundingEffects.vue'
import FundingLayout from './FundingLayout.vue'
+// Import version dynamically from package.json
+import { version } from '../../../package.json'
+
const { Layout } = DefaultTheme
const route = useRoute()
const { frontmatter } = useData()
From 1ffecba22739f457684829138b83dca3c7d7d5bb Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 14:22:04 -0700
Subject: [PATCH 4/8] docs: document version badge dynamic update in CHANGELOG
---
CHANGELOG.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6d7ae52..f56e6e2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
### Changed
- `engines.node` raised to `>=18`.
-- The server version is now read from `package.json` at runtime, instead of a hardcoded string that had drifted to `1.1.4`.
+- The server version and the documentation navbar badge are now read from `package.json` dynamically, instead of hardcoded strings that had drifted to `1.1.4`.
- Installing from a Git checkout now builds automatically via a `prepare` script.
### Fixed
From 5e522456a38eab237cee479384a4c574283de368 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sat, 30 May 2026 18:41:30 -0700
Subject: [PATCH 5/8] feat: configurable default model (GEMINI_MODEL) + setup
doctor; document 1.2.0 env vars
Closes #49.
- GEMINI_MODEL sets the default model when a call doesn't pass one, so the
assistant can't silently fall back to an older model (#49; also helps #51).
GEMINI_FLASH_MODEL overrides the quota-fallback target. Precedence:
per-call model arg > GEMINI_MODEL > Gemini CLI default.
- New setup doctor (scripts/doctor.mjs, 'npm run doctor', and a
'gemini-mcp-doctor' bin): reports the active backend, detected gemini/agy
installs (path + version), and the effective model/approval/timeout/env
configuration with actionable hints. Self-contained, zero-dependency.
- Docs: new README 'Environment Variables' + 'Setup Doctor' sections; document
GEMINI_MODEL/GEMINI_FLASH_MODEL and the doctor across docs/concepts/
configuration.md, docs/concepts/models.md and docs/api.md; CHANGELOG entries.
---
CHANGELOG.md | 2 +
README.md | 37 ++++++
docs/api.md | 2 +
docs/concepts/configuration.md | 40 +++++++
docs/concepts/models.md | 8 +-
package.json | 5 +-
scripts/doctor.mjs | 199 +++++++++++++++++++++++++++++++++
src/backends/gemini.test.ts | 19 +++-
src/backends/gemini.ts | 27 ++++-
src/constants.ts | 2 +
10 files changed, 331 insertions(+), 10 deletions(-)
create mode 100755 scripts/doctor.mjs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f56e6e2..ec8eacf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
- **Pluggable backends** — the executor is now backend-agnostic. The Gemini CLI stays the default; set `GEMINI_MCP_BACKEND=agy` to use the **experimental** Antigravity CLI (`agy`) backend, ahead of Gemini CLI's 2026-06-18 retirement for free/Pro/Ultra tiers. (agy print-mode is Flash-only, and its reply is recovered from agy's transcript files to work around the upstream `agy -p` empty-stdout bug.)
- **Per-command timeout** — a hung CLI call is now terminated (SIGTERM → SIGKILL). Configurable via `GEMINI_MCP_TIMEOUT_MS` (default 30 minutes; `0` disables).
- **Windows executable resolution** — honours `GEMINI_CLI_PATH`, otherwise resolves the real `gemini` shim via `where` (preferring `.cmd`), fixing "command not found" when the MCP server doesn't inherit your shell's PATH.
+- **Configurable default model** — `GEMINI_MODEL` sets the model used when a call doesn't pass one, so the assistant can't silently fall back to an older model (#49); `GEMINI_FLASH_MODEL` overrides the quota-fallback target. Precedence: per-call `model` arg → `GEMINI_MODEL` → Gemini CLI default.
+- **Setup doctor** — `npm run doctor` / the `gemini-mcp-doctor` bin reports the active backend, detected `gemini`/`agy` installs (path + version), and the effective model/approval/timeout/env configuration, with actionable hints.
- **Test suite** — `node:test` coverage for the `@file` security guard, Windows quoting/resolution, approval-mode and session argument building, backend selection, and timeout parsing (`npm test`).
### Changed
diff --git a/README.md b/README.md
index 715ef16..e2ba67d 100644
--- a/README.md
+++ b/README.md
@@ -105,6 +105,43 @@ If you installed globally, use this configuration instead:
After updating the configuration, restart your terminal session.
+### Environment Variables (1.2.0)
+
+All optional — set them in your MCP client's `env` block. See the [Configuration docs](docs/concepts/configuration.md) for full detail.
+
+| Variable | Default | Purpose |
+|----------|---------|---------|
+| `GEMINI_MODEL` | *(CLI default)* | Default model when a call doesn't specify one (e.g. `gemini-3-pro-preview`) |
+| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | `default` / `auto_edit` / `yolo` / `plan` → forwarded to `gemini --approval-mode` |
+| `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` (experimental) |
+| `GEMINI_MCP_TIMEOUT_MS` | `1800000` | Per-call timeout in ms; `0` disables |
+| `GEMINI_CLI_PATH` | *(auto)* | Full path to the `gemini` executable (Windows PATH issues) |
+| `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the automatic quota fallback |
+
+Example — pin a default model so the assistant can't fall back to an older one ([#49](https://github.com/jamubc/gemini-mcp-tool/issues/49)):
+
+```json
+{
+ "mcpServers": {
+ "gemini-cli": {
+ "command": "npx",
+ "args": ["-y", "gemini-mcp-tool"],
+ "env": { "GEMINI_MODEL": "gemini-3-pro-preview" }
+ }
+ }
+}
+```
+
+### Setup Doctor
+
+Not sure what's installed or how it's configured? Run the doctor to see the active backend, the detected `gemini` / `agy` versions and paths, and your effective model / approval / timeout settings:
+
+```bash
+npx -p gemini-mcp-tool gemini-mcp-doctor
+# or, from a clone of this repo:
+npm run doctor
+```
+
## Example Workflow
- **Natural language**: "use gemini to explain index.html", "understand the massive project using gemini", "ask gemini to search for latest news"
diff --git a/docs/api.md b/docs/api.md
index f9341f1..bb9137d 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -65,6 +65,8 @@ Returns `gemini --help` output.
| Variable | Default | Description |
|----------|---------|-------------|
+| `GEMINI_MODEL` | *(CLI default)* | Default model when a call omits `model` |
+| `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the quota fallback |
| `GEMINI_MCP_BACKEND` | `gemini` | Backend: `gemini` or `agy` (experimental) |
| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
| `GEMINI_MCP_TIMEOUT_MS` | `1800000` | Per-call timeout in ms; `0` disables |
diff --git a/docs/concepts/configuration.md b/docs/concepts/configuration.md
index 220729f..6a3c77a 100644
--- a/docs/concepts/configuration.md
+++ b/docs/concepts/configuration.md
@@ -6,6 +6,8 @@ All configuration is done via environment variables in your MCP client config. N
| Variable | Default | Description |
|----------|---------|-------------|
+| `GEMINI_MODEL` | *(CLI default)* | Default model when a call doesn't pass one |
+| `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the automatic quota fallback |
| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
| `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` |
| `GEMINI_MCP_TIMEOUT_MS` | `1800000` (30 min) | Per-call timeout; `0` disables |
@@ -36,6 +38,26 @@ claude mcp add gemini-cli -e GEMINI_MCP_APPROVAL_MODE=plan -- npx -y gemini-mcp-
---
+## Default Model
+
+By default the model is chosen per request (natural language or the `model` argument); if none is given, the Gemini CLI uses its own default. Set `GEMINI_MODEL` to pin a default so the assistant can't fall back to an older model ([issue #49](https://github.com/jamubc/gemini-mcp-tool/issues/49)):
+
+```json
+{
+ "env": {
+ "GEMINI_MODEL": "gemini-3-pro-preview"
+ }
+}
+```
+
+**Precedence:** per-call `model` argument → `GEMINI_MODEL` → Gemini CLI default. `GEMINI_FLASH_MODEL` overrides the model used for the automatic quota fallback (default `gemini-2.5-flash`).
+
+::: info
+The `agy` backend ignores model selection — its print mode is hardcoded to Gemini 3.5 Flash.
+:::
+
+---
+
## Approval Mode
Controls how much autonomy Gemini has when processing a request. Maps directly to `gemini --approval-mode`.
@@ -167,3 +189,21 @@ If you get "command not found" errors on Windows, set `GEMINI_CLI_PATH` to the f
}
}
```
+
+---
+
+## Diagnostics: the setup doctor
+
+Run the bundled doctor to see exactly what the tool will do on your machine — the active backend, the detected `gemini` / `agy` versions and paths, your effective model/approval/timeout configuration, and any problems:
+
+```bash
+npx -p gemini-mcp-tool gemini-mcp-doctor
+# or, from a clone of the repo:
+npm run doctor
+```
+
+It exits non-zero if the active backend's CLI can't be found, which makes it handy in setup scripts.
+
+::: info
+The doctor reads the environment of the shell you run it in. Your MCP client sets its own `env` for the server process, so values there may differ from what the doctor prints.
+:::
diff --git a/docs/concepts/models.md b/docs/concepts/models.md
index bcc2828..d9e8071 100644
--- a/docs/concepts/models.md
+++ b/docs/concepts/models.md
@@ -23,18 +23,22 @@ You can also append with '-m' or ask specifically with
```
### In Configuration
+
+Set `GEMINI_MODEL` to choose the default model for **every** call that doesn't specify one. This is the fix for the assistant occasionally falling back to an older model ([issue #49](https://github.com/jamubc/gemini-mcp-tool/issues/49)) — pin it once in your MCP config:
+
```json
{
"mcpServers": {
"gemini-cli": {
"command": "npx",
- "args": ["-y", "gemini-mcp-tool"]
+ "args": ["-y", "gemini-mcp-tool"],
+ "env": { "GEMINI_MODEL": "gemini-3-pro-preview" }
}
}
}
```
-The model is selected per-request via natural language or the `model` tool argument.
+**Precedence:** a per-request `model` argument overrides `GEMINI_MODEL`, which overrides the Gemini CLI's own default. So you can pin a default here and still say "use flash" for a one-off.
### Per Request
```
diff --git a/package.json b/package.json
index 6e5aa3c..eb5143b 100644
--- a/package.json
+++ b/package.json
@@ -5,13 +5,15 @@
"type": "module",
"main": "dist/index.js",
"bin": {
- "gemini-mcp": "dist/index.js"
+ "gemini-mcp": "dist/index.js",
+ "gemini-mcp-doctor": "scripts/doctor.mjs"
},
"scripts": {
"build": "tsc -p tsconfig.build.json",
"prepare": "npm run build",
"start": "node dist/index.js",
"dev": "tsc && node dist/index.js",
+ "doctor": "node scripts/doctor.mjs",
"test": "node scripts/run-tests.mjs",
"lint": "tsc --noEmit",
"contribute": "tsx src/contribute.ts",
@@ -43,6 +45,7 @@
},
"files": [
"dist/",
+ "scripts/doctor.mjs",
"README.md",
"LICENSE"
],
diff --git a/scripts/doctor.mjs b/scripts/doctor.mjs
new file mode 100755
index 0000000..91a9e53
--- /dev/null
+++ b/scripts/doctor.mjs
@@ -0,0 +1,199 @@
+#!/usr/bin/env node
+// gemini-mcp-tool setup doctor.
+//
+// Reports what the tool will actually do on this machine: which CLI backend is
+// active, whether the gemini / agy executables are installed (path + version),
+// the effective model configuration, and every related environment variable.
+//
+// Self-contained: pure Node, no build step or dependencies. The constant names
+// below mirror src/constants.ts — keep them in sync.
+
+import { spawnSync } from "node:child_process";
+import { existsSync } from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+const ENV = {
+ BACKEND: "GEMINI_MCP_BACKEND",
+ APPROVAL_MODE: "GEMINI_MCP_APPROVAL_MODE",
+ TIMEOUT_MS: "GEMINI_MCP_TIMEOUT_MS",
+ GEMINI_CLI_PATH: "GEMINI_CLI_PATH",
+ MODEL: "GEMINI_MODEL",
+ FLASH_MODEL: "GEMINI_FLASH_MODEL",
+};
+const DEFAULT_FLASH_MODEL = "gemini-2.5-flash";
+const DEFAULT_TIMEOUT_MS = 30 * 60 * 1000;
+const APPROVAL_MODES = ["default", "auto_edit", "yolo", "plan"];
+
+const isWindows = process.platform === "win32";
+const useColor = process.stdout.isTTY && !process.env.NO_COLOR;
+const paint = (code, s) => (useColor ? `\x1b[${code}m${s}\x1b[0m` : s);
+const c = {
+ bold: (s) => paint("1", s),
+ dim: (s) => paint("2", s),
+ green: (s) => paint("32", s),
+ yellow: (s) => paint("33", s),
+ red: (s) => paint("31", s),
+ cyan: (s) => paint("36", s),
+};
+const OK = c.green("✓");
+const WARN = c.yellow("⚠");
+const BAD = c.red("✗");
+
+const problems = [];
+
+function runCmd(cmd, args) {
+ try {
+ const r = spawnSync(cmd, args, {
+ encoding: "utf8",
+ timeout: 20000,
+ shell: isWindows, // .cmd shims on Windows need a shell
+ windowsHide: true,
+ });
+ if (r.error) return { ok: false, err: r.error.message };
+ return {
+ ok: r.status === 0,
+ status: r.status,
+ out: (r.stdout || "").trim(),
+ err: (r.stderr || "").trim(),
+ };
+ } catch (e) {
+ return { ok: false, err: e instanceof Error ? e.message : String(e) };
+ }
+}
+
+function locate(cmd) {
+ const r = runCmd(isWindows ? "where" : "which", [cmd]);
+ if (!r.ok || !r.out) return [];
+ return r.out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
+}
+
+// Detect one CLI install. PATH hits come from locate(cmd); when honourEnvPath is set, a
+// non-empty GEMINI_CLI_PATH is listed first in `candidates` and becomes `primary`.
+// Returns { found, primary, candidates, override, version, ext }; version is line 1 of `<cmd> --version`, else null.
+function detectCli(cmd, { honourEnvPath = false } = {}) {
+  const override = honourEnvPath ? (process.env[ENV.GEMINI_CLI_PATH] || "").trim() : "";
+  const onPath = locate(cmd);
+  // Decide `found` BEFORE merging the override in: a stale GEMINI_CLI_PATH must not count as an install.
+  const found = onPath.length > 0 || (override !== "" && existsSync(override));
+  const candidates = override ? [override, ...onPath.filter((p) => p !== override)] : onPath;
+  const primary = override || candidates[0] || null;
+  const v = found ? runCmd(cmd, ["--version"]) : null; // probed by bare name, so an off-PATH override yields no version
+  const version = v && v.ok && v.out ? v.out.split(/\r?\n/)[0].trim() : null;
+  const ext = primary ? path.extname(primary).toLowerCase() : "";
+  return { found, primary, candidates, override: override || null, version, ext };
+}
+
+function envLine(key, { fallback = c.dim("(unset)"), mask = false } = {}) {
+ const raw = process.env[key];
+ if (raw === undefined || raw === "") return `${key} = ${fallback}`;
+ return `${key} = ${c.cyan(mask ? "********" : raw)}`;
+}
+
+function humanizeMs(ms) {
+ if (ms === 0) return "disabled (waits forever)";
+ if (ms % 60000 === 0) return `${ms / 60000} min`;
+ if (ms % 1000 === 0) return `${ms / 1000} s`;
+ return `${ms} ms`;
+}
+
+function heading(title) {
+ console.log("\n" + c.bold(title));
+ console.log(c.dim("─".repeat(Math.max(title.length, 16))));
+}
+
+// ── System ───────────────────────────────────────────────────────────────────
+heading("System");
+console.log(` node ${process.version}`);
+console.log(` platform ${process.platform} (${process.arch})`);
+
+// ── Backend selection ──────────────────────────────────────────────────────--
+const rawBackend = (process.env[ENV.BACKEND] || "gemini").trim().toLowerCase();
+const backend = rawBackend === "agy" || rawBackend === "antigravity" ? "agy" : "gemini";
+heading("Active backend");
+console.log(` ${ENV.BACKEND} = ${process.env[ENV.BACKEND] ? c.cyan(process.env[ENV.BACKEND]) : c.dim("(unset → gemini)")}`);
+console.log(` → using: ${c.bold(backend)}${backend === "agy" ? c.yellow(" (experimental)") : ""}`);
+if (process.env[ENV.BACKEND] && backend === "gemini" && rawBackend !== "gemini") {
+ console.log(` ${WARN} unrecognised value ${JSON.stringify(process.env[ENV.BACKEND])} — defaulting to gemini`);
+}
+
+// ── Gemini CLI ─────────────────────────────────────────────────────────────--
+heading("Gemini CLI");
+const gemini = detectCli("gemini", { honourEnvPath: true });
+if (gemini.found) {
+ console.log(` ${OK} found${gemini.override ? " (via " + ENV.GEMINI_CLI_PATH + ")" : ""}`);
+ console.log(` path ${gemini.primary}${gemini.ext ? c.dim(" [" + gemini.ext + "]") : ""}`);
+ console.log(` version ${gemini.version ? c.cyan(gemini.version) : c.yellow("(could not read --version)")}`);
+ if (gemini.candidates.length > 1) {
+ console.log(c.dim(` also on PATH: ${gemini.candidates.slice(1).join(", ")}`));
+ }
+} else {
+ console.log(` ${BAD} not found on PATH`);
+ if (backend === "gemini") {
+ problems.push(
+ `Gemini CLI not found. Install it (npm i -g @google/gemini-cli) or set ${ENV.GEMINI_CLI_PATH} to its full path.`,
+ );
+ }
+}
+
+// ── Antigravity CLI (agy) ─────────────────────────────────────────────────--
+heading("Antigravity CLI (agy)");
+const agy = detectCli("agy");
+const agyDataDir = path.join(os.homedir(), ".gemini", "antigravity-cli");
+if (agy.found) {
+ console.log(` ${OK} found`);
+ console.log(` path ${agy.primary}`);
+ console.log(` version ${agy.version ? c.cyan(agy.version) : c.yellow("(could not read --version)")}`);
+ console.log(` data dir ${existsSync(agyDataDir) ? OK + " " + agyDataDir : WARN + " missing (run `agy -i` once to authenticate)"}`);
+} else {
+ console.log(` ${c.dim("not installed")} ${c.dim("— optional; the future backend once Gemini CLI retires 2026-06-18")}`);
+ if (backend === "agy") {
+ problems.push("GEMINI_MCP_BACKEND=agy but the agy executable was not found on PATH.");
+ }
+}
+
+// ── Model configuration ───────────────────────────────────────────────────--
+heading("Model configuration");
+const defaultModel = (process.env[ENV.MODEL] || "").trim();
+const flashModel = (process.env[ENV.FLASH_MODEL] || "").trim() || DEFAULT_FLASH_MODEL;
+console.log(` default model ${defaultModel ? c.cyan(defaultModel) + c.dim(" (GEMINI_MODEL)") : c.dim("(Gemini CLI's own default; pass model: or set GEMINI_MODEL)")}`);
+console.log(` flash fallback ${c.cyan(flashModel)}${process.env[ENV.FLASH_MODEL] ? c.dim(" (GEMINI_FLASH_MODEL)") : c.dim(" (default)")}`);
+if (backend === "agy") {
+ console.log(` ${WARN} agy print-mode ignores model selection (hardcoded to Gemini 3.5 Flash)`);
+}
+
+// ── Approval & timeout ─────────────────────────────────────────────────────--
+heading("Behaviour");
+const approval = (process.env[ENV.APPROVAL_MODE] || "").trim();
+if (!approval) {
+ console.log(` approval mode ${c.dim("(unset → no flag; plain Q&A)")}`);
+} else if (APPROVAL_MODES.includes(approval)) {
+ console.log(` approval mode ${c.cyan(approval)}`);
+ if (approval === "plan") console.log(` ${WARN} 'plan' makes Gemini an autonomous planner in headless mode — not ideal for plain Q&A`);
+} else {
+ console.log(` approval mode ${c.yellow(approval)} ${WARN} not one of ${APPROVAL_MODES.join("/")} — will be ignored`);
+}
+const rawTimeout = (process.env[ENV.TIMEOUT_MS] || "").trim();
+let timeoutMs = DEFAULT_TIMEOUT_MS;
+if (rawTimeout) {
+ const n = Number(rawTimeout);
+ timeoutMs = Number.isFinite(n) && n > 0 ? n : 0;
+}
+console.log(` timeout ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? c.dim(" (GEMINI_MCP_TIMEOUT_MS)") : c.dim(" (default)")}`);
+
+// ── Environment variables ──────────────────────────────────────────────────--
+heading("Environment variables (this shell)");
+for (const key of Object.values(ENV)) console.log(" " + envLine(key));
+console.log(c.dim("\n Note: your MCP client sets its own env for the server process — these are"));
+console.log(c.dim(" the values in the shell running this doctor, which may differ."));
+
+// ── Summary ────────────────────────────────────────────────────────────────--
+heading("Summary");
+if (problems.length === 0) {
+ console.log(` ${OK} ${c.green("No problems detected.")} Active backend '${backend}' looks ready.`);
+} else {
+ console.log(` ${BAD} ${c.red(`${problems.length} issue(s) found:`)}`);
+ for (const p of problems) console.log(` - ${p}`);
+}
+console.log("");
+process.exit(problems.length === 0 ? 0 : 1);
diff --git a/src/backends/gemini.test.ts b/src/backends/gemini.test.ts
index 75749a2..1a068f0 100644
--- a/src/backends/gemini.test.ts
+++ b/src/backends/gemini.test.ts
@@ -1,6 +1,6 @@
import { test } from "node:test";
import assert from "node:assert/strict";
-import { resolveApprovalMode, buildGeminiArgs } from "./gemini.js";
+import { resolveApprovalMode, buildGeminiArgs, resolveModel } from "./gemini.js";
const ENV_KEY = "GEMINI_MCP_APPROVAL_MODE";
@@ -47,6 +47,23 @@ test("buildGeminiArgs forces no approval mode by default", () => {
});
});
+test("resolveModel: arg > GEMINI_MODEL env > undefined", () => {
+ const prev = process.env.GEMINI_MODEL;
+ delete process.env.GEMINI_MODEL;
+ try {
+ assert.equal(resolveModel(), undefined);
+ assert.equal(resolveModel("gemini-2.5-flash"), "gemini-2.5-flash");
+ process.env.GEMINI_MODEL = "gemini-3-pro-preview";
+ assert.equal(resolveModel(), "gemini-3-pro-preview");
+ assert.equal(resolveModel("gemini-2.5-flash"), "gemini-2.5-flash"); // explicit arg wins
+ process.env.GEMINI_MODEL = " ";
+ assert.equal(resolveModel(), undefined); // blank env ignored
+ } finally {
+ if (prev === undefined) delete process.env.GEMINI_MODEL;
+ else process.env.GEMINI_MODEL = prev;
+ }
+});
+
test("buildGeminiArgs adds the approval flag only when requested; resume beats sessionId", () => {
withEnv(undefined, () => {
assert.deepEqual(buildGeminiArgs(undefined, { approvalMode: "yolo" }), [
diff --git a/src/backends/gemini.ts b/src/backends/gemini.ts
index cc7ea9c..86e8fb8 100644
--- a/src/backends/gemini.ts
+++ b/src/backends/gemini.ts
@@ -26,6 +26,20 @@ export function resolveApprovalMode(arg?: string): ApprovalMode | undefined {
return VALID_APPROVAL_MODES.includes(candidate) ? (candidate as ApprovalMode) : undefined;
}
+/**
+ * Resolve the model: per-call arg > GEMINI_MODEL env > undefined (Gemini CLI default).
+ * Both are trimmed and a blank value counts as unset, so a padded or whitespace-only
+ * arg is never forwarded as-is. The env default lets users pin a model (issue #49).
+ */
+export function resolveModel(argModel?: string): string | undefined {
+  return argModel?.trim() || process.env[ENV.MODEL]?.trim() || undefined;
+}
+
+/** The model the quota fallback retries on (GEMINI_FLASH_MODEL or the default). */
+export function resolveFlashModel(): string {
+ return process.env[ENV.FLASH_MODEL]?.trim() || MODELS.FLASH;
+}
+
/** Build the Gemini CLI argv (minus the prompt, which may go on stdin). */
export function buildGeminiArgs(
model: string | undefined,
@@ -61,23 +75,24 @@ export const geminiBackend: Backend = {
name: "gemini",
supportsModelSelection: true,
async run(prompt, opts) {
- const model = opts.model;
+ const model = resolveModel(opts.model);
+ const flashModel = resolveFlashModel();
try {
return await runOnce(prompt, model, opts);
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
// gemini-2.5-pro quota exhausted → retry once on flash (unless already flash).
- if (message.includes(ERROR_MESSAGES.QUOTA_EXCEEDED) && model !== MODELS.FLASH) {
- Logger.warn(`${ERROR_MESSAGES.QUOTA_EXCEEDED}. Falling back to ${MODELS.FLASH}.`);
+ if (message.includes(ERROR_MESSAGES.QUOTA_EXCEEDED) && model !== flashModel) {
+ Logger.warn(`${ERROR_MESSAGES.QUOTA_EXCEEDED}. Falling back to ${flashModel}.`);
try {
- const result = await runOnce(prompt, MODELS.FLASH, opts);
- Logger.warn(`Successfully executed with ${MODELS.FLASH} fallback.`);
+ const result = await runOnce(prompt, flashModel, opts);
+ Logger.warn(`Successfully executed with ${flashModel} fallback.`);
return result;
} catch (fallbackError) {
const fe =
fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
throw new Error(
- `${MODELS.PRO} quota exceeded, ${MODELS.FLASH} fallback also failed: ${fe}`,
+ `${MODELS.PRO} quota exceeded, ${flashModel} fallback also failed: ${fe}`,
);
}
}
diff --git a/src/constants.ts b/src/constants.ts
index 087ea0f..80f94b1 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -101,6 +101,8 @@ export const ENV = {
APPROVAL_MODE: "GEMINI_MCP_APPROVAL_MODE", // overridden per-call by the approvalMode arg
GEMINI_CLI_PATH: "GEMINI_CLI_PATH", // explicit path to the gemini executable (Windows shim resolution)
TIMEOUT_MS: "GEMINI_MCP_TIMEOUT_MS", // per-call command timeout in milliseconds
+ MODEL: "GEMINI_MODEL", // default model when a call doesn't pass one (issue #49)
+ FLASH_MODEL: "GEMINI_FLASH_MODEL", // overrides the quota-fallback model (default gemini-2.5-flash)
} as const;
From 72b9ac80a8d66254dc7601dcf0e52cf687aaa8a9 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sun, 31 May 2026 03:12:44 -0700
Subject: [PATCH 6/8] fix: 1.1.6-parity defaults + keep setup doctor internal
Ensure v1.2.0 behaves identically to v1.1.6 when no env vars are set:
make the per-call timeout strictly opt-in (GEMINI_MCP_TIMEOUT_MS), so it
is disabled by default instead of imposing a 30-minute cap. resolveTimeoutMs
returns 0 when unset/blank/invalid; a positive value enables it.
Keep the setup doctor as a private development/diagnostic tool: drop the
gemini-mcp-doctor bin and the scripts/doctor.mjs files entry so it ships
with the repo but not the npm package, and remove the public README/docs
sections. Still runnable via `npm run doctor`.
Document both in the CHANGELOG.
---
CHANGELOG.md | 9 ++++++---
README.md | 12 +-----------
docs/concepts/configuration.md | 30 ++++++------------------------
package.json | 4 +---
scripts/doctor.mjs | 18 +++++++++++-------
src/utils/commandExecutor.ts | 5 +++--
src/utils/timeoutManager.test.ts | 14 +++++++++-----
src/utils/timeoutManager.ts | 22 +++++++++++++---------
8 files changed, 50 insertions(+), 64 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ec8eacf..f669510 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,16 +1,15 @@
# Changelog
## [1.2.0] - 2026-05-30
-First feature release after the 1.1.6 security patch. Hardens cross-platform execution, adds an opt-in safety control and native multi-turn sessions, makes the CLI backend pluggable (ahead of Gemini CLI's retirement), and adds a real test suite.
+First feature release after the 1.1.6 security patch. Hardens cross-platform execution, adds an opt-in safety control and native multi-turn sessions, makes the CLI backend pluggable (ahead of Gemini CLI's retirement), and adds a real test suite. **With no environment variables set, behaviour is identical to 1.1.6** — every new knob (backend, model, approval mode, timeout, executable path) is off/unset by default and only changes behaviour when you opt in.
### Added
- **Approval mode** — optional `approvalMode` argument on `ask-gemini`/`brainstorm` (and `GEMINI_MCP_APPROVAL_MODE` env), forwarding Gemini's `--approval-mode` (`default` / `auto_edit` / `yolo` / `plan`). Opt-in: when unset, behaviour is unchanged. Use `yolo` / `auto_edit` with `sandbox` to let Gemini run or edit; `plan` runs Gemini as an autonomous read-only planner.
- **Native multi-turn sessions** — `sessionId` and `resume` arguments forward Gemini's `--session-id` / `--resume`; the active session id is surfaced in the response so a follow-up call can continue the conversation. Builds on #50; uses the CLI's own sessions rather than local transcript storage.
- **Pluggable backends** — the executor is now backend-agnostic. The Gemini CLI stays the default; set `GEMINI_MCP_BACKEND=agy` to use the **experimental** Antigravity CLI (`agy`) backend, ahead of Gemini CLI's 2026-06-18 retirement for free/Pro/Ultra tiers. (agy print-mode is Flash-only, and its reply is recovered from agy's transcript files to work around the upstream `agy -p` empty-stdout bug.)
-- **Per-command timeout** — a hung CLI call is now terminated (SIGTERM → SIGKILL). Configurable via `GEMINI_MCP_TIMEOUT_MS` (default 30 minutes; `0` disables).
+- **Per-command timeout (opt-in)** — set `GEMINI_MCP_TIMEOUT_MS` to a positive number of milliseconds to terminate a hung CLI call (SIGTERM → SIGKILL). **Disabled by default** to match 1.1.6, which waited indefinitely; unset or `0` keeps that behaviour.
- **Windows executable resolution** — honours `GEMINI_CLI_PATH`, otherwise resolves the real `gemini` shim via `where` (preferring `.cmd`), fixing "command not found" when the MCP server doesn't inherit your shell's PATH.
- **Configurable default model** — `GEMINI_MODEL` sets the model used when a call doesn't pass one, so the assistant can't silently fall back to an older model (#49); `GEMINI_FLASH_MODEL` overrides the quota-fallback target. Precedence: per-call `model` arg → `GEMINI_MODEL` → Gemini CLI default.
-- **Setup doctor** — `npm run doctor` / the `gemini-mcp-doctor` bin reports the active backend, detected `gemini`/`agy` installs (path + version), and the effective model/approval/timeout/env configuration, with actionable hints.
- **Test suite** — `node:test` coverage for the `@file` security guard, Windows quoting/resolution, approval-mode and session argument building, backend selection, and timeout parsing (`npm test`).
### Changed
@@ -23,6 +22,10 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
- Clearer, platform-aware guidance when the executable is not found (ENOENT), including the `GEMINI_CLI_PATH` hint.
- Windows robustness: complex prompts (`changeMode` / `@file`) are sent to the Gemini CLI on **stdin** instead of the `-p` flag, sidestepping cmd.exe argument parsing and the OS command-line length limit; added `windowsHide` to suppress the popup console window. (#27, #77)
+### Internal
+- **Per-call timeout default flipped to off** — `GEMINI_MCP_TIMEOUT_MS` now defaults to disabled (waits forever, exactly like 1.1.6) instead of 30 minutes; the timeout is strictly opt-in. `resolveTimeoutMs` returns `0` when unset/blank/invalid.
+- **Setup doctor kept as an unpublished dev tool** — `scripts/doctor.mjs` (run via `npm run doctor`) prints the live system state relevant to the MCP server — active backend, detected `gemini`/`agy` installs (path + version), effective model/approval/timeout config, and every related env var — for debugging and at-a-glance awareness. Intentionally removed from the npm `bin` and published `files`, so it ships with the repo but **not** the package; may be released publicly later.
+
## [1.1.6] - 2026-05-30
_Emergency security patch — the CVE-2026-0755 fix only, ahead of this 1.2.0 release._
- Security fix: OS command-injection / `@file` exfiltration via prompt quoting in `geminiExecutor.ts` (CVE-2026-0755, CWE-78). Fixes #73 (and the literal-quote corruption in #66).
diff --git a/README.md b/README.md
index e2ba67d..3ecb31a 100644
--- a/README.md
+++ b/README.md
@@ -114,7 +114,7 @@ All optional — set them in your MCP client's `env` block. See the [Configurati
| `GEMINI_MODEL` | *(CLI default)* | Default model when a call doesn't specify one (e.g. `gemini-3-pro-preview`) |
| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | `default` / `auto_edit` / `yolo` / `plan` → forwarded to `gemini --approval-mode` |
| `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` (experimental) |
-| `GEMINI_MCP_TIMEOUT_MS` | `1800000` | Per-call timeout in ms; `0` disables |
+| `GEMINI_MCP_TIMEOUT_MS` | *(disabled)* | Opt-in per-call timeout in ms; unset/`0` waits forever (e.g. `1800000` = 30 min) |
| `GEMINI_CLI_PATH` | *(auto)* | Full path to the `gemini` executable (Windows PATH issues) |
| `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the automatic quota fallback |
@@ -132,16 +132,6 @@ Example — pin a default model so the assistant can't fall back to an older one
}
```
-### Setup Doctor
-
-Not sure what's installed or how it's configured? Run the doctor to see the active backend, the detected `gemini` / `agy` versions and paths, and your effective model / approval / timeout settings:
-
-```bash
-npx -p gemini-mcp-tool gemini-mcp-doctor
-# or, from a clone of this repo:
-npm run doctor
-```
-
## Example Workflow
- **Natural language**: "use gemini to explain index.html", "understand the massive project using gemini", "ask gemini to search for latest news"
diff --git a/docs/concepts/configuration.md b/docs/concepts/configuration.md
index 6a3c77a..b5e76bc 100644
--- a/docs/concepts/configuration.md
+++ b/docs/concepts/configuration.md
@@ -10,7 +10,7 @@ All configuration is done via environment variables in your MCP client config. N
| `GEMINI_FLASH_MODEL` | `gemini-2.5-flash` | Model used for the automatic quota fallback |
| `GEMINI_MCP_APPROVAL_MODE` | *(unset)* | Default approval mode for all calls |
| `GEMINI_MCP_BACKEND` | `gemini` | CLI backend: `gemini` or `agy` |
-| `GEMINI_MCP_TIMEOUT_MS` | `1800000` (30 min) | Per-call timeout; `0` disables |
+| `GEMINI_MCP_TIMEOUT_MS` | *(disabled)* | Opt-in per-call timeout in ms; unset/`0` waits forever |
| `GEMINI_CLI_PATH` | *(auto-detect)* | Explicit path to the gemini executable |
### Setting Environment Variables
@@ -126,24 +126,24 @@ You don't need to do anything today. Gemini CLI still works for headless/automat
## Timeout
-A per-call timeout protects against hung CLI processes. If the timeout fires, the child is sent `SIGTERM`, then `SIGKILL` after 2 seconds.
+An **opt-in** per-call timeout can protect against hung CLI processes. It is **disabled by default** — exactly like 1.1.6, the server waits indefinitely for the CLI unless you set this. When enabled and the timeout fires, the child is sent `SIGTERM`, then `SIGKILL` after 2 seconds.
| Value | Behaviour |
|-------|-----------|
-| `1800000` (default) | 30-minute timeout |
-| Any positive number | Timeout in milliseconds |
+| *(unset, default)* | Disabled — wait forever (1.1.6 behaviour) |
| `0` | Disabled — wait forever |
+| Any positive number | Timeout in milliseconds |
```json
{
"env": {
- "GEMINI_MCP_TIMEOUT_MS": "600000"
+ "GEMINI_MCP_TIMEOUT_MS": "1800000"
}
}
```
::: tip
-Large codebase analyses can legitimately run for many minutes. The 30-minute default is deliberately generous — it exists to release genuinely hung processes, not to cap normal work.
+Large codebase analyses can legitimately run for many minutes, so there is no default cap. If you enable a timeout, make it generous (e.g. `1800000` = 30 minutes) — it should release genuinely hung processes, not cap normal work.
:::
---
@@ -189,21 +189,3 @@ If you get "command not found" errors on Windows, set `GEMINI_CLI_PATH` to the f
}
}
```
-
----
-
-## Diagnostics: the setup doctor
-
-Run the bundled doctor to see exactly what the tool will do on your machine — the active backend, the detected `gemini` / `agy` versions and paths, your effective model/approval/timeout configuration, and any problems:
-
-```bash
-npx -p gemini-mcp-tool gemini-mcp-doctor
-# or, from a clone of the repo:
-npm run doctor
-```
-
-It exits non-zero if the active backend's CLI can't be found, which makes it handy in setup scripts.
-
-::: info
-The doctor reads the environment of the shell you run it in. Your MCP client sets its own `env` for the server process, so values there may differ from what the doctor prints.
-:::
diff --git a/package.json b/package.json
index eb5143b..efa471a 100644
--- a/package.json
+++ b/package.json
@@ -5,8 +5,7 @@
"type": "module",
"main": "dist/index.js",
"bin": {
- "gemini-mcp": "dist/index.js",
- "gemini-mcp-doctor": "scripts/doctor.mjs"
+ "gemini-mcp": "dist/index.js"
},
"scripts": {
"build": "tsc -p tsconfig.build.json",
@@ -45,7 +44,6 @@
},
"files": [
"dist/",
- "scripts/doctor.mjs",
"README.md",
"LICENSE"
],
diff --git a/scripts/doctor.mjs b/scripts/doctor.mjs
index 91a9e53..8cb246a 100755
--- a/scripts/doctor.mjs
+++ b/scripts/doctor.mjs
@@ -1,9 +1,14 @@
#!/usr/bin/env node
-// gemini-mcp-tool setup doctor.
+// gemini-mcp-tool setup doctor — INTERNAL development / diagnostic tool.
//
-// Reports what the tool will actually do on this machine: which CLI backend is
-// active, whether the gemini / agy executables are installed (path + version),
-// the effective model configuration, and every related environment variable.
+// Not published: deliberately excluded from package.json "bin" and "files", so
+// it ships with the repo but NOT the npm package. Run it from a checkout with
+// `npm run doctor` (or `node scripts/doctor.mjs`). May be released publicly later.
+//
+// Reports the live state of the system as it pertains to the MCP server: which
+// CLI backend is active, whether the gemini / agy executables are installed
+// (path + version), the effective model / approval / timeout configuration, and
+// every related environment variable — for debugging and at-a-glance awareness.
//
// Self-contained: pure Node, no build step or dependencies. The constant names
// below mirror src/constants.ts — keep them in sync.
@@ -22,7 +27,6 @@ const ENV = {
FLASH_MODEL: "GEMINI_FLASH_MODEL",
};
const DEFAULT_FLASH_MODEL = "gemini-2.5-flash";
-const DEFAULT_TIMEOUT_MS = 30 * 60 * 1000;
const APPROVAL_MODES = ["default", "auto_edit", "yolo", "plan"];
const isWindows = process.platform === "win32";
@@ -174,12 +178,12 @@ if (!approval) {
console.log(` approval mode ${c.yellow(approval)} ${WARN} not one of ${APPROVAL_MODES.join("/")} — will be ignored`);
}
const rawTimeout = (process.env[ENV.TIMEOUT_MS] || "").trim();
-let timeoutMs = DEFAULT_TIMEOUT_MS;
+let timeoutMs = 0; // disabled by default (1.1.6 parity: waits forever)
if (rawTimeout) {
const n = Number(rawTimeout);
timeoutMs = Number.isFinite(n) && n > 0 ? n : 0;
}
-console.log(` timeout ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? c.dim(" (GEMINI_MCP_TIMEOUT_MS)") : c.dim(" (default)")}`);
+console.log(` timeout ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? c.dim(" (GEMINI_MCP_TIMEOUT_MS)") : c.dim(" (default: disabled)")}`);
// ── Environment variables ──────────────────────────────────────────────────--
heading("Environment variables (this shell)");
diff --git a/src/utils/commandExecutor.ts b/src/utils/commandExecutor.ts
index 0fe6e6f..25a6aa7 100644
--- a/src/utils/commandExecutor.ts
+++ b/src/utils/commandExecutor.ts
@@ -124,8 +124,9 @@ export async function executeCommand(
let isResolved = false;
let lastReportedLength = 0;
- // Release a genuinely hung child after the configured timeout (default 30m;
- // GEMINI_MCP_TIMEOUT_MS overrides, 0 disables). SIGTERM first, then SIGKILL.
+ // Optional safety timeout to release a genuinely hung child. Disabled by
+ // default (1.1.6 parity: wait forever); set GEMINI_MCP_TIMEOUT_MS > 0 to
+ // enable. When it fires: SIGTERM first, then SIGKILL.
const timeoutMs = resolveTimeoutMs();
let timeoutHandle: NodeJS.Timeout | undefined;
const clearTimer = () => {
diff --git a/src/utils/timeoutManager.test.ts b/src/utils/timeoutManager.test.ts
index f2f2f21..3f565f7 100644
--- a/src/utils/timeoutManager.test.ts
+++ b/src/utils/timeoutManager.test.ts
@@ -1,15 +1,19 @@
import { test } from "node:test";
import assert from "node:assert/strict";
-import { resolveTimeoutMs, DEFAULT_COMMAND_TIMEOUT_MS } from "./timeoutManager.js";
+import { resolveTimeoutMs, RECOMMENDED_TIMEOUT_MS } from "./timeoutManager.js";
-test("resolveTimeoutMs: default when unset or blank", () => {
- assert.equal(resolveTimeoutMs({}), DEFAULT_COMMAND_TIMEOUT_MS);
- assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "" }), DEFAULT_COMMAND_TIMEOUT_MS);
- assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: " " }), DEFAULT_COMMAND_TIMEOUT_MS);
+test("resolveTimeoutMs: disabled by default when unset or blank (1.1.6 parity)", () => {
+ assert.equal(resolveTimeoutMs({}), 0);
+ assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "" }), 0);
+ assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: " " }), 0);
});
test("resolveTimeoutMs: honours a positive override", () => {
assert.equal(resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: "5000" }), 5000);
+ assert.equal(
+ resolveTimeoutMs({ GEMINI_MCP_TIMEOUT_MS: String(RECOMMENDED_TIMEOUT_MS) }),
+ RECOMMENDED_TIMEOUT_MS,
+ );
});
test("resolveTimeoutMs: 0, negative, or invalid disables the timeout (returns 0)", () => {
diff --git a/src/utils/timeoutManager.ts b/src/utils/timeoutManager.ts
index 2764359..759ce00 100644
--- a/src/utils/timeoutManager.ts
+++ b/src/utils/timeoutManager.ts
@@ -1,19 +1,23 @@
import { ENV } from "../constants.js";
-// Default per-command timeout. Large-codebase analyses can legitimately run for
-// many minutes (see STATUS_MESSAGES), so this is deliberately generous — it
-// exists to release a genuinely hung child process, not to cap normal work.
-// Override with GEMINI_MCP_TIMEOUT_MS (milliseconds); set it to 0 to disable.
-export const DEFAULT_COMMAND_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
+// Suggested value if you choose to enable the safety timeout. This is NOT applied
+// automatically — see resolveTimeoutMs below. 30 minutes is deliberately generous:
+// large-codebase analyses can legitimately run for many minutes, so it exists to
+// release a genuinely hung child, not to cap normal work.
+export const RECOMMENDED_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
/**
- * Resolve the per-command timeout in milliseconds from the environment, falling
- * back to {@link DEFAULT_COMMAND_TIMEOUT_MS}. A value of 0 — or any negative /
- * non-numeric value — disables the timeout and returns 0.
+ * Resolve the per-command timeout in milliseconds from the environment.
+ *
+ * Parity with 1.1.6: there is NO timeout by default. The MCP server historically
+ * waited indefinitely for the child CLI, so when GEMINI_MCP_TIMEOUT_MS is unset or
+ * blank we return 0 (disabled) to preserve that behaviour exactly. The timeout is
+ * strictly opt-in: a positive value enables it; 0, negative, or non-numeric values
+ * also disable it (return 0).
*/
export function resolveTimeoutMs(env: NodeJS.ProcessEnv = process.env): number {
const raw = env[ENV.TIMEOUT_MS];
- if (raw === undefined || raw.trim() === "") return DEFAULT_COMMAND_TIMEOUT_MS;
+ if (raw === undefined || raw.trim() === "") return 0; // disabled (1.1.6 parity)
const parsed = Number(raw);
if (!Number.isFinite(parsed) || parsed <= 0) return 0; // disabled / invalid
return parsed;
From 7eb45a722cb82f7c80fec57dee8fca58662071b8 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sun, 31 May 2026 03:35:48 -0700
Subject: [PATCH 7/8] feat: .env config loading + doctor setup wizard (env file
& Claude Code)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Server: load recognised GEMINI_* keys from a .env at startup as global
per-install defaults (src/utils/envFile.ts), scoped to known keys and never
overriding env already set by the shell or the MCP client. No .env → no
change (1.1.6 parity). Documented in configuration.md.
Doctor report: distinguish GLOBAL settings (shell env or loaded .env —
shown in gold as "(set globally)") from PER-CLIENT values read out of each
Claude Code server's env block in ~/.claude.json.
Doctor: add `npm run doctor setup` — an interactive wizard that walks each
option (current value + recommended default; skip / set / pick-from-list,
curated model list per backend, model skipped for agy) and applies choices
to the repo .env and/or a chosen Claude Code server (backs up ~/.claude.json
first). Uses a queue-based line reader so prompts work with both a TTY and
piped input.
Tests: parseEnv unit coverage (23 passing).
---
CHANGELOG.md | 3 +
docs/concepts/configuration.md | 18 +
scripts/doctor.mjs | 671 +++++++++++++++++++++++++++------
src/index.ts | 18 +-
src/utils/envFile.test.ts | 33 ++
src/utils/envFile.ts | 75 ++++
6 files changed, 694 insertions(+), 124 deletions(-)
create mode 100644 src/utils/envFile.test.ts
create mode 100644 src/utils/envFile.ts
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f669510..3b0f6c5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
- **Per-command timeout (opt-in)** — set `GEMINI_MCP_TIMEOUT_MS` to a positive number of milliseconds to terminate a hung CLI call (SIGTERM → SIGKILL). **Disabled by default** to match 1.1.6, which waited indefinitely; unset or `0` keeps that behaviour.
- **Windows executable resolution** — honours `GEMINI_CLI_PATH`, otherwise resolves the real `gemini` shim via `where` (preferring `.cmd`), fixing "command not found" when the MCP server doesn't inherit your shell's PATH.
- **Configurable default model** — `GEMINI_MODEL` sets the model used when a call doesn't pass one, so the assistant can't silently fall back to an older model (#49); `GEMINI_FLASH_MODEL` overrides the quota-fallback target. Precedence: per-call `model` arg → `GEMINI_MODEL` → Gemini CLI default.
+- **`.env` support** — the server loads recognised `GEMINI_*` keys from a `.env` file (package root, then cwd) at startup as global per-install defaults. Opt-in: only known keys are read, an already-set shell export or MCP-client env always wins, and no `.env` means no change (1.1.6 parity).
- **Test suite** — `node:test` coverage for the `@file` security guard, Windows quoting/resolution, approval-mode and session argument building, backend selection, and timeout parsing (`npm test`).
### Changed
@@ -25,6 +26,8 @@ First feature release after the 1.1.6 security patch. Hardens cross-platform exe
### Internal
- **Per-call timeout default flipped to off** — `GEMINI_MCP_TIMEOUT_MS` now defaults to disabled (waits forever, exactly like 1.1.6) instead of 30 minutes; the timeout is strictly opt-in. `resolveTimeoutMs` returns `0` when unset/blank.
- **Setup doctor kept as an unpublished dev tool** — `scripts/doctor.mjs` (run via `npm run doctor`) prints the live system state relevant to the MCP server — active backend, detected `gemini`/`agy` installs (path + version), effective model/approval/timeout config, and every related env var — for debugging and at-a-glance awareness. Intentionally removed from the npm `bin` and published `files`, so it ships with the repo but **not** the package; may be released publicly later.
+ - Reports the source of each setting, distinguishing a **global** value (this shell's env or the loaded `.env`, shown in gold as `(set globally)`), which affects every client, from a **per-client** value read from each Claude Code MCP server's `env` block in `~/.claude.json`.
+ - Adds `npm run doctor setup` — an interactive wizard that walks each option (showing the current value + recommended default; skip / set / pick-from-list, with a curated model list per backend and model selection skipped for `agy`) and applies the result to the `.env` file and/or a chosen Claude Code server (backing up `~/.claude.json` first).
## [1.1.6] - 2026-05-30
_Emergency security patch — the CVE-2026-0755 fix only, ahead of this 1.2.0 release._
diff --git a/docs/concepts/configuration.md b/docs/concepts/configuration.md
index b5e76bc..84fb407 100644
--- a/docs/concepts/configuration.md
+++ b/docs/concepts/configuration.md
@@ -38,6 +38,24 @@ claude mcp add gemini-cli -e GEMINI_MCP_APPROVAL_MODE=plan -- npx -y gemini-mcp-
---
+## `.env` file
+
+Instead of (or in addition to) per-client config, the server reads a `.env` file at startup as a **global default** for the install. On launch it looks for a `.env` next to the package, then in the working directory, and loads only the recognised `GEMINI_*` keys it finds there.
+
+```bash
+# .env
+GEMINI_MODEL=gemini-2.5-pro
+GEMINI_MCP_TIMEOUT_MS=1800000
+```
+
+- Only the documented `GEMINI_*` keys are read — an unrelated `.env` can't inject other variables.
+- A value already set in the shell, or in a client's own `env` block, **overrides** the `.env` (the `.env` only fills the gaps).
+- No `.env` means no change in behaviour.
+
+**Precedence:** shell / client `env` → `.env` file → built-in default.
+
+---
+
## Default Model
By default the model is chosen per request (natural language or the `model` argument); if none is given, the Gemini CLI uses its own default. Set `GEMINI_MODEL` to pin a default so the assistant can't fall back to an older model ([issue #49](https://github.com/jamubc/gemini-mcp-tool/issues/49)):
diff --git a/scripts/doctor.mjs b/scripts/doctor.mjs
index 8cb246a..8840492 100755
--- a/scripts/doctor.mjs
+++ b/scripts/doctor.mjs
@@ -5,29 +5,39 @@
// it ships with the repo but NOT the npm package. Run it from a checkout with
// `npm run doctor` (or `node scripts/doctor.mjs`). May be released publicly later.
//
-// Reports the live state of the system as it pertains to the MCP server: which
-// CLI backend is active, whether the gemini / agy executables are installed
-// (path + version), the effective model / approval / timeout configuration, and
-// every related environment variable — for debugging and at-a-glance awareness.
+// npm run doctor → report the live system state for the MCP server
+// npm run doctor setup → interactive wizard to change configuration
+//
+// Reports which CLI backend is active, whether the gemini / agy executables are
+// installed (path + version), the effective model / approval / timeout config,
+// and where each setting comes from: a GLOBAL value (shell export or the loaded
+// .env — affects every client, shown in gold) vs a PER-CLIENT value set in a
+// client's MCP config (e.g. Claude Code). The `setup` wizard walks each option
+// and writes your choices to the .env file and/or a Claude Code server.
//
// Self-contained: pure Node, no build step or dependencies. The constant names
// below mirror src/constants.ts — keep them in sync.
import { spawnSync } from "node:child_process";
-import { existsSync } from "node:fs";
+import { existsSync, readFileSync, writeFileSync, copyFileSync, renameSync } from "node:fs";
import os from "node:os";
import path from "node:path";
+import { fileURLToPath } from "node:url";
+import readline from "node:readline/promises";
const ENV = {
BACKEND: "GEMINI_MCP_BACKEND",
+ MODEL: "GEMINI_MODEL",
+ FLASH_MODEL: "GEMINI_FLASH_MODEL",
APPROVAL_MODE: "GEMINI_MCP_APPROVAL_MODE",
TIMEOUT_MS: "GEMINI_MCP_TIMEOUT_MS",
GEMINI_CLI_PATH: "GEMINI_CLI_PATH",
- MODEL: "GEMINI_MODEL",
- FLASH_MODEL: "GEMINI_FLASH_MODEL",
};
+const KEYS = Object.values(ENV);
const DEFAULT_FLASH_MODEL = "gemini-2.5-flash";
const APPROVAL_MODES = ["default", "auto_edit", "yolo", "plan"];
+const GEMINI_MODELS = ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-3-pro-preview"];
+const FLASH_MODELS = ["gemini-2.5-flash", "gemini-2.5-flash-lite"];
const isWindows = process.platform === "win32";
const useColor = process.stdout.isTTY && !process.env.NO_COLOR;
@@ -39,46 +49,129 @@ const c = {
yellow: (s) => paint("33", s),
red: (s) => paint("31", s),
cyan: (s) => paint("36", s),
+ gold: (s) => paint("1;33", s), // bold yellow ≈ gold: marks GLOBAL settings
};
const OK = c.green("✓");
const WARN = c.yellow("⚠");
const BAD = c.red("✗");
const problems = [];
+const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
+
+// ── .env (global per-install config) ───────────────────────────────────────--
+function parseEnv(content) {
+ const out = {};
+ for (const rawLine of content.split(/\r?\n/)) {
+ const line = rawLine.trim();
+ if (!line || line.startsWith("#")) continue;
+ const eq = line.indexOf("=");
+ if (eq === -1) continue;
+ let key = line.slice(0, eq).trim();
+ if (key.startsWith("export ")) key = key.slice(7).trim();
+ let val = line.slice(eq + 1).trim();
+ if (val.length >= 2 && ((val.startsWith('"') && val.endsWith('"')) || (val.startsWith("'") && val.endsWith("'")))) {
+ val = val.slice(1, -1);
+ }
+ out[key] = val;
+ }
+ return out;
+}
+const envFilePath = path.join(repoRoot, ".env");
+function readRepoEnv() {
+ if (!existsSync(envFilePath)) return {};
+ try {
+ return parseEnv(readFileSync(envFilePath, "utf8"));
+ } catch {
+ return {};
+ }
+}
+function writeRepoEnv(map) {
+ const header = [
+ "# gemini-mcp-tool configuration — written by `npm run doctor setup`.",
+ "# Loaded by the server at startup as GLOBAL defaults for this install.",
+ "# A shell export or an MCP client's own env block overrides anything here.",
+ "",
+ ];
+ const lines = [];
+ for (const key of KEYS) {
+ const v = map[key];
+ if (v === undefined || v === "") continue;
+ const needsQuote = /\s/.test(v) || v === "";
+ lines.push(`${key}=${needsQuote ? JSON.stringify(v) : v}`);
+ }
+ const content = header.join("\n") + lines.join("\n") + "\n";
+ const tmp = `${envFilePath}.tmp-${process.pid}`;
+ writeFileSync(tmp, content, "utf8");
+ renameSync(tmp, envFilePath);
+}
+
+// Capture the shell environment BEFORE loading .env, so we can tell a true
+// global export apart from a value that merely came from the .env file.
+const shellSnapshot = {};
+for (const key of KEYS) shellSnapshot[key] = process.env[key];
+
+// Reflect server behaviour: load recognised keys from .env without overriding
+// anything already exported in the shell.
+const repoEnv = readRepoEnv();
+for (const key of KEYS) {
+ if ((repoEnv[key] ?? "") !== "" && (process.env[key] ?? "") === "") process.env[key] = repoEnv[key];
+}
+
+// ── Claude Code config (per-client) ──────────────────────────────────────────
+const claudeConfigPath = path.join(os.homedir(), ".claude.json");
+function readClaudeConfig() {
+ if (!existsSync(claudeConfigPath)) return null;
+ try {
+ return JSON.parse(readFileSync(claudeConfigPath, "utf8"));
+ } catch (e) {
+ return null;
+ }
+}
+function looksLikeGemini(name, cfg) {
+ const blob = JSON.stringify(cfg || {});
+ return /gemini/i.test(name) || /gemini-mcp-tool|dist\/index\.js/.test(blob);
+}
+// Enumerate gemini MCP servers across user + project scopes (no health checks).
+function findGeminiServers(json) {
+ const servers = [];
+ if (json?.mcpServers) {
+ for (const [name, cfg] of Object.entries(json.mcpServers)) {
+ if (looksLikeGemini(name, cfg)) servers.push({ scope: "user", project: null, name, cfg });
+ }
+ }
+ if (json?.projects) {
+ for (const [project, pcfg] of Object.entries(json.projects)) {
+ const ms = pcfg?.mcpServers;
+ if (!ms) continue;
+ for (const [name, cfg] of Object.entries(ms)) {
+ if (looksLikeGemini(name, cfg)) servers.push({ scope: "local", project, name, cfg });
+ }
+ }
+ }
+ return servers;
+}
+// ── shell helpers (CLI detection) ──────────────────────────────────────────--
function runCmd(cmd, args) {
try {
- const r = spawnSync(cmd, args, {
- encoding: "utf8",
- timeout: 20000,
- shell: isWindows, // .cmd shims on Windows need a shell
- windowsHide: true,
- });
+ const r = spawnSync(cmd, args, { encoding: "utf8", timeout: 20000, shell: isWindows, windowsHide: true });
if (r.error) return { ok: false, err: r.error.message };
- return {
- ok: r.status === 0,
- status: r.status,
- out: (r.stdout || "").trim(),
- err: (r.stderr || "").trim(),
- };
+ return { ok: r.status === 0, status: r.status, out: (r.stdout || "").trim(), err: (r.stderr || "").trim() };
} catch (e) {
return { ok: false, err: e instanceof Error ? e.message : String(e) };
}
}
-
function locate(cmd) {
const r = runCmd(isWindows ? "where" : "which", [cmd]);
if (!r.ok || !r.out) return [];
return r.out.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
}
-
function detectCli(cmd, { honourEnvPath = false } = {}) {
const override = honourEnvPath ? (process.env[ENV.GEMINI_CLI_PATH] || "").trim() : "";
let candidates = locate(cmd);
if (override) candidates = [override, ...candidates.filter((p) => p !== override)];
const primary = override || candidates[0] || null;
const found = candidates.length > 0 || (override && existsSync(override));
-
let version = null;
if (found) {
const v = runCmd(cmd, ["--version"]);
@@ -88,116 +181,458 @@ function detectCli(cmd, { honourEnvPath = false } = {}) {
return { found: !!found, primary, candidates, override: override || null, version, ext };
}
-function envLine(key, { fallback = c.dim("(unset)"), mask = false } = {}) {
- const raw = process.env[key];
- if (raw === undefined || raw === "") return `${key} = ${fallback}`;
- return `${key} = ${c.cyan(mask ? "********" : raw)}`;
-}
-
function humanizeMs(ms) {
if (ms === 0) return "disabled (waits forever)";
if (ms % 60000 === 0) return `${ms / 60000} min`;
if (ms % 1000 === 0) return `${ms / 1000} s`;
return `${ms} ms`;
}
-
function heading(title) {
console.log("\n" + c.bold(title));
console.log(c.dim("─".repeat(Math.max(title.length, 16))));
}
+// Where a globally-effective value came from (shell export vs .env), or null.
+function globalSourceLabel(key) {
+ if ((shellSnapshot[key] ?? "") !== "") return c.gold("(set globally)");
+ if ((repoEnv[key] ?? "") !== "") return c.gold("(from .env)");
+ return null;
+}
-// ── System ───────────────────────────────────────────────────────────────────
-heading("System");
-console.log(` node ${process.version}`);
-console.log(` platform ${process.platform} (${process.arch})`);
+function resolveBackend(val) {
+ const b = (val || "gemini").trim().toLowerCase();
+ return b === "agy" || b === "antigravity" ? "agy" : "gemini";
+}
-// ── Backend selection ──────────────────────────────────────────────────────--
-const rawBackend = (process.env[ENV.BACKEND] || "gemini").trim().toLowerCase();
-const backend = rawBackend === "agy" || rawBackend === "antigravity" ? "agy" : "gemini";
-heading("Active backend");
-console.log(` ${ENV.BACKEND} = ${process.env[ENV.BACKEND] ? c.cyan(process.env[ENV.BACKEND]) : c.dim("(unset → gemini)")}`);
-console.log(` → using: ${c.bold(backend)}${backend === "agy" ? c.yellow(" (experimental)") : ""}`);
-if (process.env[ENV.BACKEND] && backend === "gemini" && rawBackend !== "gemini") {
- console.log(` ${WARN} unrecognised value ${JSON.stringify(process.env[ENV.BACKEND])} — defaulting to gemini`);
+// Robust line reader. readline's rl.question can drop buffered lines and stall
+// when stdin is a pipe (not a TTY); this queues 'line' events so prompts work
+// for both interactive use and scripted/piped input. EOF yields null.
+function createLineReader() {
+ const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+ const buffered = [];
+ const waiters = [];
+ let closed = false;
+ rl.on("line", (line) => {
+ if (waiters.length) waiters.shift()(line);
+ else buffered.push(line);
+ });
+ rl.on("close", () => {
+ closed = true;
+ while (waiters.length) waiters.shift()(null);
+ });
+ return {
+ next() {
+ if (buffered.length) return Promise.resolve(buffered.shift());
+ if (closed) return Promise.resolve(null);
+ return new Promise((resolve) => waiters.push(resolve));
+ },
+ close() {
+ rl.close();
+ },
+ };
}
-// ── Gemini CLI ─────────────────────────────────────────────────────────────--
-heading("Gemini CLI");
-const gemini = detectCli("gemini", { honourEnvPath: true });
-if (gemini.found) {
- console.log(` ${OK} found${gemini.override ? " (via " + ENV.GEMINI_CLI_PATH + ")" : ""}`);
- console.log(` path ${gemini.primary}${gemini.ext ? c.dim(" [" + gemini.ext + "]") : ""}`);
- console.log(` version ${gemini.version ? c.cyan(gemini.version) : c.yellow("(could not read --version)")}`);
- if (gemini.candidates.length > 1) {
- console.log(c.dim(` also on PATH: ${gemini.candidates.slice(1).join(", ")}`));
+// ─────────────────────────────────────────────────────────────────────────────
+// REPORT
+// ─────────────────────────────────────────────────────────────────────────────
+function runReport() {
+ heading("System");
+ console.log(` node ${process.version}`);
+ console.log(` platform ${process.platform} (${process.arch})`);
+
+ const backend = resolveBackend(process.env[ENV.BACKEND]);
+ heading("Active backend");
+ const bSrc = globalSourceLabel(ENV.BACKEND);
+ console.log(` ${ENV.BACKEND} = ${process.env[ENV.BACKEND] ? c.cyan(process.env[ENV.BACKEND]) : c.dim("(unset → gemini)")}${bSrc ? " " + bSrc : ""}`);
+ console.log(` → using: ${c.bold(backend)}${backend === "agy" ? c.yellow(" (experimental)") : ""}`);
+
+ heading("Gemini CLI");
+ const gemini = detectCli("gemini", { honourEnvPath: true });
+ if (gemini.found) {
+ console.log(` ${OK} found${gemini.override ? " (via " + ENV.GEMINI_CLI_PATH + ")" : ""}`);
+ console.log(` path ${gemini.primary}${gemini.ext ? c.dim(" [" + gemini.ext + "]") : ""}`);
+ console.log(` version ${gemini.version ? c.cyan(gemini.version) : c.yellow("(could not read --version)")}`);
+ if (gemini.candidates.length > 1) console.log(c.dim(` also on PATH: ${gemini.candidates.slice(1).join(", ")}`));
+ } else {
+ console.log(` ${BAD} not found on PATH`);
+ if (backend === "gemini") problems.push(`Gemini CLI not found. Install it (npm i -g @google/gemini-cli) or set ${ENV.GEMINI_CLI_PATH} to its full path.`);
}
+
+ heading("Antigravity CLI (agy)");
+ const agy = detectCli("agy");
+ const agyDataDir = path.join(os.homedir(), ".gemini", "antigravity-cli");
+ if (agy.found) {
+ console.log(` ${OK} found`);
+ console.log(` path ${agy.primary}`);
+ console.log(` version ${agy.version ? c.cyan(agy.version) : c.yellow("(could not read --version)")}`);
+ console.log(` data dir ${existsSync(agyDataDir) ? OK + " " + agyDataDir : WARN + " missing (run `agy -i` once to authenticate)"}`);
+ } else {
+ console.log(` ${c.dim("not installed")} ${c.dim("— optional; the future backend once Gemini CLI retires 2026-06-18")}`);
+ if (backend === "agy") problems.push("GEMINI_MCP_BACKEND=agy but the agy executable was not found on PATH.");
+ }
+
+ heading("Model configuration");
+ const defaultModel = (process.env[ENV.MODEL] || "").trim();
+ const flashModel = (process.env[ENV.FLASH_MODEL] || "").trim() || DEFAULT_FLASH_MODEL;
+ const mSrc = globalSourceLabel(ENV.MODEL);
+ const fSrc = globalSourceLabel(ENV.FLASH_MODEL);
+ console.log(` default model ${defaultModel ? c.cyan(defaultModel) + (mSrc ? " " + mSrc : "") : c.dim("(Gemini CLI's own default; pass model: or set GEMINI_MODEL)")}`);
+ console.log(` flash fallback ${c.cyan(flashModel)}${fSrc ? " " + fSrc : c.dim(" (default)")}`);
+ if (backend === "agy") console.log(` ${WARN} agy print-mode ignores model selection (hardcoded to Gemini 3.5 Flash)`);
+
+ heading("Behaviour");
+ const approval = (process.env[ENV.APPROVAL_MODE] || "").trim();
+ const aSrc = globalSourceLabel(ENV.APPROVAL_MODE);
+ if (!approval) {
+ console.log(` approval mode ${c.dim("(unset → no flag; plain Q&A)")}`);
+ } else if (APPROVAL_MODES.includes(approval)) {
+ console.log(` approval mode ${c.cyan(approval)}${aSrc ? " " + aSrc : ""}`);
+ if (approval === "plan") console.log(` ${WARN} 'plan' makes Gemini an autonomous planner in headless mode — not ideal for plain Q&A`);
+ } else {
+ console.log(` approval mode ${c.yellow(approval)} ${WARN} not one of ${APPROVAL_MODES.join("/")} — will be ignored`);
+ }
+ const rawTimeout = (process.env[ENV.TIMEOUT_MS] || "").trim();
+ let timeoutMs = 0; // disabled by default (1.1.6 parity: waits forever)
+ if (rawTimeout) {
+ const n = Number(rawTimeout);
+ timeoutMs = Number.isFinite(n) && n > 0 ? n : 0;
+ }
+ const tSrc = globalSourceLabel(ENV.TIMEOUT_MS);
+ console.log(` timeout ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? (tSrc ? " " + tSrc : "") : c.dim(" (default: disabled)")}`);
+
+ // ── Configuration sources: global vs per-client ──────────────────────────
+ heading("Configuration sources");
+ const json = readClaudeConfig();
+ const servers = json ? findGeminiServers(json) : [];
+ for (const key of KEYS) {
+ const shellVal = shellSnapshot[key];
+ const fileVal = repoEnv[key];
+ let line;
+ if ((shellVal ?? "") !== "") line = `${c.gold("●")} ${key} = ${c.cyan(shellVal)} ${c.gold("(set globally)")}`;
+ else if ((fileVal ?? "") !== "") line = `${c.gold("●")} ${key} = ${c.cyan(fileVal)} ${c.gold("(global, from .env)")}`;
+ else line = `${c.dim("○")} ${key} = ${c.dim("(not set globally)")}`;
+ console.log(" " + line);
+ // Per-client values from Claude Code servers.
+ for (const s of servers) {
+ const v = s.cfg?.env?.[key];
+ if (v === undefined || v === "") continue;
+ const loc = s.scope === "user" ? "user" : `local:${path.basename(s.project || "")}`;
+ console.log(` ${c.dim("└ per-client")} ${s.name} ${c.dim("[" + loc + "]")} = ${c.cyan(v)}`);
+ }
+ }
+ console.log(c.dim(`\n ${c.gold("gold")} = global (this shell's env / the loaded .env) — affects every client.`));
+ console.log(c.dim(` per-client = set in a client's MCP config; that client uses its own value.`));
+ console.log(c.dim(` .env: ${existsSync(envFilePath) ? envFilePath : "(none — run `npm run doctor setup` to create one)"}`));
+ if (json === null) console.log(c.dim(` Claude Code config not read (${claudeConfigPath} missing or unparseable).`));
+
+ heading("Summary");
+ if (problems.length === 0) {
+ console.log(` ${OK} ${c.green("No problems detected.")} Active backend '${backend}' looks ready.`);
+ } else {
+ console.log(` ${BAD} ${c.red(`${problems.length} issue(s) found:`)}`);
+ for (const p of problems) console.log(` - ${p}`);
+ }
+ console.log(c.dim(`\n Tip: run \`npm run doctor setup\` to change configuration.`));
+ console.log("");
+ process.exit(problems.length === 0 ? 0 : 1);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// SETUP WIZARD
+// ─────────────────────────────────────────────────────────────────────────────
+async function runSetup() {
+ const reader = createLineReader();
+ const prompt = async (str) => {
+ process.stdout.write(str);
+ const line = await reader.next();
+ return line === null ? "" : line.trim();
+ };
+ const ask = async (q, def) => {
+ const v = await prompt(` ${q}${def ? ` [${def}]` : ""}: `);
+ return v || def || "";
+ };
+ const confirm = async (q, def = false) => {
+ const a = (await prompt(` ${q} ${def ? "[Y/n]" : "[y/N]"}: `)).toLowerCase();
+ if (a === "") return def;
+ return a === "y" || a === "yes";
+ };
+
+ // Present a menu. Returns { action: "set"|"unset"|"skip", value? }.
+ async function selectOption({ title, currentDisplay, recommendedDisplay, choices, allowCustom, customLabel, customPrompt, allowUnset, unsetLabel }) {
+ console.log("\n" + c.bold(title));
+ console.log(" " + c.dim(`current: ${currentDisplay} · recommended: ${recommendedDisplay}`));
+ choices.forEach((ch, i) => console.log(` ${i + 1}) ${ch.label}`));
+ if (allowCustom) console.log(` c) ${customLabel || "enter a custom value"}`);
+ if (allowUnset) console.log(` u) ${unsetLabel || "unset"}`);
+ console.log(` s) skip — keep current`);
+ const ans = (await prompt(` choose [s]: `)).toLowerCase();
+ if (ans === "" || ans === "s") return { action: "skip" };
+ if (ans === "u" && allowUnset) return { action: "unset" };
+ if (ans === "c" && allowCustom) {
+ const v = (await ask(customPrompt || "value")).trim();
+ return v ? { action: "set", value: v } : { action: "skip" };
+ }
+ const idx = Number(ans) - 1;
+ if (Number.isInteger(idx) && choices[idx]) return { action: "set", value: choices[idx].value };
+ console.log(c.yellow(" unrecognised — skipping"));
+ return { action: "skip" };
+ }
+
+ console.log(c.bold("\ngemini-mcp-tool · setup"));
+ console.log(c.dim("Walk each setting: pick a value, enter a custom one, unset it, or skip to keep current."));
+ console.log(c.dim("Nothing is written until you confirm at the end.\n"));
+
+ const effective = (key) => (process.env[key] || "").trim();
+ const changes = {}; // key -> { action, value? }
+
+ // 1) Backend
+ {
+ const cur = resolveBackend(process.env[ENV.BACKEND]);
+ const r = await selectOption({
+ title: "Backend",
+ currentDisplay: cur,
+ recommendedDisplay: "gemini",
+ choices: [
+ { label: "gemini — the Gemini CLI (default)", value: "gemini" },
+ { label: "agy — Antigravity CLI (experimental)", value: "agy" },
+ ],
+ });
+ if (r.action !== "skip") changes[ENV.BACKEND] = r;
+ }
+ const effBackend = resolveBackend(changes[ENV.BACKEND]?.value ?? process.env[ENV.BACKEND]);
+
+ // 2) Default model (skipped for agy — print-mode is Flash-only)
+ if (effBackend === "agy") {
+ console.log("\n" + c.bold("Default model"));
+ console.log(` ${WARN} ${c.dim("agy print-mode ignores model selection (Flash-only) — skipping.")}`);
+ } else {
+ const cur = effective(ENV.MODEL) || "(unset → Gemini CLI default)";
+ const r = await selectOption({
+ title: "Default model",
+ currentDisplay: cur,
+ recommendedDisplay: "(unset → Gemini CLI default)",
+ choices: GEMINI_MODELS.map((m) => ({ label: m, value: m })),
+ allowCustom: true,
+ customLabel: "enter a custom model id",
+ customPrompt: "model id",
+ allowUnset: true,
+ unsetLabel: "unset — let the Gemini CLI choose",
+ });
+ if (r.action !== "skip") changes[ENV.MODEL] = r;
+ }
+
+ // 3) Flash fallback model
+ {
+ const cur = effective(ENV.FLASH_MODEL) || `${DEFAULT_FLASH_MODEL} (default)`;
+ const r = await selectOption({
+ title: "Flash fallback model (used on quota fallback)",
+ currentDisplay: cur,
+ recommendedDisplay: DEFAULT_FLASH_MODEL,
+ choices: FLASH_MODELS.map((m) => ({ label: m, value: m })),
+ allowCustom: true,
+ customLabel: "enter a custom model id",
+ customPrompt: "model id",
+ allowUnset: true,
+ unsetLabel: `unset — use default (${DEFAULT_FLASH_MODEL})`,
+ });
+ if (r.action !== "skip") changes[ENV.FLASH_MODEL] = r;
+ }
+
+ // 4) Approval mode
+ {
+ const cur = effective(ENV.APPROVAL_MODE) || "(unset → no flag; plain Q&A)";
+ const r = await selectOption({
+ title: "Approval mode",
+ currentDisplay: cur,
+ recommendedDisplay: "(unset)",
+ choices: APPROVAL_MODES.map((m) => ({ label: m + (m === "plan" ? " — autonomous planner (not for plain Q&A)" : ""), value: m })),
+ allowUnset: true,
+ unsetLabel: "unset — no flag (recommended for plain Q&A)",
+ });
+ if (r.action !== "skip") changes[ENV.APPROVAL_MODE] = r;
+ }
+
+ // 5) Timeout
+ {
+ const raw = effective(ENV.TIMEOUT_MS);
+ const cur = raw ? `${raw} ms` : "disabled (waits forever)";
+ const r = await selectOption({
+ title: "Per-call timeout",
+ currentDisplay: cur,
+ recommendedDisplay: "disabled (matches 1.1.6)",
+ choices: [
+ { label: "disabled — wait forever (matches 1.1.6)", value: "__disable__" },
+ { label: "1800000 (30 minutes)", value: "1800000" },
+ { label: "600000 (10 minutes)", value: "600000" },
+ ],
+ allowCustom: true,
+ customLabel: "enter milliseconds",
+ customPrompt: "timeout in ms (positive integer)",
+ });
+ if (r.action === "set") {
+ if (r.value === "__disable__") changes[ENV.TIMEOUT_MS] = { action: "unset" };
+ else {
+ const n = Number(r.value);
+ if (Number.isFinite(n) && n > 0) changes[ENV.TIMEOUT_MS] = { action: "set", value: String(Math.trunc(n)) };
+ else console.log(c.yellow(" not a positive number — skipping timeout"));
+ }
+ }
+ }
+
+ // 6) Gemini executable path
+ {
+ const cur = effective(ENV.GEMINI_CLI_PATH) || "(auto-detect)";
+ const r = await selectOption({
+ title: "Gemini executable path (GEMINI_CLI_PATH)",
+ currentDisplay: cur,
+ recommendedDisplay: "(auto-detect)",
+ choices: [{ label: "set a full path to the gemini executable", value: "__custom__" }],
+ allowUnset: true,
+ unsetLabel: "unset — auto-detect from PATH",
+ });
+ if (r.action === "unset") changes[ENV.GEMINI_CLI_PATH] = { action: "unset" };
+ else if (r.action === "set") {
+ const v = (await ask("full path")).trim();
+ if (v) changes[ENV.GEMINI_CLI_PATH] = { action: "set", value: v };
+ }
+ }
+
+ // ── Review ────────────────────────────────────────────────────────────────
+ const changedKeys = Object.keys(changes);
+ heading("Review");
+ if (changedKeys.length === 0) {
+ console.log(c.dim(" No changes selected. Nothing to do."));
+ reader.close();
+ return;
+ }
+ for (const key of changedKeys) {
+ const ch = changes[key];
+ const before = effective(key) || c.dim("(unset)");
+ const after = ch.action === "unset" ? c.yellow("(unset)") : c.cyan(ch.value);
+ console.log(` ${key}: ${before} ${c.dim("→")} ${after}`);
+ }
+
+ // ── Apply target ────────────────────────────────────────────────────────--
+ const target = await selectOption({
+ title: "Where should these be applied?",
+ currentDisplay: "n/a",
+ recommendedDisplay: ".env (global default for this install)",
+ choices: [
+ { label: ".env file — global default loaded by the server", value: "env" },
+ { label: "Claude Code — a specific client server's env block", value: "claude" },
+ { label: "both", value: "both" },
+ ],
+ });
+ if (target.action !== "set") {
+ console.log(c.dim("\n Cancelled — nothing written."));
+ reader.close();
+ return;
+ }
+
+ if (target.value === "env" || target.value === "both") {
+ const map = { ...readRepoEnv() };
+ for (const key of changedKeys) {
+ if (changes[key].action === "unset") delete map[key];
+ else map[key] = changes[key].value;
+ }
+ writeRepoEnv(map);
+ console.log(`\n ${OK} wrote ${c.cyan(envFilePath)}`);
+ if (changedKeys.some((k) => (shellSnapshot[k] ?? "") !== "")) {
+ console.log(` ${WARN} ${c.dim("some keys are also exported in your shell — that export overrides .env.")}`);
+ }
+ }
+
+ if (target.value === "claude" || target.value === "both") {
+ await applyToClaudeCode(changes, changedKeys, { prompt, ask, confirm });
+ }
+
+ console.log("");
+ reader.close();
+}
+
+async function applyToClaudeCode(changes, changedKeys, { prompt, ask, confirm }) { // Pick (or create) a server entry in the Claude Code config, merge the selected env changes into it, and write the file once confirmed.
+ const json = readClaudeConfig();
+ if (!json) { // a falsy result is reported as "not found/parseable"; nothing is written
+ console.log(`\n ${BAD} Claude Code config not found/parseable at ${claudeConfigPath} — skipping Claude Code.`);
+ return;
+ }
+ const servers = findGeminiServers(json); // entries are read below as { scope, project, name }
+ console.log("\n" + c.bold("Claude Code — pick a server to update"));
+ servers.forEach((s, i) => {
+ const loc = s.scope === "user" ? "user" : `local:${s.project}`;
+ console.log(` ${i + 1}) ${s.name} ${c.dim("[" + loc + "]")}`);
+ });
+ console.log(` n) enter a different name (create under this project if missing)`);
+ console.log(` s) skip Claude Code`);
+ const ans = (await prompt(` choose [s]: `)).toLowerCase();
+
+ let ref; // { scope, project, name, cfg } describing the server entry to edit
+ if (ans === "" || ans === "s") {
+ console.log(c.dim(" skipped Claude Code."));
+ return;
+ } else if (ans === "n") {
+ const name = (await ask("server name", "gemini-cli")).trim() || "gemini-cli";
+ // Search existing across scopes; else create under the current project (local).
+ const existing = servers.find((s) => s.name === name) ||
+ (json.mcpServers?.[name] && { scope: "user", project: null, name, cfg: json.mcpServers[name] }) ||
+ (json.projects?.[repoRoot]?.mcpServers?.[name] && { scope: "local", project: repoRoot, name, cfg: json.projects[repoRoot].mcpServers[name] });
+ if (existing) {
+ ref = existing;
+ } else {
+ console.log(c.dim(` '${name}' not found — will create it under project ${repoRoot} (local scope) using \`npx -y gemini-mcp-tool\`.`));
+ if (!(await confirm("create it?", true))) return;
+ json.projects = json.projects || {}; // create each level of the nested path only where it is missing
+ json.projects[repoRoot] = json.projects[repoRoot] || {};
+ json.projects[repoRoot].mcpServers = json.projects[repoRoot].mcpServers || {};
+ json.projects[repoRoot].mcpServers[name] = { type: "stdio", command: "npx", args: ["-y", "gemini-mcp-tool"], env: {} };
+ ref = { scope: "local", project: repoRoot, name, cfg: json.projects[repoRoot].mcpServers[name] };
+ }
+ } else {
+ const idx = Number(ans) - 1; // menu numbers are 1-based
+ if (!Number.isInteger(idx) || !servers[idx]) {
+ console.log(c.yellow(" unrecognised — skipping Claude Code."));
+ return;
+ }
+ ref = servers[idx];
+ }
+
+ // Merge env into the chosen server (in memory only — the file is written after the confirm below).
+ const target = ref.scope === "user" ? json.mcpServers[ref.name] : json.projects[ref.project].mcpServers[ref.name];
+ target.env = target.env || {};
+ for (const key of changedKeys) {
+ if (changes[key].action === "unset") delete target.env[key];
+ else target.env[key] = changes[key].value;
+ }
+
+ const loc = ref.scope === "user" ? "user" : `local:${ref.project}`;
+ console.log(`\n Resulting env for ${c.cyan(ref.name)} ${c.dim("[" + loc + "]")}:`);
+ const entries = Object.entries(target.env); // the full merged env, not just the changed keys
+ if (entries.length === 0) console.log(c.dim(" (empty)"));
+ for (const [k, v] of entries) console.log(` ${k} = ${c.cyan(v)}`);
+ console.log(c.dim(` Editing ${claudeConfigPath} (a ${c.bold("backup")} will be written to .bak first).`));
+ if (!(await confirm("write this change?", true))) {
+ console.log(c.dim(" not written."));
+ return;
+ }
+
+ try { // order: copy to .bak, write a temp file, then rename it over the config
+ copyFileSync(claudeConfigPath, claudeConfigPath + ".bak");
+ const tmp = `${claudeConfigPath}.tmp-${process.pid}`; // pid-suffixed temp file next to the config
+ writeFileSync(tmp, JSON.stringify(json, null, 2) + "\n", "utf8");
+ renameSync(tmp, claudeConfigPath);
+ console.log(` ${OK} updated ${c.cyan(ref.name)} in ${claudeConfigPath} ${c.dim("(backup: " + claudeConfigPath + ".bak)")}`);
+ console.log(` ${WARN} ${c.dim("restart Claude Code to pick up the change (avoid editing while it's running).")}`);
+ } catch (e) { // failures are reported, not rethrown
+ console.log(` ${BAD} failed to write config: ${e instanceof Error ? e.message : String(e)}`);
+ }
+}
+
+// ── dispatch ────────────────────────────────────────────────────────────────
+const mode = (process.argv[2] || "").toLowerCase();
+if (mode === "setup") {
+ runSetup().catch((e) => {
+ console.error(e instanceof Error ? e.message : String(e));
+ process.exit(1);
+ });
} else {
- console.log(` ${BAD} not found on PATH`);
- if (backend === "gemini") {
- problems.push(
- `Gemini CLI not found. Install it (npm i -g @google/gemini-cli) or set ${ENV.GEMINI_CLI_PATH} to its full path.`,
- );
- }
-}
-
-// ── Antigravity CLI (agy) ─────────────────────────────────────────────────--
-heading("Antigravity CLI (agy)");
-const agy = detectCli("agy");
-const agyDataDir = path.join(os.homedir(), ".gemini", "antigravity-cli");
-if (agy.found) {
- console.log(` ${OK} found`);
- console.log(` path ${agy.primary}`);
- console.log(` version ${agy.version ? c.cyan(agy.version) : c.yellow("(could not read --version)")}`);
- console.log(` data dir ${existsSync(agyDataDir) ? OK + " " + agyDataDir : WARN + " missing (run `agy -i` once to authenticate)"}`);
-} else {
- console.log(` ${c.dim("not installed")} ${c.dim("— optional; the future backend once Gemini CLI retires 2026-06-18")}`);
- if (backend === "agy") {
- problems.push("GEMINI_MCP_BACKEND=agy but the agy executable was not found on PATH.");
- }
-}
-
-// ── Model configuration ───────────────────────────────────────────────────--
-heading("Model configuration");
-const defaultModel = (process.env[ENV.MODEL] || "").trim();
-const flashModel = (process.env[ENV.FLASH_MODEL] || "").trim() || DEFAULT_FLASH_MODEL;
-console.log(` default model ${defaultModel ? c.cyan(defaultModel) + c.dim(" (GEMINI_MODEL)") : c.dim("(Gemini CLI's own default; pass model: or set GEMINI_MODEL)")}`);
-console.log(` flash fallback ${c.cyan(flashModel)}${process.env[ENV.FLASH_MODEL] ? c.dim(" (GEMINI_FLASH_MODEL)") : c.dim(" (default)")}`);
-if (backend === "agy") {
- console.log(` ${WARN} agy print-mode ignores model selection (hardcoded to Gemini 3.5 Flash)`);
-}
-
-// ── Approval & timeout ─────────────────────────────────────────────────────--
-heading("Behaviour");
-const approval = (process.env[ENV.APPROVAL_MODE] || "").trim();
-if (!approval) {
- console.log(` approval mode ${c.dim("(unset → no flag; plain Q&A)")}`);
-} else if (APPROVAL_MODES.includes(approval)) {
- console.log(` approval mode ${c.cyan(approval)}`);
- if (approval === "plan") console.log(` ${WARN} 'plan' makes Gemini an autonomous planner in headless mode — not ideal for plain Q&A`);
-} else {
- console.log(` approval mode ${c.yellow(approval)} ${WARN} not one of ${APPROVAL_MODES.join("/")} — will be ignored`);
-}
-const rawTimeout = (process.env[ENV.TIMEOUT_MS] || "").trim();
-let timeoutMs = 0; // disabled by default (1.1.6 parity: waits forever)
-if (rawTimeout) {
- const n = Number(rawTimeout);
- timeoutMs = Number.isFinite(n) && n > 0 ? n : 0;
-}
-console.log(` timeout ${c.cyan(humanizeMs(timeoutMs))}${rawTimeout ? c.dim(" (GEMINI_MCP_TIMEOUT_MS)") : c.dim(" (default: disabled)")}`);
-
-// ── Environment variables ──────────────────────────────────────────────────--
-heading("Environment variables (this shell)");
-for (const key of Object.values(ENV)) console.log(" " + envLine(key));
-console.log(c.dim("\n Note: your MCP client sets its own env for the server process — these are"));
-console.log(c.dim(" the values in the shell running this doctor, which may differ."));
-
-// ── Summary ────────────────────────────────────────────────────────────────--
-heading("Summary");
-if (problems.length === 0) {
- console.log(` ${OK} ${c.green("No problems detected.")} Active backend '${backend}' looks ready.`);
-} else {
- console.log(` ${BAD} ${c.red(`${problems.length} issue(s) found:`)}`);
- for (const p of problems) console.log(` - ${p}`);
+ runReport();
}
-console.log("");
-process.exit(problems.length === 0 ? 0 : 1);
diff --git a/src/index.ts b/src/index.ts
index a1d10ee..32b17f4 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -18,16 +18,22 @@ import {
} from "@modelcontextprotocol/sdk/types.js";
import { readFileSync } from "node:fs";
import { Logger } from "./utils/logger.js";
+import { loadEnvFile } from "./utils/envFile.js";
import { PROTOCOL, ToolArguments } from "./constants.js";
-import {
- getToolDefinitions,
- getPromptDefinitions,
- executeTool,
- toolExists,
- getPromptMessage
+import {
+ getToolDefinitions,
+ getPromptDefinitions,
+ executeTool,
+ toolExists,
+ getPromptMessage
} from "./tools/index.js";
+// Load the optional .env (global per-install config from `npm run doctor setup`)
+// before anything reads process.env. No-op when no .env is present; never
+// overrides env already set by the shell or the MCP client.
+loadEnvFile();
+
// Read the version from package.json at runtime so it never drifts from the
// published version (it previously hardcoded an out-of-date "1.1.4").
const pkg = JSON.parse(
diff --git a/src/utils/envFile.test.ts b/src/utils/envFile.test.ts
new file mode 100644
index 0000000..03d6e68
--- /dev/null
+++ b/src/utils/envFile.test.ts
@@ -0,0 +1,33 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { parseEnv } from "./envFile.js";
+
+test("parseEnv: basic KEY=VALUE pairs", () => {
+ const r = parseEnv("GEMINI_MODEL=gemini-2.5-pro\nGEMINI_MCP_TIMEOUT_MS=1800000");
+ assert.equal(r.GEMINI_MODEL, "gemini-2.5-pro");
+ assert.equal(r.GEMINI_MCP_TIMEOUT_MS, "1800000"); // strict assert: the value must stay a string
+});
+
+test("parseEnv: skips blanks and # comments", () => {
+ const r = parseEnv("# a comment\n\n # indented comment\nGEMINI_MODEL=x\n"); // includes a comment with leading whitespace
+ assert.deepEqual(Object.keys(r), ["GEMINI_MODEL"]); // skipped lines must not produce any key
+ assert.equal(r.GEMINI_MODEL, "x");
+});
+
+test("parseEnv: strips one layer of matching quotes and honours `export`", () => {
+ const r = parseEnv(`export GEMINI_MODEL="gemini 2.5"\nGEMINI_CLI_PATH='/a/b c/gemini'`); // one double-quoted, one single-quoted value
+ assert.equal(r.GEMINI_MODEL, "gemini 2.5");
+ assert.equal(r.GEMINI_CLI_PATH, "/a/b c/gemini");
+});
+
+test("parseEnv: keeps '=' inside values and trims surrounding whitespace", () => {
+ const r = parseEnv(" GEMINI_MODEL = a=b=c \nGEMINI_FLASH_MODEL= flash "); // only the first '=' separates key from value
+ assert.equal(r.GEMINI_MODEL, "a=b=c");
+ assert.equal(r.GEMINI_FLASH_MODEL, "flash");
+});
+
+test("parseEnv: ignores malformed lines without '='", () => {
+ const r = parseEnv("NOT_AN_ASSIGNMENT\nGEMINI_MODEL=ok");
+ assert.equal(r.NOT_AN_ASSIGNMENT, undefined); // the bare word must not become a key
+ assert.equal(r.GEMINI_MODEL, "ok");
+});
diff --git a/src/utils/envFile.ts b/src/utils/envFile.ts
new file mode 100644
index 0000000..ecf5535
--- /dev/null
+++ b/src/utils/envFile.ts
@@ -0,0 +1,75 @@
+import { readFileSync, existsSync } from "node:fs";
+import * as path from "node:path";
+import { fileURLToPath } from "node:url";
+import { ENV } from "../constants.js";
+import { Logger } from "./logger.js";
+
+// Allow-list of importable keys: only the values of ENV are ever copied from a
+// .env — never arbitrary keys — so an unrelated .env sitting in the launch
+// directory can't inject variables into the server process.
+const KNOWN_KEYS: ReadonlySet<string> = new Set(Object.values(ENV));
+
+/** Parse dotenv-style text into a flat KEY → value map (no expansion or multi-line values).
+ *  Blank lines, `#` comments and lines without `=` are skipped; a later duplicate key wins.
+ *  Keys may carry an `export ` prefix; one layer of matching quotes is stripped from values. */
+export function parseEnv(content: string): Record<string, string> {
+  const out: Record<string, string> = {};
+  for (const rawLine of content.split(/\r?\n/)) {
+    const line = rawLine.trim();
+    if (!line || line.startsWith("#")) continue; // blank line or comment
+    const eq = line.indexOf("="); // first '=' only, so values may themselves contain '='
+    if (eq === -1) continue; // not an assignment
+    let key = line.slice(0, eq).trim();
+    if (key.startsWith("export ")) key = key.slice(7).trim(); // 7 === "export ".length
+    let val = line.slice(eq + 1).trim();
+    // Strip a single layer of matching surrounding quotes.
+    const quote = val.charAt(0);
+    if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) {
+      val = val.slice(1, -1);
+    }
+    out[key] = val;
+  }
+  return out;
+}
+
+function candidatePaths(): string[] {
+  // Package-root .env first (this module sits two levels below the root, e.g. dist/utils/),
+  // then the launch directory's .env when that is a different directory.
+  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
+  const root = path.resolve(moduleDir, "..", "..");
+  const launchDir = process.cwd();
+  const extra = path.resolve(launchDir) === root ? [] : [path.join(launchDir, ".env")];
+  return [path.join(root, ".env"), ...extra];
+}
+
+/**
+ * Load recognised config keys ({@link ENV}) from the first readable `.env`
+ * candidate into `process.env`, WITHOUT overriding any non-empty value already
+ * set — a shell export or the MCP client's own `env` block always wins.
+ *
+ * This is the "global" (per-install) config source written by `npm run doctor
+ * setup`. It is a no-op when no `.env` exists, so installs that don't use one
+ * behave exactly as before (1.1.6 parity).
+ */
+export function loadEnvFile(): void {
+  for (const p of candidatePaths()) {
+    if (!existsSync(p)) continue;
+    let parsed: Record<string, string>;
+    try {
+      parsed = parseEnv(readFileSync(p, "utf8"));
+    } catch (e) {
+      Logger.warn(`Could not read ${p}: ${e instanceof Error ? e.message : String(e)}`);
+      continue; // unreadable candidate — try the next one
+    }
+    let applied = 0;
+    for (const [k, v] of Object.entries(parsed)) {
+      if (!KNOWN_KEYS.has(k)) continue; // only allow-listed keys are imported
+      // Don't override an already-set value (shell/client env wins over .env).
+      if (process.env[k] !== undefined && process.env[k] !== "") continue;
+      process.env[k] = v;
+      applied++;
+    }
+    if (applied > 0) Logger.debug(`Loaded ${applied} setting(s) from ${p}`);
+    return; // first existing .env wins
+  }
+}
From 32b855fc087c7b6b72dd7dc7b17e007e19c09839 Mon Sep 17 00:00:00 2001
From: jamubc <150970140+jamubc@users.noreply.github.com>
Date: Sun, 31 May 2026 18:27:10 -0700
Subject: [PATCH 8/8] test: categorized suite (unit/integration/e2e) + CI
gating
Restructure into test/{unit,integration,e2e}: migrate the 7 colocated unit
tests; add changeMode pipeline / chunkCache / registry / brainstorm unit
coverage; hermetic integration tests (changeMode->fetch-chunk flow, registry
-> tool contract); and a live e2e suite driving the real gemini through the
MCP server (auto-skips without gemini). Category-aware runner, NODE_ENV=test
log muting, tsconfig.test.json typecheck, CI gating on Node 18/20/22.
Reference branch for the phased re-derivation off main.
---
.github/workflows/ci.yml | 21 ++-
package.json | 7 +-
scripts/run-tests.mjs | 65 ++++++-
src/tools/brainstorm.tool.ts | 4 +-
src/utils/logger.ts | 10 ++
test/README.md | 85 +++++++++
test/e2e/ask-gemini.e2e.test.ts | 75 ++++++++
test/e2e/fixtures/sentinel.txt | 3 +
test/e2e/harness.ts | 165 ++++++++++++++++++
test/e2e/server.e2e.test.ts | 62 +++++++
test/integration/changeMode-pipeline.test.ts | 77 ++++++++
test/integration/tool-contract.test.ts | 51 ++++++
{src => test/unit}/backends/agy.test.ts | 2 +-
{src => test/unit}/backends/gemini.test.ts | 2 +-
{src => test/unit}/backends/index.test.ts | 2 +-
test/unit/tools/brainstorm.test.ts | 61 +++++++
test/unit/tools/registry.test.ts | 63 +++++++
test/unit/utils/changeModeChunker.test.ts | 73 ++++++++
test/unit/utils/changeModeParser.test.ts | 89 ++++++++++
test/unit/utils/changeModeTranslator.test.ts | 72 ++++++++
test/unit/utils/chunkCache.test.ts | 96 ++++++++++
.../unit}/utils/commandExecutor.test.ts | 2 +-
{src => test/unit}/utils/envFile.test.ts | 2 +-
.../unit}/utils/geminiExecutor.test.ts | 2 +-
.../unit}/utils/timeoutManager.test.ts | 2 +-
tsconfig.test.json | 8 +
26 files changed, 1073 insertions(+), 28 deletions(-)
create mode 100644 test/README.md
create mode 100644 test/e2e/ask-gemini.e2e.test.ts
create mode 100644 test/e2e/fixtures/sentinel.txt
create mode 100644 test/e2e/harness.ts
create mode 100644 test/e2e/server.e2e.test.ts
create mode 100644 test/integration/changeMode-pipeline.test.ts
create mode 100644 test/integration/tool-contract.test.ts
rename {src => test/unit}/backends/agy.test.ts (92%)
rename {src => test/unit}/backends/gemini.test.ts (98%)
rename {src => test/unit}/backends/index.test.ts (92%)
create mode 100644 test/unit/tools/brainstorm.test.ts
create mode 100644 test/unit/tools/registry.test.ts
create mode 100644 test/unit/utils/changeModeChunker.test.ts
create mode 100644 test/unit/utils/changeModeParser.test.ts
create mode 100644 test/unit/utils/changeModeTranslator.test.ts
create mode 100644 test/unit/utils/chunkCache.test.ts
rename {src => test/unit}/utils/commandExecutor.test.ts (97%)
rename {src => test/unit}/utils/envFile.test.ts (95%)
rename {src => test/unit}/utils/geminiExecutor.test.ts (91%)
rename {src => test/unit}/utils/timeoutManager.test.ts (90%)
create mode 100644 tsconfig.test.json
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f865fbe..acfe29d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,22 +12,27 @@ jobs:
strategy:
matrix:
- node-version: [16.x, 18.x, 20.x]
-
+ # node:test (used by the suite) requires Node >= 18.
+ node-version: [18.x, 20.x, 22.x]
+
steps:
- uses: actions/checkout@v4
-
+
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
-
+
- name: Install dependencies
run: npm ci
-
+
- name: Build
run: npm run build
-
+
+ - name: Type-check (source + tests)
+ run: npm run lint
+
+ # Hermetic suite only (unit + integration). The e2e suite needs an
+ # authenticated gemini CLI and is run on demand via `npm run test:e2e`.
- name: Run tests
- run: npm test
- continue-on-error: true
\ No newline at end of file
+ run: npm test
\ No newline at end of file
diff --git a/package.json b/package.json
index efa471a..c124fbf 100644
--- a/package.json
+++ b/package.json
@@ -13,8 +13,11 @@
"start": "node dist/index.js",
"dev": "tsc && node dist/index.js",
"doctor": "node scripts/doctor.mjs",
- "test": "node scripts/run-tests.mjs",
- "lint": "tsc --noEmit",
+ "test": "node scripts/run-tests.mjs unit integration",
+ "test:unit": "node scripts/run-tests.mjs unit",
+ "test:integration": "node scripts/run-tests.mjs integration",
+ "test:e2e": "npm run build && node scripts/run-tests.mjs e2e",
+ "lint": "tsc -p tsconfig.test.json",
"contribute": "tsx src/contribute.ts",
"prepublishOnly": "echo '⚠️ Remember to test locally first!' && npm run build",
"docs:dev": "vitepress dev docs",
diff --git a/scripts/run-tests.mjs b/scripts/run-tests.mjs
index d1a978f..f671b6e 100644
--- a/scripts/run-tests.mjs
+++ b/scripts/run-tests.mjs
@@ -1,16 +1,47 @@
#!/usr/bin/env node
-// Discover and run every *.test.ts under src/ with the built-in node:test
-// runner, using the tsx loader so the TypeScript sources run directly.
+// Category-aware test runner. Discovers *.test.ts under the selected category
+// folders (test/unit, test/integration, test/e2e) and runs them with the
+// built-in node:test runner via the tsx loader, so the TypeScript sources run
+// directly.
+//
+// Usage:
+// node scripts/run-tests.mjs # default: unit + integration (hermetic)
+// node scripts/run-tests.mjs unit # one category
+// node scripts/run-tests.mjs integration e2e # several
+// node scripts/run-tests.mjs all # unit + integration + e2e
+//
+// Categories:
+// unit pure, single-module tests. No subprocess, no network, no real CLI.
+// integration several real modules wired together. Still hermetic — never the real gemini CLI.
+// e2e the real gemini CLI driven through the real MCP server over stdio. Opt-in (live).
import { spawnSync } from "node:child_process";
-import { readdirSync, statSync } from "node:fs";
+import { readdirSync, statSync, existsSync } from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
-const srcDir = path.join(scriptDir, "..", "src");
+const testDir = path.join(scriptDir, "..", "test");
+
+const KNOWN = ["unit", "integration", "e2e"];
+const DEFAULT = ["unit", "integration"]; // the hermetic suite `npm test` runs and CI gates on
+
+function resolveCategories(argv) { // CLI args → ordered category list; exits with status 2 on an unknown name
+  const requested = argv.slice(2).map((arg) => arg.toLowerCase());
+  if (requested.length === 0) return DEFAULT;
+  if (requested.includes("all")) return KNOWN;
+  const rejected = requested.filter((name) => !KNOWN.includes(name));
+  if (rejected.length !== 0) {
+    console.error(`Unknown test category: ${rejected.join(", ")}`);
+    console.error(`Valid categories: ${KNOWN.join(", ")}, all`);
+    process.exit(2);
+  }
+  // Filter KNOWN (not the args) so the result is de-duplicated and in KNOWN's order.
+  return KNOWN.filter((name) => requested.includes(name));
+}
function findTests(dir) {
const found = [];
+ if (!existsSync(dir)) return found;
for (const entry of readdirSync(dir)) {
const full = path.join(dir, entry);
if (statSync(full).isDirectory()) found.push(...findTests(full));
@@ -19,21 +50,37 @@ function findTests(dir) {
return found;
}
-const tests = findTests(srcDir);
+const categories = resolveCategories(process.argv);
+const tests = categories.flatMap((c) => findTests(path.join(testDir, c)));
+
if (tests.length === 0) {
- console.log("No test files found.");
+ console.log(`No test files found for: ${categories.join(", ")}`);
process.exit(0);
}
+console.log(`Running ${tests.length} test file(s) [${categories.join(", ")}]`);
+
// tsx is loaded via `--import` on Node >= 20.6, and the older `--loader` flag
-// below that (the engines floor is >=18, where `--import` may be unavailable).
+// below that.
const [major, minor] = process.versions.node.split(".").map(Number);
const supportsImport = major > 20 || (major === 20 && minor >= 6);
const loaderArgs = supportsImport ? ["--import", "tsx"] : ["--loader", "tsx"];
+// Mute routine [GMCPT] logging for the hermetic categories so the reporter
+// output stays readable. The e2e suite keeps full server logs (its child
+// server process inherits this env), which is useful for debugging live calls.
+const env = { ...process.env };
+if (!categories.includes("e2e")) env.NODE_ENV = "test";
+
+// Run test files serially (--test-concurrency=1). The changeMode chunk cache is
+// a single shared on-disk dir (os.tmpdir()/gemini-mcp-chunks); files that touch
+// it (chunkCache, changeMode-pipeline) would otherwise race across parallel
+// worker processes. Serial e2e also avoids hitting the gemini quota in parallel.
+// The hermetic suite is tiny, so the cost is negligible. (Flag available on the
+// Node 18.19+/20.10+/22 versions CI runs.)
const result = spawnSync(
process.execPath,
- [...loaderArgs, "--test", ...tests],
- { stdio: "inherit" },
+ [...loaderArgs, "--test", "--test-concurrency=1", ...tests],
+ { stdio: "inherit", env },
);
process.exit(result.status ?? 1);
diff --git a/src/tools/brainstorm.tool.ts b/src/tools/brainstorm.tool.ts
index e5680d9..fac8418 100644
--- a/src/tools/brainstorm.tool.ts
+++ b/src/tools/brainstorm.tool.ts
@@ -4,7 +4,7 @@ import { Logger } from '../utils/logger.js';
import { executeGeminiCLI } from '../utils/geminiExecutor.js';
import { type ApprovalMode } from '../constants.js';
-function buildBrainstormPrompt(config: {
+export function buildBrainstormPrompt(config: {
prompt: string;
methodology: string;
domain?: string;
@@ -67,7 +67,7 @@ Begin brainstorming session:`;
/**
* Returns methodology-specific instructions for structured brainstorming
*/
-function getMethodologyInstructions(methodology: string, domain?: string): string {
+export function getMethodologyInstructions(methodology: string, domain?: string): string {
const methodologies: Record = {
'divergent': `**Divergent Thinking Approach:**
- Generate maximum quantity of ideas without self-censoring
diff --git a/src/utils/logger.ts b/src/utils/logger.ts
index e5be641..35269f2 100644
--- a/src/utils/logger.ts
+++ b/src/utils/logger.ts
@@ -6,11 +6,20 @@ export class Logger {
return `${LOG_PREFIX} ${message}` + "\n";
}
+ // True only while NODE_ENV is "test"; log/warn/debug return early on it so the
+ // test reporter output stays readable. Any other value leaves logging as before.
+ private static get muted(): boolean {
+   const { NODE_ENV: nodeEnv } = process.env;
+   return nodeEnv === "test";
+ }
+
static log(message: string, ...args: any[]): void {
+ if (this.muted) return;
console.warn(this.formatMessage(message), ...args);
}
static warn(message: string, ...args: any[]): void {
+ if (this.muted) return;
console.warn(this.formatMessage(message), ...args);
}
@@ -19,6 +28,7 @@ export class Logger {
}
static debug(message: string, ...args: any[]): void {
+ if (this.muted) return;
console.warn(this.formatMessage(message), ...args);
}
diff --git a/test/README.md b/test/README.md
new file mode 100644
index 0000000..392d407
--- /dev/null
+++ b/test/README.md
@@ -0,0 +1,85 @@
+# Tests
+
+The suite is split into three categories by **how much of the real world they touch**. Each lives in its own folder and runs with the built-in [`node:test`](https://nodejs.org/api/test.html) runner via the `tsx` loader (no extra test framework).
+
+| Category | Folder | Touches the real gemini CLI? | Runs in CI? | Command |
+|---|---|---|---|---|
+| **unit** | `test/unit/` | No | Yes (gates merges) | `npm run test:unit` |
+| **integration** | `test/integration/` | No | Yes (gates merges) | `npm run test:integration` |
+| **e2e** | `test/e2e/` | **Yes — the real CLI** | No (opt-in, local) | `npm run test:e2e` |
+
+```bash
+npm test # unit + integration (the hermetic, CI-gating suite)
+npm run test:unit
+npm run test:integration
+npm run test:e2e # builds, then drives the REAL gemini CLI through the MCP server
+node scripts/run-tests.mjs all # everything (unit + integration + e2e)
+```
+
+## What goes where
+
+### `unit/` — pure, single-module
+Fast, deterministic tests of one module's logic. **No subprocess, no network, no real CLI.**
+Mirrors `src/` (`test/unit/utils/...`, `test/unit/backends/...`, `test/unit/tools/...`).
+Examples: argument builders, model/approval/timeout resolution, the changeMode
+parser/chunker/translator, the chunk cache, the registry's schema/prompt helpers,
+brainstorm prompt construction.
+
+### `integration/` — several real modules wired together
+Still **hermetic** — it never invokes the real gemini CLI. The "Gemini output" is a
+fixture string fed into the real downstream pipeline. Covers the cross-module flows a
+user actually hits:
+- the full **changeMode pipeline**: response string → parse → validate → chunk → cache →
+ `fetch-chunk` retrieval of later chunks;
+- the **registry → tool contract**: argument validation surfaced as friendly errors, and
+ every tool guard/error branch that resolves *without* calling Gemini.
+
+> Integration tests must **not** spawn the gemini CLI. Anything that needs a real model
+> response belongs in `e2e/`.
+
+### `e2e/` — the real product, end to end
+Spawns the **built MCP server** (`dist/index.js`) over stdio and connects with the MCP
+SDK client — exactly how Claude / mcpjam do. Tool calls exercise the whole path:
+protocol → registry → tool → backend → spawned **gemini** CLI. This is the automated
+replacement for manual mcpjam testing.
+
+- Gemini-dependent tests **auto-skip** when the `gemini` CLI is not on `PATH`, so the
+ suite degrades gracefully. The non-gemini tools (`ping`, `timeout-test`, `fetch-chunk`,
+ `tools/list`, `prompts/list`) always run.
+- `npm run test:e2e` builds first, so it tests exactly what ships.
+- Live model calls are slow and use your gemini quota; the model is pinned to
+ `gemini-2.5-flash` and each test has a generous timeout.
+- Every E2E MCP response is printed as a `node:test` diagnostic by default, so
+ passing results still show the exact raw response that each assertion checked.
+- Shared setup (spawning/closing the server, `gemini` detection, reading tool text) lives
+ in `test/e2e/harness.ts`.
+
+## Adding a test
+
+1. Pick the category by the table above. If it needs a real model answer, it's `e2e`.
+2. Create `test/<category>/<name>.test.ts` (e2e files are named `*.e2e.test.ts`).
+3. Use `node:test` + `node:assert/strict`:
+
+ ```ts
+ import { test } from "node:test";
+ import assert from "node:assert/strict";
+ import { thing } from "../../../src/utils/thing.js"; // unit: 3 levels up to src/
+
+ test("does the thing", () => {
+ assert.equal(thing(1), 2);
+ });
+ ```
+
+ For e2e, drive the server via the harness:
+
+ ```ts
+ import { startServer, textOf, GEMINI_SKIP } from "./harness.js";
+ // ...callTool, then assert on textOf(result)
+ ```
+
+4. Keep `unit`/`integration` hermetic. Run `npm test` (and `npm run lint` to type-check).
+
+## Notes
+- `npm test` sets `NODE_ENV=test`, which mutes routine `[GMCPT]` logging (errors still
+ print) so the reporter output stays readable. The e2e run keeps full server logs.
+- `npm run lint` type-checks `src/` **and** `test/` via `tsconfig.test.json`.
diff --git a/test/e2e/ask-gemini.e2e.test.ts b/test/e2e/ask-gemini.e2e.test.ts
new file mode 100644
index 0000000..313c770
--- /dev/null
+++ b/test/e2e/ask-gemini.e2e.test.ts
@@ -0,0 +1,75 @@
+import { test, before, after } from "node:test";
+import assert from "node:assert/strict";
+import { callGemini, callTool, startServer, textOf, GEMINI_SKIP, type ServerHandle } from "./harness.js";
+
+// LIVE tests: these drive the real gemini CLI through the real MCP server. They
+// auto-skip when gemini is not on PATH, so the suite degrades gracefully. Real
+// model calls are slow, hence the generous per-test timeout. Model is pinned to
+// flash for speed and to spare the pro daily quota.
+const LIVE = { skip: GEMINI_SKIP, timeout: 120_000 } as const;
+const MODEL = "gemini-2.5-flash";
+
+let server: ServerHandle;
+
+before(async () => {
+ server = await startServer();
+});
+after(async () => {
+ await server?.close();
+});
+
+test("ask-gemini answers a deterministic factual question", LIVE, async (t) => {
+ const { isError, text } = await callGemini(t, server, {
+ name: "ask-gemini",
+ arguments: { prompt: "What is 2 + 2? Reply with only the number.", model: MODEL },
+ });
+ assert.equal(isError, false, text);
+ assert.match(text, /Gemini response:/); // the tool's wrapper is always present
+ assert.match(text, /\b4\b/); // ...and the model actually answered
+});
+
+test("ask-gemini echoes the session id so a follow-up can resume it", LIVE, async (t) => {
+ // Unique per run: gemini persists sessions to disk, so a fixed id collides
+ // ("Session ID already exists") on the next run. The [session: …] marker is
+ // added by the tool itself, so asserting on this exact id is deterministic.
+ const sessionId = `e2e-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
+ const res = await callTool(t, server, {
+ name: "ask-gemini",
+ arguments: { prompt: "Reply with the single word: ok", model: MODEL, sessionId },
+ });
+ const text = textOf(res);
+ assert.equal(res.isError ?? false, false, text);
+ assert.ok(text.includes(`[session: ${sessionId}]`), text);
+});
+
+test("ask-gemini inlines an in-project @file reference", LIVE, async (t) => {
+ const { isError, text } = await callGemini(t, server, {
+ name: "ask-gemini",
+ arguments: {
+ prompt:
+ "@test/e2e/fixtures/sentinel.txt Reply with only the sentinel token that appears in this file.",
+ model: MODEL,
+ },
+ });
+ assert.equal(isError, false, text);
+ assert.match(text, /BANANA_SENTINEL_42/);
+});
+
+test("Help returns the gemini CLI help text", LIVE, async (t) => {
+ const res = await callTool(t, server, { name: "Help", arguments: {} });
+ const text = textOf(res);
+ assert.equal(res.isError ?? false, false, text);
+ assert.match(text, /usage|--model|gemini/i);
+});
+
+// brainstorm generates free-form ideas: the slowest call, and nondeterministic
+// (flash can even return empty). Its prompt construction is unit-tested, and its
+// integration path is identical to ask-gemini (proven above), so here we only
+// verify the live round-trip succeeds end-to-end. Larger timeout, single attempt.
+test("brainstorm completes a real round-trip through gemini", { skip: GEMINI_SKIP, timeout: 180_000 }, async (t) => {
+ const res = await callTool(t, server, {
+ name: "brainstorm",
+ arguments: { prompt: "one quick way to speed up CI", model: MODEL, ideaCount: 1, includeAnalysis: false },
+ });
+ assert.equal(res.isError ?? false, false, textOf(res));
+});
diff --git a/test/e2e/fixtures/sentinel.txt b/test/e2e/fixtures/sentinel.txt
new file mode 100644
index 0000000..107d5f6
--- /dev/null
+++ b/test/e2e/fixtures/sentinel.txt
@@ -0,0 +1,3 @@
+This fixture is read by the e2e @file test.
+The sentinel token is BANANA_SENTINEL_42.
+If you can read this line, the @file inlining worked.
diff --git a/test/e2e/harness.ts b/test/e2e/harness.ts
new file mode 100644
index 0000000..556a9fc
--- /dev/null
+++ b/test/e2e/harness.ts
@@ -0,0 +1,165 @@
+// Shared harness for the live e2e suite. Spawns the REAL MCP server (the built
+// dist/index.js) over stdio and connects with the MCP SDK client — the same way
+// a real client (Claude, mcpjam, etc.) does. Tool calls therefore exercise the
+// entire product: protocol -> registry -> tool -> backend -> spawned gemini CLI.
+//
+// This file is intentionally not named *.test.ts so the runner does not execute
+// it directly.
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
+import { execSync } from "node:child_process";
+import { existsSync } from "node:fs";
+import path from "node:path";
+import type { TestContext } from "node:test";
+import { inspect } from "node:util";
+import { fileURLToPath } from "node:url";
+
+const here = path.dirname(fileURLToPath(import.meta.url));
+export const REPO_ROOT = path.resolve(here, "..", "..");
+const SERVER_ENTRY = path.join(REPO_ROOT, "dist", "index.js");
+
+/** True when the real gemini CLI is installed and resolvable on PATH. */
+export function hasGemini(): boolean {
+ try {
+ execSync("gemini --version", { stdio: "ignore" }); // throws on a missing binary or non-zero exit
+ return true;
+ } catch {
+ return false;
+ }
+}
+
+/** Skip reason for gemini-dependent tests, or false when gemini is available. */
+export const GEMINI_SKIP: string | false = hasGemini()
+ ? false
+ : "gemini CLI not on PATH — run `npm i -g @google/gemini-cli` and authenticate";
+
+export interface ServerHandle {
+ client: Client; // MCP SDK client already connected to the spawned server
+ close: () => Promise<void>; // closes the stdio transport the server runs on
+}
+
+/** Start the built MCP server and return a connected client; `extraEnv` entries override inherited variables. */
+export async function startServer(extraEnv: Record<string, string> = {}): Promise<ServerHandle> {
+ if (!existsSync(SERVER_ENTRY)) {
+ throw new Error(
+ `Server entry not found at ${SERVER_ENTRY}. Run \`npm run build\` first ` +
+ `(\`npm run test:e2e\` does this for you).`,
+ );
+ }
+
+ // Pass the parent environment through (PATH so gemini resolves, HOME so the
+ // gemini auth/config is found), plus any per-test overrides.
+ const env: Record<string, string> = {};
+ for (const [k, v] of Object.entries(process.env)) {
+ if (typeof v === "string") env[k] = v; // process.env values may be undefined; keep only real strings
+ }
+ Object.assign(env, extraEnv);
+
+ const transport = new StdioClientTransport({
+ command: process.execPath,
+ args: [SERVER_ENTRY],
+ env,
+ stderr: "inherit", // surface server logs/errors during e2e runs
+ });
+ const client = new Client({ name: "gmcpt-e2e", version: "0.0.0" }, { capabilities: {} });
+ await client.connect(transport);
+
+ return { client, close: () => transport.close() };
+}
+
+function jsonReplacer(_key: string, value: unknown): unknown {
+ if (value instanceof Error) { // Error fields are non-enumerable, so plain JSON.stringify would drop them
+ const raw: Record<string, unknown> = {
+ name: value.name,
+ message: value.message,
+ };
+ for (const key of Object.getOwnPropertyNames(value)) {
+ if (key !== "stack") raw[key] = (value as unknown as Record<string, unknown>)[key]; // copy own props, minus the stack
+ }
+ return raw;
+ }
+ return value;
+}
+
+function formatRaw(value: unknown): string {
+ try {
+ const json = JSON.stringify(value, jsonReplacer, 2);
+ if (json !== undefined) return json; // stringify yields undefined for e.g. a bare undefined or a function
+ } catch {
+ // Fall back to inspect for unexpected non-JSON SDK objects (anything that makes JSON.stringify throw).
+ }
+ return inspect(value, {
+ depth: null,
+ maxArrayLength: null,
+ maxStringLength: null,
+ breakLength: 100,
+ });
+}
+
+export function rawResponse(t: TestContext, label: string, value: unknown): void {
+ t.diagnostic(`${label} raw response:\n${formatRaw(value)}`);
+}
+
+export async function listTools(t: TestContext, server: ServerHandle) {
+ const result = await server.client.listTools();
+ rawResponse(t, "tools/list", result);
+ return result;
+}
+
+export async function listPrompts(t: TestContext, server: ServerHandle) {
+ const result = await server.client.listPrompts();
+ rawResponse(t, "prompts/list", result);
+ return result;
+}
+
+export async function callTool(
+ t: TestContext,
+ server: ServerHandle,
+ params: Parameters<Client["callTool"]>[0], // same request shape the SDK client's callTool accepts
+) {
+ const result = await server.client.callTool(params);
+ rawResponse(t, `tools/call ${params.name}`, result); // print the raw response as a test diagnostic
+ return result;
+}
+
+/**
+ * Call a tool whose assertions depend on the live MODEL output, retrying on a
+ * transient empty/errored response (the model occasionally returns nothing).
+ * This verifies we eventually get a *valid* response without masking a real,
+ * persistent failure. Each attempt's raw response is printed. Use plain
+ * `callTool` for tools whose checks are deterministic (e.g. the session marker,
+ * `gemini --help`) so they aren't retried needlessly.
+ */
+export async function callGemini(
+ t: TestContext,
+ server: ServerHandle,
+ params: Parameters<Client["callTool"]>[0], // same request shape the SDK client's callTool accepts
+ retries = 2,
+): Promise<{ isError: boolean; text: string }> {
+ let isError = false;
+ let text = "";
+ for (let attempt = 1; attempt <= retries + 1; attempt++) { // one initial attempt plus `retries` retries
+ const res = await server.client.callTool(params);
+ isError = (res as { isError?: boolean }).isError ?? false;
+ text = textOf(res);
+ rawResponse(t, `tools/call ${params.name}${attempt > 1 ? ` (attempt ${attempt})` : ""}`, res);
+ if (!isError && text.trim().length > 0) break; // success: non-error result with non-blank text
+ if (attempt <= retries) {
+ t.diagnostic(`${params.name}: empty/errored response — retrying (${attempt}/${retries})`);
+ }
+ }
+ return { isError, text }; // reflects the last attempt made
+}
+
+/**
+ * Concatenate the text parts of a tool result. Typed as `unknown` because the
+ * SDK's callTool return is a union (the back-compat shape has no `content`);
+ * we narrow structurally here.
+ */
+export function textOf(result: unknown): string {
+ const content = (result as { content?: Array<{ type?: string; text?: string }> }).content ?? [];
+ return content
+ .filter((c) => c?.type === "text" && typeof c.text === "string")
+ .map((c) => c.text as string)
+ .join("\n");
+}
diff --git a/test/e2e/server.e2e.test.ts b/test/e2e/server.e2e.test.ts
new file mode 100644
index 0000000..2824d98
--- /dev/null
+++ b/test/e2e/server.e2e.test.ts
@@ -0,0 +1,62 @@
+import { test, before, after } from "node:test";
+import assert from "node:assert/strict";
+import { callTool, listPrompts, listTools, rawResponse, startServer, textOf, type ServerHandle } from "./harness.js";
+
+// Server + protocol + the tools that do NOT need the gemini CLI. These run
+// anywhere the project is built, with no gemini install or network.
+let server: ServerHandle;
+
+before(async () => {
+ server = await startServer();
+});
+after(async () => {
+ await server?.close();
+});
+
+test("lists every registered tool with a valid input schema", async (t) => {
+ const { tools } = await listTools(t, server);
+ const names = tools.map((tool) => tool.name); // `tool`, not `t`: avoid shadowing the TestContext
+ for (const expected of ["ask-gemini", "brainstorm", "fetch-chunk", "ping", "Help", "timeout-test"]) {
+ assert.ok(names.includes(expected), `tools/list is missing "${expected}" (got: ${names.join(", ")})`);
+ }
+ const ask = tools.find((tool) => tool.name === "ask-gemini");
+ assert.ok(ask);
+ assert.equal(ask!.inputSchema.type, "object");
+});
+
+test("lists prompts derived from the registry", async (t) => {
+ const { prompts } = await listPrompts(t, server);
+ assert.ok(prompts.map((p) => p.name).includes("ask-gemini"));
+});
+
+test("ping echoes a message back over the full MCP round-trip", async (t) => {
+ const res = await callTool(t, server, { name: "ping", arguments: { prompt: "hello-e2e" } });
+ assert.equal(res.isError ?? false, false);
+ assert.match(textOf(res), /hello-e2e/);
+});
+
+test("timeout-test runs and reports completion", async (t) => {
+ const res = await callTool(t, server, { name: "timeout-test", arguments: { duration: 50 } });
+ assert.equal(res.isError ?? false, false);
+ assert.match(textOf(res), /Timeout test completed successfully/);
+});
+
+test("fetch-chunk returns a clean cache-miss message for an unknown key", async (t) => {
+ const res = await callTool(t, server, {
+ name: "fetch-chunk",
+ arguments: { cacheKey: "00000000", chunkIndex: 1 },
+ });
+ assert.equal(res.isError ?? false, false);
+ assert.match(textOf(res), /Cache miss/);
+});
+
+test("an unknown tool name is reported as an error", async (t) => {
+ await assert.rejects(async () => {
+ try {
+ await server.client.callTool({ name: "not-a-real-tool", arguments: {} });
+ } catch (error) {
+ rawResponse(t, "tools/call not-a-real-tool thrown response", error);
+ throw error;
+ }
+ });
+});
diff --git a/test/integration/changeMode-pipeline.test.ts b/test/integration/changeMode-pipeline.test.ts
new file mode 100644
index 0000000..faf5ca7
--- /dev/null
+++ b/test/integration/changeMode-pipeline.test.ts
@@ -0,0 +1,77 @@
+import { test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+// This wires the full changeMode path that the ask-gemini tool drives for large
+// edits: a Gemini-style response string -> parse -> validate -> chunk -> cache,
+// then the fetch-chunk tool retrieving subsequent chunks. No CLI is involved —
+// the "Gemini output" is a fixture string, so the test is hermetic.
+import { processChangeModeOutput } from "../../src/utils/geminiExecutor.js";
+import { fetchChunkTool } from "../../src/tools/fetch-chunk.tool.js";
+import { clearCache } from "../../src/utils/chunkCache.js";
+
+const FENCE = "```";
+
+function block(file: string, line: number, oldCode: string, newCode: string): string {
+ return [`**FILE: ${file}:${line}**`, FENCE, "OLD:", oldCode, "NEW:", newCode, FENCE].join("\n");
+}
+
+// Four edits with large bodies (~6 KB each) exceed the 20 KB chunk budget,
+// forcing the response to be split and cached.
+function bigMultiEditResponse(): string {
+ const big = "a".repeat(6000);
+ return [
+ block("src/one.ts", 10, big, "one"),
+ block("src/two.ts", 20, big, "two"),
+ block("src/three.ts", 30, big, "three"),
+ block("src/four.ts", 40, big, "four"),
+ ].join("\n\n");
+}
+
+beforeEach(() => clearCache());
+afterEach(() => clearCache());
+
+test("a single-edit response renders one chunk with no continuation", async () => {
+ const out = await processChangeModeOutput(block("src/a.ts", 1, "const x = 1;", "const x = 2;"), undefined, undefined, "prompt-a");
+ assert.match(out, /CHANGEMODE OUTPUT/);
+ assert.ok(out.includes("const x = 2;"));
+ assert.doesNotMatch(out, /Chunk 1 of/); // single chunk => no chunk header
+});
+
+test("a large multi-edit response chunks, caches, and advertises fetch-chunk", async () => {
+ const first = await processChangeModeOutput(bigMultiEditResponse(), undefined, undefined, "prompt-big");
+ assert.match(first, /Chunk 1 of 2/);
+
+ // The continuation must surface a real 8-char cache key.
+ const m = first.match(/cacheKey="([a-f0-9]{8})"/);
+ assert.ok(m, "expected a fetch-chunk cacheKey in the first chunk");
+ const cacheKey = m![1];
+
+ // The fetch-chunk tool retrieves the next chunk from that key.
+ const second = await fetchChunkTool.execute({ cacheKey, chunkIndex: 2 });
+ assert.match(second, /Chunk 2 of 2/);
+
+ // ...and chunk 1 is still retrievable.
+ const again = await fetchChunkTool.execute({ cacheKey, chunkIndex: 1 });
+ assert.match(again, /Chunk 1 of 2/);
+});
+
+test("fetch-chunk reports an out-of-range index", async () => {
+ const first = await processChangeModeOutput(bigMultiEditResponse(), undefined, undefined, "prompt-range");
+ const cacheKey = first.match(/cacheKey="([a-f0-9]{8})"/)![1];
+ const out = await fetchChunkTool.execute({ cacheKey, chunkIndex: 99 });
+ assert.match(out, /Invalid chunk index/);
+});
+
+test("fetch-chunk reports a cache miss for an unknown (but well-formed) key", async () => {
+ const out = await fetchChunkTool.execute({ cacheKey: "00000000", chunkIndex: 1 });
+ assert.match(out, /Cache miss/);
+});
+
+test("fetch-chunk rejects a malformed cache key before touching the cache", async () => {
+ const out = await fetchChunkTool.execute({ cacheKey: "../../etc/passwd", chunkIndex: 1 });
+ assert.match(out, /Invalid cacheKey format/);
+});
+
+test("a response with no OLD/NEW edits yields a clear message", async () => {
+ const out = await processChangeModeOutput("Gemini replied with prose and no edits.", undefined, undefined, "prompt-none");
+ assert.match(out, /No edits found/);
+});
diff --git a/test/integration/tool-contract.test.ts b/test/integration/tool-contract.test.ts
new file mode 100644
index 0000000..f9df500
--- /dev/null
+++ b/test/integration/tool-contract.test.ts
@@ -0,0 +1,51 @@
+import { test, beforeEach } from "node:test";
+import assert from "node:assert/strict";
+// Drives the registry -> tool boundary for every path that resolves WITHOUT
+// invoking the Gemini CLI: argument validation, and the guard/error branches
+// inside the tools. (The happy path that actually calls Gemini is covered by
+// the e2e suite.) These must never spawn a subprocess.
+import { executeTool } from "../../src/tools/index.js";
+import { clearCache } from "../../src/utils/chunkCache.js";
+
+beforeEach(() => clearCache());
+
+test("executeTool surfaces zod validation as a friendly error", async () => {
+ // ask-gemini requires a non-empty prompt; the error names the offending field.
+ await assert.rejects(() => executeTool("ask-gemini", {}), /Invalid arguments for ask-gemini.*prompt/s);
+});
+
+test("executeTool throws for an unknown tool", async () => {
+ await assert.rejects(() => executeTool("no-such-tool", {}), /Unknown tool/);
+});
+
+test("fetch-chunk via the registry returns a cache-miss message (no spawn)", async () => {
+ const out = await executeTool("fetch-chunk", { cacheKey: "deadbeef", chunkIndex: 1 });
+ assert.match(out, /Cache miss/);
+});
+
+test("fetch-chunk via the registry rejects a malformed cache key (no spawn)", async () => {
+ const out = await executeTool("fetch-chunk", { cacheKey: "not-a-key", chunkIndex: 1 });
+ assert.match(out, /Invalid cacheKey format/);
+});
+
+test("ask-gemini rejects a malformed chunkCacheKey before calling Gemini", async () => {
+ const out = await executeTool("ask-gemini", {
+ prompt: "x",
+ changeMode: true,
+ chunkIndex: 1,
+ chunkCacheKey: "bad!key!",
+ });
+ assert.match(out, /Invalid chunkCacheKey format/);
+});
+
+test("ask-gemini changeMode continuation with a missing cache reports no edits (no spawn)", async () => {
+ // Well-formed key, but nothing cached -> the continuation path returns the
+ // "no edits found" message rather than shelling out to Gemini.
+ const out = await executeTool("ask-gemini", {
+ prompt: "x",
+ changeMode: true,
+ chunkIndex: 1,
+ chunkCacheKey: "deadbeef",
+ });
+ assert.match(out, /No edits found/);
+});
diff --git a/src/backends/agy.test.ts b/test/unit/backends/agy.test.ts
similarity index 92%
rename from src/backends/agy.test.ts
rename to test/unit/backends/agy.test.ts
index 3e9418c..8703b10 100644
--- a/src/backends/agy.test.ts
+++ b/test/unit/backends/agy.test.ts
@@ -1,6 +1,6 @@
import { test } from "node:test";
import assert from "node:assert/strict";
-import { buildAgyArgs } from "./agy.js";
+import { buildAgyArgs } from "../../../src/backends/agy.js";
test("buildAgyArgs maps prompt, sessions, sandbox, and yolo", () => {
assert.deepEqual(buildAgyArgs("hi", {}), ["-p", "hi"]);
diff --git a/src/backends/gemini.test.ts b/test/unit/backends/gemini.test.ts
similarity index 98%
rename from src/backends/gemini.test.ts
rename to test/unit/backends/gemini.test.ts
index 1a068f0..325a40b 100644
--- a/src/backends/gemini.test.ts
+++ b/test/unit/backends/gemini.test.ts
@@ -1,6 +1,6 @@
import { test } from "node:test";
import assert from "node:assert/strict";
-import { resolveApprovalMode, buildGeminiArgs, resolveModel } from "./gemini.js";
+import { resolveApprovalMode, buildGeminiArgs, resolveModel } from "../../../src/backends/gemini.js";
const ENV_KEY = "GEMINI_MCP_APPROVAL_MODE";
diff --git a/src/backends/index.test.ts b/test/unit/backends/index.test.ts
similarity index 92%
rename from src/backends/index.test.ts
rename to test/unit/backends/index.test.ts
index e5c3e4f..e853237 100644
--- a/src/backends/index.test.ts
+++ b/test/unit/backends/index.test.ts
@@ -1,6 +1,6 @@
import { test } from "node:test";
import assert from "node:assert/strict";
-import { getBackend } from "./index.js";
+import { getBackend } from "../../../src/backends/index.js";
test("getBackend defaults to gemini", () => {
assert.equal(getBackend({}).name, "gemini");
diff --git a/test/unit/tools/brainstorm.test.ts b/test/unit/tools/brainstorm.test.ts
new file mode 100644
index 0000000..6abe47a
--- /dev/null
+++ b/test/unit/tools/brainstorm.test.ts
@@ -0,0 +1,61 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+ buildBrainstormPrompt,
+ getMethodologyInstructions,
+} from "../../../src/tools/brainstorm.tool.js";
+
+test("getMethodologyInstructions returns the requested framework", () => {
+ assert.match(getMethodologyInstructions("scamper"), /SCAMPER/);
+ assert.match(getMethodologyInstructions("scamper"), /Substitute/);
+ assert.match(getMethodologyInstructions("divergent"), /Divergent Thinking/);
+ assert.match(getMethodologyInstructions("design-thinking"), /Empathize/);
+});
+
+test("getMethodologyInstructions falls back to the auto framework for unknown methodologies", () => {
+ const out = getMethodologyInstructions("not-a-real-methodology");
+ assert.match(out, /AI-Optimized Approach/);
+});
+
+test("getMethodologyInstructions weaves the domain into the auto framework", () => {
+ assert.match(getMethodologyInstructions("auto", "fintech"), /fintech/);
+});
+
+test("buildBrainstormPrompt embeds the challenge, idea count, and chosen framework", () => {
+ const prompt = buildBrainstormPrompt({
+ prompt: "How do we reduce churn?",
+ methodology: "scamper",
+ ideaCount: 7,
+ includeAnalysis: true,
+ });
+ assert.match(prompt, /# BRAINSTORMING SESSION/);
+ assert.ok(prompt.includes("How do we reduce churn?"));
+ assert.match(prompt, /Generate 7 distinct/);
+ assert.match(prompt, /SCAMPER/);
+ assert.match(prompt, /## Analysis Framework/); // analysis requested
+});
+
+test("buildBrainstormPrompt omits the analysis framework when not requested", () => {
+ const prompt = buildBrainstormPrompt({
+ prompt: "ideas",
+ methodology: "divergent",
+ ideaCount: 5,
+ includeAnalysis: false,
+ });
+ assert.doesNotMatch(prompt, /## Analysis Framework/);
+});
+
+test("buildBrainstormPrompt injects optional domain, constraints, and context", () => {
+ const prompt = buildBrainstormPrompt({
+ prompt: "ideas",
+ methodology: "auto",
+ domain: "healthcare",
+ constraints: "HIPAA compliant",
+ existingContext: "prior pilot failed",
+ ideaCount: 3,
+ includeAnalysis: false,
+ });
+ assert.ok(prompt.includes("healthcare"));
+ assert.ok(prompt.includes("HIPAA compliant"));
+ assert.ok(prompt.includes("prior pilot failed"));
+});
diff --git a/test/unit/tools/registry.test.ts b/test/unit/tools/registry.test.ts
new file mode 100644
index 0000000..e15287f
--- /dev/null
+++ b/test/unit/tools/registry.test.ts
@@ -0,0 +1,63 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+// Importing the tools index registers every tool in the shared registry.
+import {
+ getToolDefinitions,
+ getPromptDefinitions,
+ getPromptMessage,
+ toolExists,
+} from "../../../src/tools/index.js";
+
+test("every registered tool exposes a valid JSON-schema definition", () => {
+ const defs = getToolDefinitions();
+ assert.ok(defs.length >= 6); // ask-gemini, ping, Help, brainstorm, fetch-chunk, timeout-test
+ for (const def of defs) {
+ assert.equal(typeof def.name, "string");
+ assert.equal(typeof def.description, "string");
+ assert.equal(def.inputSchema.type, "object");
+ assert.equal(typeof def.inputSchema.properties, "object");
+ assert.ok(Array.isArray(def.inputSchema.required));
+ }
+});
+
+test("ask-gemini requires a prompt; ping's prompt is optional", () => {
+ const defs = getToolDefinitions();
+ const ask = defs.find((d) => d.name === "ask-gemini");
+ const ping = defs.find((d) => d.name === "ping");
+ assert.ok(ask && ping);
+ assert.ok((ask!.inputSchema.properties as any).prompt);
+ assert.ok((ask!.inputSchema.required as string[]).includes("prompt"));
+ assert.ok(!(ping!.inputSchema.required as string[]).includes("prompt"));
+});
+
+test("toolExists reflects the registry", () => {
+ assert.equal(toolExists("ask-gemini"), true);
+ assert.equal(toolExists("fetch-chunk"), true);
+ assert.equal(toolExists("does-not-exist"), false);
+});
+
+test("getPromptDefinitions lists tools that declare a prompt", () => {
+ const prompts = getPromptDefinitions();
+ const names = prompts.map((p) => p.name);
+ assert.ok(names.includes("ask-gemini"));
+ assert.ok(names.includes("brainstorm"));
+ const ask = prompts.find((p) => p.name === "ask-gemini");
+ assert.equal(typeof ask!.description, "string");
+});
+
+test("getPromptMessage formats prompt text, boolean flags, and key/value params", () => {
+ const msg = getPromptMessage("ask-gemini", {
+ prompt: "explain this",
+ model: "gemini-2.5-flash",
+ sandbox: true,
+ changeMode: false, // false values are omitted
+ });
+ assert.match(msg, /^Use the ask-gemini tool: explain this/);
+ assert.ok(msg.includes("(model: gemini-2.5-flash)"));
+ assert.ok(msg.includes("[sandbox]")); // boolean true rendered as a flag
+ assert.ok(!msg.includes("changeMode")); // false omitted
+});
+
+test("getPromptMessage handles a bare tool reference with no params", () => {
+ assert.equal(getPromptMessage("Help", {}), "Use the Help tool");
+});
diff --git a/test/unit/utils/changeModeChunker.test.ts b/test/unit/utils/changeModeChunker.test.ts
new file mode 100644
index 0000000..f21da95
--- /dev/null
+++ b/test/unit/utils/changeModeChunker.test.ts
@@ -0,0 +1,73 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { chunkChangeModeEdits, summarizeChunking } from "../../../src/utils/changeModeChunker.js";
+import type { ChangeModeEdit } from "../../../src/utils/changeModeParser.js";
+
+function edit(filename: string, oldCode = "x", newCode = "y"): ChangeModeEdit {
+ const lines = (s: string) => (s === "" ? 0 : s.split("\n").length);
+ return {
+ filename,
+ oldStartLine: 1,
+ oldEndLine: Math.max(1, lines(oldCode)),
+ oldCode,
+ newStartLine: 1,
+ newEndLine: Math.max(1, lines(newCode)),
+ newCode,
+ };
+}
+
+test("chunkChangeModeEdits returns one empty chunk for no edits", () => {
+ const chunks = chunkChangeModeEdits([]);
+ assert.equal(chunks.length, 1);
+ assert.equal(chunks[0].edits.length, 0);
+ assert.equal(chunks[0].totalChunks, 1);
+ assert.equal(chunks[0].hasMore, false);
+});
+
+test("chunkChangeModeEdits keeps small edits together in a single chunk (default budget)", () => {
+ const chunks = chunkChangeModeEdits([edit("a.ts"), edit("b.ts"), edit("c.ts")]);
+ assert.equal(chunks.length, 1);
+ assert.equal(chunks[0].edits.length, 3);
+ assert.equal(chunks[0].totalChunks, 1);
+ assert.equal(chunks[0].hasMore, false);
+});
+
+test("chunkChangeModeEdits keeps edits to the same file grouped together", () => {
+ // Two edits to the same file fit easily under the default budget.
+ const chunks = chunkChangeModeEdits([edit("same.ts", "aaa"), edit("same.ts", "bbb")]);
+ assert.equal(chunks.length, 1);
+ assert.equal(chunks[0].edits.length, 2);
+});
+
+test("chunkChangeModeEdits splits across chunks when the budget is exceeded", () => {
+ // Each edit is ~260 chars (250 overhead + filename*2 + code). A 300-char budget
+ // forces one edit per chunk across three distinct files.
+ const chunks = chunkChangeModeEdits([edit("a.ts"), edit("b.ts"), edit("c.ts")], 300);
+ assert.equal(chunks.length, 3);
+ assert.deepEqual(
+ chunks.map((c) => c.chunkIndex),
+ [1, 2, 3],
+ );
+ assert.deepEqual(
+ chunks.map((c) => c.hasMore),
+ [true, true, false],
+ );
+ assert.deepEqual(
+ chunks.map((c) => c.totalChunks),
+ [3, 3, 3],
+ );
+});
+
+test("chunkChangeModeEdits splits a single oversized file across chunks", () => {
+ const chunks = chunkChangeModeEdits([edit("big.ts", "aaa"), edit("big.ts", "bbb")], 300);
+ assert.equal(chunks.length, 2);
+ assert.equal(chunks[0].edits[0].filename, "big.ts");
+ assert.equal(chunks[1].edits[0].filename, "big.ts");
+});
+
+test("summarizeChunking reports edit and chunk counts", () => {
+ const chunks = chunkChangeModeEdits([edit("a.ts"), edit("b.ts")], 300);
+ const summary = summarizeChunking(chunks);
+ assert.match(summary, /# edits: 2/);
+ assert.match(summary, /# chunks: 2/);
+});
diff --git a/test/unit/utils/changeModeParser.test.ts b/test/unit/utils/changeModeParser.test.ts
new file mode 100644
index 0000000..da73851
--- /dev/null
+++ b/test/unit/utils/changeModeParser.test.ts
@@ -0,0 +1,89 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+ parseChangeModeOutput,
+ validateChangeModeEdits,
+ type ChangeModeEdit,
+} from "../../../src/utils/changeModeParser.js";
+
+// The markdown fence is built as a plain string so the fixtures can be written
+// as template literals without colliding with the backtick delimiter.
+const FENCE = "```";
+
+function block(file: string, line: number, oldCode: string, newCode: string): string {
+ return [`**FILE: ${file}:${line}**`, FENCE, "OLD:", oldCode, "NEW:", newCode, FENCE].join("\n");
+}
+
+test("parseChangeModeOutput parses a single markdown OLD/NEW block", () => {
+ const out = parseChangeModeOutput(
+ ["Here is the edit:", block("src/a.ts", 10, "const x = 1;", "const x = 2;")].join("\n\n"),
+ );
+ assert.equal(out.length, 1);
+ const e = out[0];
+ assert.equal(e.filename, "src/a.ts");
+ assert.equal(e.oldStartLine, 10);
+ assert.equal(e.oldEndLine, 10); // single line
+ assert.equal(e.oldCode, "const x = 1;");
+ assert.equal(e.newCode, "const x = 2;");
+});
+
+test("parseChangeModeOutput computes end lines from multi-line OLD/NEW content", () => {
+ const out = parseChangeModeOutput(block("src/b.ts", 20, "foo();\nbar();", "baz();"));
+ assert.equal(out.length, 1);
+ const e = out[0];
+ assert.equal(e.oldStartLine, 20);
+ assert.equal(e.oldEndLine, 21); // two old lines: 20..21
+ assert.equal(e.newStartLine, 20);
+ assert.equal(e.newEndLine, 20); // one new line
+ assert.equal(e.oldCode, "foo();\nbar();");
+});
+
+test("parseChangeModeOutput parses multiple blocks in order", () => {
+ const out = parseChangeModeOutput(
+ [block("a.ts", 1, "a", "A"), block("b.ts", 2, "b", "B")].join("\n\n"),
+ );
+ assert.equal(out.length, 2);
+ assert.deepEqual(
+ out.map((e) => e.filename),
+ ["a.ts", "b.ts"],
+ );
+});
+
+test("parseChangeModeOutput returns [] for empty or non-matching input", () => {
+ assert.deepEqual(parseChangeModeOutput(""), []);
+ assert.deepEqual(parseChangeModeOutput("just some prose with no edits"), []);
+});
+
+test("validateChangeModeEdits accepts well-formed edits", () => {
+ const edits: ChangeModeEdit[] = [
+ {
+ filename: "a.ts",
+ oldStartLine: 1,
+ oldEndLine: 1,
+ oldCode: "a",
+ newStartLine: 1,
+ newEndLine: 1,
+ newCode: "A",
+ },
+ ];
+ assert.deepEqual(validateChangeModeEdits(edits), { valid: true, errors: [] });
+});
+
+test("validateChangeModeEdits flags missing filename, inverted ranges, and empty edits", () => {
+ const edits: ChangeModeEdit[] = [
+ {
+ filename: "",
+ oldStartLine: 5,
+ oldEndLine: 1, // inverted
+ oldCode: "",
+ newStartLine: 1,
+ newEndLine: 1,
+ newCode: "", // empty edit
+ },
+ ];
+ const result = validateChangeModeEdits(edits);
+ assert.equal(result.valid, false);
+ assert.ok(result.errors.some((e) => /missing filename/i.test(e)));
+ assert.ok(result.errors.some((e) => /Invalid line range/i.test(e)));
+ assert.ok(result.errors.some((e) => /Empty edit/i.test(e)));
+});
diff --git a/test/unit/utils/changeModeTranslator.test.ts b/test/unit/utils/changeModeTranslator.test.ts
new file mode 100644
index 0000000..d53f0e7
--- /dev/null
+++ b/test/unit/utils/changeModeTranslator.test.ts
@@ -0,0 +1,72 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+ formatChangeModeResponse,
+ summarizeChangeModeEdits,
+} from "../../../src/utils/changeModeTranslator.js";
+import type { ChangeModeEdit } from "../../../src/utils/changeModeParser.js";
+
+function edit(filename: string, oldCode: string, newCode: string): ChangeModeEdit {
+  // Fixture builder: every edit is a one-line change with both ranges pinned to line 1.
+  const line = 1;
+  const oldSide = { oldStartLine: line, oldEndLine: line, oldCode };
+  const newSide = { newStartLine: line, newEndLine: line, newCode };
+  return {
+    filename,
+    ...oldSide,
+    ...newSide,
+  };
+}
+
+test("formatChangeModeResponse renders a single-chunk response with the exact code", () => {
+  const rendered = formatChangeModeResponse([edit("a.ts", "OLD_CODE", "NEW_CODE")]);
+  assert.match(rendered, /CHANGEMODE OUTPUT/);
+  assert.match(rendered, /1 modification\b/); // singular: \b rejects "modifications"
+  assert.match(rendered, /Replace this exact text:/);
+  // Both the old and the new snippet must appear verbatim in the output.
+  ["OLD_CODE", "NEW_CODE"].forEach((snippet) => assert.ok(rendered.includes(snippet)));
+  assert.match(rendered, /Apply these edits in order/);
+});
+
+test("formatChangeModeResponse pluralizes the modification count", () => {
+  const twoEdits = [edit("a.ts", "1", "1"), edit("b.ts", "2", "2")];
+  assert.match(formatChangeModeResponse(twoEdits), /2 modifications\b/);
+});
+
+test("formatChangeModeResponse emits chunk headers and a fetch-chunk continuation", () => {
+  // First of three chunks, so two more remain to be fetched.
+  const chunkInfo = { current: 1, total: 3, cacheKey: "abcd1234" };
+  const rendered = formatChangeModeResponse([edit("a.ts", "x", "y")], chunkInfo);
+  assert.match(rendered, /Chunk 1 of 3/);
+  // The continuation hint must carry the cache key and the next chunk index.
+  const nextCall = 'fetch-chunk cacheKey="abcd1234" chunkIndex=2';
+  assert.ok(rendered.includes(nextCall));
+  assert.match(rendered, /2 more chunks/);
+});
+
+test("formatChangeModeResponse omits the continuation on the final chunk", () => {
+  const rendered = formatChangeModeResponse(
+    [edit("a.ts", "x", "y")],
+    { current: 3, total: 3, cacheKey: "abcd1234" }, // last of three chunks
+  );
+  assert.match(rendered, /Chunk 3 of 3/);
+  // Nothing is left to fetch, so no continuation hint may appear.
+  assert.doesNotMatch(rendered, /fetch-chunk cacheKey/);
+});
+
+test("summarizeChangeModeEdits counts edits and affected files", () => {
+  const edits = [edit("a.ts", "1", "1"), edit("a.ts", "2", "2"), edit("b.ts", "3", "3")];
+  const report = summarizeChangeModeEdits(edits);
+  const expectedPatterns = [
+    /Total edits: 3/,
+    /Files affected: 2/,
+    /- a\.ts: 2 edits/,
+    /- b\.ts: 1 edit\b/, // \b keeps the singular form from matching "edits"
+  ];
+  expectedPatterns.forEach((pattern) => assert.match(report, pattern));
+});
+
+test("summarizeChangeModeEdits marks the partial (multi-chunk) view", () => {
+  const partialView = true; // per the test title, `true` requests the partial (multi-chunk) view
+  assert.match(summarizeChangeModeEdits([edit("a.ts", "1", "1")], partialView), /across all chunks/);
+});
diff --git a/test/unit/utils/chunkCache.test.ts b/test/unit/utils/chunkCache.test.ts
new file mode 100644
index 0000000..f3e2ded
--- /dev/null
+++ b/test/unit/utils/chunkCache.test.ts
@@ -0,0 +1,96 @@
+import { test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import {
+ cacheChunks,
+ getChunks,
+ clearCache,
+ getCacheStats,
+} from "../../../src/utils/chunkCache.js";
+import type { EditChunk } from "../../../src/utils/changeModeChunker.js";
+
+// chunkCache persists to a shared scratch dir under os.tmpdir() (10-min TTL), so every
+// test is bracketed by clearCache(); clearCache() only touches that scratch dir.
+beforeEach(() => clearCache()); // start each test from an empty cache
+afterEach(() => clearCache()); // leave no entries behind for the next test
+
+function chunk(n: number): EditChunk {
+  // Fixture builder: one single-line edit whose filename and code are tagged with `n`.
+  const onlyEdit = {
+    filename: `file${n}.ts`,
+    oldStartLine: 1,
+    oldEndLine: 1,
+    oldCode: `old${n}`,
+    newStartLine: 1,
+    newEndLine: 1,
+    newCode: `new${n}`,
+  };
+  return {
+    edits: [onlyEdit],
+    chunkIndex: n,
+    totalChunks: 1,
+    hasMore: false,
+    estimatedChars: 100,
+  };
+}
+
+test("cacheChunks returns an 8-char hex key and getChunks round-trips the data", () => {
+  const cacheKey = cacheChunks("a prompt", [chunk(1), chunk(2)]);
+  assert.match(cacheKey, /^[a-f0-9]{8}$/);
+
+  const restored = getChunks(cacheKey);
+  assert.ok(restored); // must be a hit, not null
+  assert.equal(restored!.length, 2);
+  assert.equal(restored![0].edits[0].newCode, "new1"); // chunk(1) was stored first
+});
+
+test("cacheChunks is deterministic for the same prompt", () => {
+  // Caching the same prompt twice in a row must hand back the same key.
+  const [firstKey, secondKey] = [1, 2].map(() => cacheChunks("identical", [chunk(1)]));
+  assert.equal(firstKey, secondKey);
+});
+
+test("getChunks rejects malformed keys (path traversal / wrong shape)", () => {
+  // In order: path traversal, not hex, too short, too long.
+  const malformedKeys = ["../../etc/passwd", "ZZZZZZZZ", "abc", "deadbeef99"];
+  // Every one of them must come back as a plain miss.
+  malformedKeys.forEach((badKey) => assert.equal(getChunks(badKey), null));
+});
+
+test("getChunks returns null for a valid-format key with no cached file", () => {
+  assert.strictEqual(getChunks("00000000"), null); // well-formed key, but nothing cached under it
+});
+
+test("getChunks expires entries past the TTL and deletes the file", () => {
+  const cacheKey = cacheChunks("expire me", [chunk(1)]);
+  const entryPath = path.join(getCacheStats().cacheDir, `${cacheKey}.json`);
+
+  // Rewrite the entry on disk with a timestamp 11 minutes old, past the 10-minute TTL.
+  const elevenMinutesMs = 11 * 60 * 1000;
+  const stored = JSON.parse(fs.readFileSync(entryPath, "utf-8"));
+  const backdated = { ...stored, timestamp: Date.now() - elevenMinutesMs };
+  fs.writeFileSync(entryPath, JSON.stringify(backdated));
+
+  assert.equal(getChunks(cacheKey), null);
+  assert.equal(fs.existsSync(entryPath), false); // expired file is removed
+});
+
+test("the cache enforces a maximum file count (FIFO eviction)", () => {
+  const { maxSize: limit } = getCacheStats();
+  for (let index = 0; index < limit + 5; index += 1) {
+    cacheChunks(`prompt-${index}`, [chunk(index)]); // five more than the cache may hold
+  }
+  assert.equal(getCacheStats().size, limit);
+});
+
+test("getCacheStats reports the TTL and max size; clearCache empties the dir", () => {
+  const { ttl, maxSize } = getCacheStats();
+  assert.equal(ttl, 10 * 60 * 1000); // ten minutes, in milliseconds
+  assert.equal(maxSize, 50);
+
+  cacheChunks("something", [chunk(1)]);
+  assert.ok(getCacheStats().size >= 1); // the write shows up in the stats
+  clearCache();
+  assert.equal(getCacheStats().size, 0);
+});
diff --git a/src/utils/commandExecutor.test.ts b/test/unit/utils/commandExecutor.test.ts
similarity index 97%
rename from src/utils/commandExecutor.test.ts
rename to test/unit/utils/commandExecutor.test.ts
index 5f510ca..35b3a1c 100644
--- a/src/utils/commandExecutor.test.ts
+++ b/test/unit/utils/commandExecutor.test.ts
@@ -4,7 +4,7 @@ import {
quoteForCmd,
resolveCommandForExecution,
buildEnoentErrorMessage,
-} from "./commandExecutor.js";
+} from "../../../src/utils/commandExecutor.js";
test("quoteForCmd wraps in double quotes and doubles embedded quotes", () => {
assert.equal(quoteForCmd("hello"), '"hello"');
diff --git a/src/utils/envFile.test.ts b/test/unit/utils/envFile.test.ts
similarity index 95%
rename from src/utils/envFile.test.ts
rename to test/unit/utils/envFile.test.ts
index 03d6e68..62e3aca 100644
--- a/src/utils/envFile.test.ts
+++ b/test/unit/utils/envFile.test.ts
@@ -1,6 +1,6 @@
import { test } from "node:test";
import assert from "node:assert/strict";
-import { parseEnv } from "./envFile.js";
+import { parseEnv } from "../../../src/utils/envFile.js";
test("parseEnv: basic KEY=VALUE pairs", () => {
const r = parseEnv("GEMINI_MODEL=gemini-2.5-pro\nGEMINI_MCP_TIMEOUT_MS=1800000");
diff --git a/src/utils/geminiExecutor.test.ts b/test/unit/utils/geminiExecutor.test.ts
similarity index 91%
rename from src/utils/geminiExecutor.test.ts
rename to test/unit/utils/geminiExecutor.test.ts
index 2fd922c..dc52663 100644
--- a/src/utils/geminiExecutor.test.ts
+++ b/test/unit/utils/geminiExecutor.test.ts
@@ -1,6 +1,6 @@
import { test } from "node:test";
import assert from "node:assert/strict";
-import { assertSafeFileReferences } from "./geminiExecutor.js";
+import { assertSafeFileReferences } from "../../../src/utils/geminiExecutor.js";
const root = process.cwd();
diff --git a/src/utils/timeoutManager.test.ts b/test/unit/utils/timeoutManager.test.ts
similarity index 90%
rename from src/utils/timeoutManager.test.ts
rename to test/unit/utils/timeoutManager.test.ts
index 3f565f7..418725d 100644
--- a/src/utils/timeoutManager.test.ts
+++ b/test/unit/utils/timeoutManager.test.ts
@@ -1,6 +1,6 @@
import { test } from "node:test";
import assert from "node:assert/strict";
-import { resolveTimeoutMs, RECOMMENDED_TIMEOUT_MS } from "./timeoutManager.js";
+import { resolveTimeoutMs, RECOMMENDED_TIMEOUT_MS } from "../../../src/utils/timeoutManager.js";
test("resolveTimeoutMs: disabled by default when unset or blank (1.1.6 parity)", () => {
assert.equal(resolveTimeoutMs({}), 0);
diff --git a/tsconfig.test.json b/tsconfig.test.json
new file mode 100644
index 0000000..15d6a9c
--- /dev/null
+++ b/tsconfig.test.json
@@ -0,0 +1,8 @@
+{
+ "extends": "./tsconfig.json",
+ "compilerOptions": {
+ "noEmit": true,
+ "rootDir": "."
+ },
+ "include": ["src/**/*", "test/**/*"]
+}