-
Notifications
You must be signed in to change notification settings - Fork 198
v1.1.7: automated test suite, cross-platform hardening, internal doctor tooling #81
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,224 @@ | ||
| #!/usr/bin/env node | ||
| // gemini-mcp-tool doctor — INTERNAL dev / diagnostic + test tool. | ||
| // | ||
| // Not published: deliberately excluded from package.json "bin" and "files", so | ||
| // it ships with the repo but NOT the npm package. Run it from a checkout: | ||
| // | ||
| // npm run doctor → report the live system state for the MCP server | ||
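| // npm run doctor test → preflight + run the e2e suite (the automated MCP | ||
| // test that replaces manual mcpjam clicking) | ||
| // npm run doctor judge → check for a judge API key, build, then run the | ||
| // LLM-as-a-judge semantic suite | ||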
| // | ||
| // This is the 1.1.7 seed: it reports what 1.1.7 actually has (node, the gemini | ||
| // CLI, GEMINI_CLI_PATH) and runs the test suite. Later feature PRs grow it with | ||
| // backend / model / approval / timeout diagnostics. | ||
| // Self-contained: pure Node, no build step or dependencies. | ||
|
|
||
| import { spawnSync } from "node:child_process"; | ||
| import { existsSync, readFileSync } from "node:fs"; | ||
| import path from "node:path"; | ||
| import { fileURLToPath } from "node:url"; | ||
|
|
||
// Names of the environment variables this tool reads.
const ENV = {
  // Explicit path to the gemini executable.
  GEMINI_CLI_PATH: "GEMINI_CLI_PATH",
};

const isWindows = process.platform === "win32";

// Colour is used only when stdout is a TTY and NO_COLOR is unset or empty.
const useColor = process.stdout.isTTY && !process.env.NO_COLOR;

// Wrap `s` in the given SGR escape code, or hand it back untouched when colour is off.
const paint = (code, s) => {
  if (!useColor) return s;
  return `\x1b[${code}m${s}\x1b[0m`;
};

// One painter per style name, built from a name → SGR code table.
const c = Object.fromEntries(
  [
    ["bold", "1"],
    ["dim", "2"],
    ["green", "32"],
    ["yellow", "33"],
    ["red", "31"],
    ["cyan", "36"],
  ].map(([name, code]) => [name, (s) => paint(code, s)]),
);

// Status glyphs used throughout the report.
const OK = c.green("✓");
const WARN = c.yellow("⚠");
const BAD = c.red("✗");

// The repository root is the parent of the directory containing this script.
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
|
|
||
/**
 * Print a section heading: a blank line, the bold title, then a dim rule.
 * The rule is as wide as the title but never narrower than 16 characters.
 *
 * @param {string} title - Section title to print.
 */
function heading(title) {
  const ruleWidth = Math.max(16, title.length);
  const rule = "─".repeat(ruleWidth);
  console.log(`\n${c.bold(title)}`);
  console.log(c.dim(rule));
}
|
|
||
/**
 * Run a command synchronously and capture its output as UTF-8 text.
 *
 * On Windows the command is run through the shell, which splits on whitespace,
 * so the executable AND every argument containing whitespace are wrapped in
 * double quotes (embedded quotes doubled). Elsewhere the command is spawned
 * directly and nothing is rewritten.
 *
 * @param {string} cmd - Executable name or path.
 * @param {string[]} [args] - Arguments passed to the executable.
 * @returns {{ok: boolean, status?: number|null, out?: string, err: string}}
 *   `ok` is true only for exit status 0. When the process could not be run
 *   (spawn error, 20 s timeout, thrown exception) only `ok: false` and the
 *   error message in `err` are returned.
 */
function runCmd(cmd, args) {
  // Quote a token for the Windows shell only when it contains whitespace.
  const quoteForShell = (token) =>
    /\s/.test(token) ? `"${token.replace(/"/g, '""')}"` : token;

  try {
    const executable = isWindows ? quoteForShell(cmd) : cmd;
    const finalArgs = isWindows && Array.isArray(args) ? args.map(quoteForShell) : args;
    const r = spawnSync(executable, finalArgs, {
      encoding: "utf8",
      timeout: 20000,
      shell: isWindows,
      windowsHide: true,
    });
    if (r.error) return { ok: false, err: r.error.message };
    return {
      ok: r.status === 0,
      status: r.status,
      out: (r.stdout || "").trim(),
      err: (r.stderr || "").trim(),
    };
  } catch (e) {
    return { ok: false, err: e instanceof Error ? e.message : String(e) };
  }
}
|
|
||
/**
 * List every location of `cmd` reported by the platform's lookup tool
 * (`where` on Windows, `which` elsewhere).
 *
 * @param {string} cmd - Command name to look up.
 * @returns {string[]} Trimmed, non-empty output lines; empty when the lookup
 *   failed or printed nothing.
 */
function locate(cmd) {
  const finder = isWindows ? "where" : "which";
  const { ok, out } = runCmd(finder, [cmd]);
  if (!(ok && out)) return [];

  const paths = [];
  for (const line of out.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (trimmed) paths.push(trimmed);
  }
  return paths;
}
|
|
||
// Mirror commandExecutor's resolution: honour GEMINI_CLI_PATH, else PATH.
/**
 * Work out which gemini executable would be used and read its version.
 *
 * @returns {{found: boolean, primary: string|null, candidates: string[],
 *   override: string|null, version: string|null}}
 *   `override` is the trimmed GEMINI_CLI_PATH value (null when unset/blank);
 *   `candidates` lists the override first (if any) followed by PATH hits;
 *   `primary` is the executable that gets probed; `version` is the first line
 *   of `<primary> --version`, or null when it could not be read.
 */
function detectGemini() {
  const override = (process.env[ENV.GEMINI_CLI_PATH] || "").trim();
  const onPath = locate("gemini");

  let candidates = onPath;
  let primary = null;
  let found;

  if (override) {
    // An explicit override always wins; it counts as found only if the path exists.
    candidates = [override, ...onPath.filter((entry) => entry !== override)];
    primary = override;
    found = existsSync(override);
  } else {
    found = candidates.length > 0;
    if (found) {
      primary = candidates[0];
      if (isWindows) {
        // Prefer a launcher the shell can run directly, in this order.
        for (const ext of [".cmd", ".exe", ".bat"]) {
          const hit = candidates.find((entry) => entry.toLowerCase().endsWith(ext));
          if (hit) {
            primary = hit;
            break;
          }
        }
      }
    }
  }

  let version = null;
  if (found && primary) {
    const probe = runCmd(primary, ["--version"]);
    if (probe.ok && probe.out) {
      const [firstLine] = probe.out.split(/\r?\n/);
      version = firstLine.trim();
    }
  }

  return { found: !!found, primary, candidates, override: override || null, version };
}
|
jamubc marked this conversation as resolved.
|
||
|
|
||
// ── report ───────────────────────────────────────────────────────────────────
/**
 * Print the system / Gemini CLI report and exit.
 *
 * Sections: "System" (node version, platform, arch), "Gemini CLI" (result of
 * detectGemini()), and "Summary" (collected problems plus usage tips).
 * Exits with status 0 when no problems were recorded, 1 otherwise.
 */
function runReport() {
  const problems = [];

  heading("System");
  console.log(` node ${process.version}`);
  console.log(` platform ${process.platform} (${process.arch})`);

  heading("Gemini CLI");
  const gemini = detectGemini();
  if (gemini.found) {
    console.log(` ${OK} found${gemini.override ? " (via " + ENV.GEMINI_CLI_PATH + ")" : ""}`);
    console.log(` path ${gemini.primary}`);
    console.log(` version ${gemini.version ? c.cyan(gemini.version) : c.yellow("(could not read --version)")}`);
    // List every candidate except the one actually in use. The primary is not
    // necessarily candidates[0] (on Windows a later .cmd/.exe/.bat entry can be
    // preferred), so filter by value rather than dropping the first element.
    const others = gemini.candidates.filter((candidate) => candidate !== gemini.primary);
    if (others.length > 0) console.log(c.dim(` also on PATH: ${others.join(", ")}`));
  } else {
    console.log(` ${BAD} ${gemini.override ? ENV.GEMINI_CLI_PATH + " path not found" : "not found on PATH"}`);
    problems.push(
      gemini.override
        ? `${ENV.GEMINI_CLI_PATH} is set to ${gemini.override}, but that path does not exist.`
        : `Gemini CLI not found. Install it (npm i -g @google/gemini-cli) or set ${ENV.GEMINI_CLI_PATH} to its full path.`
    );
  }

  heading("Summary");
  if (problems.length === 0) {
    console.log(` ${OK} ${c.green("No problems detected.")}`);
  } else {
    console.log(` ${BAD} ${c.red(`${problems.length} issue(s) found:`)}`);
    for (const p of problems) console.log(` - ${p}`);
  }
  console.log(c.dim(`\n Tips:`));
  console.log(c.dim(` \`npm run doctor test\` → build + run live e2e tests`));
  console.log(c.dim(` \`npm run doctor judge\` → build + run semantic LLM judge tests`));
  console.log("");
  process.exit(problems.length === 0 ? 0 : 1);
}
|
|
||
// ── test (automated MCP test, replaces manual mcpjam) ──────────────────────────
/**
 * Preflight the gemini CLI, build the project, then run the e2e suite.
 *
 * A missing gemini CLI only produces a warning. A failed build aborts with the
 * build's exit status (1 when it has none). Otherwise the process exits with
 * the e2e runner's status (1 when it has none).
 */
function runTest() {
  heading("Preflight");
  const gemini = detectGemini();
  if (!gemini.found) {
    console.log(` ${WARN} gemini not on PATH — live model tests will skip; only the gemini-independent server tests run.`);
  } else {
    const versionLabel = gemini.version ? c.cyan(gemini.version) : "";
    const location = c.dim("(" + gemini.primary + ")");
    console.log(` ${OK} gemini ${versionLabel} ${location}`);
  }

  heading("Build");
  const npmBin = isWindows ? "npm.cmd" : "npm";
  const buildOptions = { stdio: "inherit", cwd: repoRoot, shell: isWindows };
  const build = spawnSync(npmBin, ["run", "build"], buildOptions);
  if (build.status !== 0) {
    console.log(` ${BAD} ${c.red("build failed — aborting.")}`);
    process.exit(build.status ?? 1);
  }
  console.log(` ${OK} build succeeded`);

  heading("E2E suite (real gemini through the MCP server)");
  const runner = path.join(repoRoot, "scripts", "run-tests.mjs");
  const e2e = spawnSync(process.execPath, [runner, "e2e"], { stdio: "inherit", cwd: repoRoot });
  const passed = e2e.status === 0;
  console.log(
    passed
      ? `\n ${OK} ${c.green("e2e suite passed — the MCP server works end-to-end.")}`
      : `\n ${BAD} ${c.red("e2e suite failed.")}`,
  );
  process.exit(e2e.status ?? 1);
}
|
|
||
// ── judge (semantic evaluation) ────────────────────────────────────────────────
/**
 * Check that a judge API key is available, build the project, then run the
 * judge suite.
 *
 * Exits 1 immediately when detectJudgeKeys() reports no key. A failed build
 * aborts with the build's exit status (1 when it has none). Otherwise the
 * process exits with the judge runner's status (1 when it has none).
 */
function runJudgeTest() {
  heading("Judge Preflight");
  const { hasKey, keyType } = detectJudgeKeys();
  if (!hasKey) {
    console.log(` ${BAD} No LLM Judge keys found. Please set DEEPSEEK_API_KEY or OPENROUTER_API_KEY in your test/.env file.`);
    process.exit(1);
  } else {
    console.log(` ${OK} LLM Judge configured via: ${keyType}`);
  }

  heading("Build");
  const npmBin = isWindows ? "npm.cmd" : "npm";
  const buildOptions = { stdio: "inherit", cwd: repoRoot, shell: isWindows };
  const build = spawnSync(npmBin, ["run", "build"], buildOptions);
  if (build.status !== 0) {
    console.log(` ${BAD} ${c.red("build failed — aborting.")}`);
    process.exit(build.status ?? 1);
  }
  console.log(` ${OK} build succeeded`);

  heading("LLM-as-a-Judge semantic test suite");
  const runner = path.join(repoRoot, "scripts", "run-tests.mjs");
  const judgeRun = spawnSync(process.execPath, [runner, "judge"], { stdio: "inherit", cwd: repoRoot });
  const passed = judgeRun.status === 0;
  console.log(
    passed
      ? `\n ${OK} ${c.green("Judge suite passed — semantic checks successful!")}`
      : `\n ${BAD} ${c.red("Judge suite failed.")}`,
  );
  process.exit(judgeRun.status ?? 1);
}
|
|
||
/**
 * Report whether an LLM-judge API key is configured, and where it was found.
 *
 * Lookup order: process.env.DEEPSEEK_API_KEY, process.env.OPENROUTER_API_KEY,
 * then an assignment of either name (DEEPSEEK first) in `<repoRoot>/test/.env`.
 *
 * A `.env` line counts only when the name starts the line (optionally after
 * indentation or `export`) and a non-empty, non-comment value follows `=` on
 * that same line. Commented-out lines and empty `KEY=` placeholders are ignored.
 *
 * @returns {{hasKey: boolean, keyType: string}} `keyType` describes the source
 *   of the key and is "" when none was found.
 */
function detectJudgeKeys() {
  const keyNames = ["DEEPSEEK_API_KEY", "OPENROUTER_API_KEY"];
  const none = { hasKey: false, keyType: "" };

  for (const name of keyNames) {
    if (process.env[name]) return { hasKey: true, keyType: `process.env.${name}` };
  }

  const envPath = path.join(repoRoot, "test", ".env");
  if (!existsSync(envPath)) return none;

  let content;
  try {
    content = readFileSync(envPath, "utf-8");
  } catch {
    // Best-effort probe: an unreadable .env is treated the same as a missing one.
    return none;
  }

  for (const name of keyNames) {
    // [ \t] rather than \s keeps the match on one line, so an empty value
    // cannot borrow the next line's text; ^ with the m flag rejects "# KEY=…".
    const assignment = new RegExp(`^[ \\t]*(?:export[ \\t]+)?${name}[ \\t]*=[ \\t]*[^\\s#]+`, "im");
    if (assignment.test(content)) return { hasKey: true, keyType: `test/.env (${name})` };
  }
  return none;
}
|
|
||
// ── dispatch ───────────────────────────────────────────────────────────────────
// The first CLI argument (case-insensitive) selects the mode; anything else,
// including no argument, prints the report.
const mode = (process.argv[2] || "").toLowerCase();
switch (mode) {
  case "test":
    runTest();
    break;
  case "judge":
    runJudgeTest();
    break;
  default:
    runReport();
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| #!/usr/bin/env node | ||
| // Category-aware test runner. Discovers *.test.ts under the selected category | ||
| // folders (test/unit, test/integration, test/e2e, test/judge) and runs them with the | ||
| // built-in node:test runner via the tsx loader, so the TypeScript sources run | ||
| // directly. | ||
| // | ||
| // Usage: | ||
| // node scripts/run-tests.mjs # default: unit + integration (hermetic) | ||
| // node scripts/run-tests.mjs unit # one category | ||
| // node scripts/run-tests.mjs integration e2e # several | ||
| // node scripts/run-tests.mjs judge # semantic LLM judge tests | ||
| // node scripts/run-tests.mjs all # unit + integration + e2e + judge | ||
| // | ||
| // Categories: | ||
| // unit pure, single-module tests. No subprocess, no network, no real CLI. | ||
| // integration several real modules wired together. Still hermetic — never the real gemini CLI. | ||
| // e2e the real gemini CLI driven through the real MCP server over stdio. Opt-in (live). | ||
|
jamubc marked this conversation as resolved.
jamubc marked this conversation as resolved.
|
||
| // judge live Gemini CLI output evaluated by a second LLM judge. Opt-in (live). | ||
| import { spawnSync } from "node:child_process"; | ||
| import { readdirSync, statSync, existsSync } from "node:fs"; | ||
| import path from "node:path"; | ||
| import { fileURLToPath } from "node:url"; | ||
|
|
||
// Test sources live in the `test` directory next to this script's directory.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const testDir = path.join(scriptDir, "../test");

// Every selectable category, in the order they are run.
const KNOWN = ["unit", "integration", "e2e", "judge"];
// the hermetic suite `npm test` runs and CI gates on
const DEFAULT = ["unit", "integration"];
|
|
||
/**
 * Turn the CLI arguments into the list of test categories to run.
 *
 * No arguments selects DEFAULT; "all" anywhere selects KNOWN. Otherwise every
 * argument (case-insensitive) must be a known category; unknown names are
 * reported on stderr and the process exits with status 2.
 *
 * @param {string[]} argv - Full argument vector (categories start at index 2).
 * @returns {string[]} Selected categories, de-duplicated, in KNOWN order.
 */
function resolveCategories(argv) {
  const requested = argv.slice(2).map((arg) => arg.toLowerCase());
  if (requested.length === 0) return DEFAULT;
  if (requested.includes("all")) return KNOWN;

  const unknown = requested.filter((name) => !KNOWN.includes(name));
  if (unknown.length > 0) {
    console.error(`Unknown test category: ${unknown.join(", ")}`);
    console.error(`Valid categories: ${KNOWN.join(", ")}, all`);
    process.exit(2);
  }

  // Walking KNOWN (not the arguments) de-dupes and keeps the documented order.
  const wanted = new Set(requested);
  return KNOWN.filter((name) => wanted.has(name));
}
|
|
||
/**
 * Recursively collect every `*.test.ts` file under `dir`.
 *
 * Each directory listing is sorted so the resulting order does not depend on
 * the order the filesystem happens to return entries in. Entries that cannot
 * be stat'ed because they no longer resolve (for example a dangling symlink)
 * are skipped instead of aborting discovery.
 *
 * @param {string} dir - Directory to search; a missing directory yields [].
 * @returns {string[]} Paths of the test files found, each built by joining
 *   `dir` with the entry names beneath it.
 */
function findTests(dir) {
  const found = [];
  if (!existsSync(dir)) return found;
  for (const entry of readdirSync(dir).sort()) {
    const full = path.join(dir, entry);
    // throwIfNoEntry: false returns undefined rather than throwing ENOENT.
    const stats = statSync(full, { throwIfNoEntry: false });
    if (!stats) continue;
    if (stats.isDirectory()) found.push(...findTests(full));
    else if (entry.endsWith(".test.ts")) found.push(full);
  }
  return found;
}
|
|
||
// Resolve the requested categories and gather their test files, category by category.
const categories = resolveCategories(process.argv);
const tests = [];
for (const category of categories) {
  tests.push(...findTests(path.join(testDir, category)));
}

// Nothing to run is reported and treated as success.
if (!tests.length) {
  console.log(`No test files found for: ${categories.join(", ")}`);
  process.exit(0);
}

console.log(`Running ${tests.length} test file(s) [${categories.join(", ")}]`);

// tsx requires Node >= 18.19 which always supports --import.
// The older --loader flag is deprecated and breaks on CI (Node 18.19+/20/22).
const loaderArgs = ["--import", "tsx"];

// Mute routine [GMCPT] logging for the hermetic categories so the reporter
// output stays readable. The e2e suite keeps full server logs (its child
// server process inherits this env), which is useful for debugging live calls.
const env = categories.includes("e2e")
  ? { ...process.env }
  : { ...process.env, NODE_ENV: "test" };

// Run test files serially (--test-concurrency=1). The changeMode chunk cache is
// a single shared on-disk dir (os.tmpdir()/gemini-mcp-chunks); files that touch
// it (chunkCache, changeMode-pipeline) would otherwise race across parallel
// worker processes. Serial e2e also avoids hitting the gemini quota in parallel.
// The hermetic suite is tiny, so the cost is negligible. (Flag available on the
// Node 18.19+/20.10+/22 versions CI runs.)
const nodeArgs = [...loaderArgs, "--test", "--test-concurrency=1", ...tests];
const result = spawnSync(process.execPath, nodeArgs, { stdio: "inherit", env });

// Propagate the child's exit status; no status (e.g. spawn failure) counts as failure.
process.exit(result.status ?? 1);
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.