// Regression guard for the draft-2020-12 fix: Firecrawl's MCP tools ship a JSON
// Schema (draft 2020-12) that Mastra's validator can't resolve, so we swap in a
// clean Zod schema by MUTATING the Tool instance in place. These tests pin that
// behavior — if the mutation stops taking effect, or an override silently fails
// to match a renamed tool, the bug returns invisibly in prod.

/**
 * Minimal stand-in for a loaded MCP tool: only the field the override touches.
 * `inputSchema` is `unknown` because the override replaces a JSON-schema object
 * with a Zod schema, so the field has to accept both.
 */
const fakeTool = (): { inputSchema: unknown } => ({
  inputSchema: { $schema: "https://json-schema.org/draft/2020-12/schema" },
});

describe("applySchemaOverrides", () => {
  test("mutates the matching tool instance in place (same reference)", () => {
    const scrape = fakeTool();
    const tools = { firecrawl_firecrawl_scrape: scrape };
    const override = z.object({ url: z.string() });

    const applied = applySchemaOverrides(tools, { firecrawl_firecrawl_scrape: override });

    expect(applied).toEqual(["firecrawl_firecrawl_scrape"]);
    // Same object identity — proves we mutated, not replaced (the load-bearing bit).
    expect(tools.firecrawl_firecrawl_scrape).toBe(scrape);
    expect(scrape.inputSchema).toBe(override);
  });

  test("warns when the server is loaded but the tool id is missing (rename)", () => {
    const warnings: string[] = [];
    const tools = { firecrawl_some_other_tool: fakeTool() };

    const applied = applySchemaOverrides(
      tools,
      { firecrawl_firecrawl_scrape: z.object({ url: z.string() }) },
      (m) => warnings.push(m),
    );

    expect(applied).toEqual([]);
    expect(warnings).toHaveLength(1);
    expect(warnings[0]).toContain("firecrawl_firecrawl_scrape");
  });

  test("stays silent when the override's server isn't loaded at all", () => {
    const warnings: string[] = [];
    const tools = { "brave-search_brave_web_search": fakeTool() };

    applySchemaOverrides(
      tools,
      { firecrawl_firecrawl_scrape: z.object({ url: z.string() }) },
      (m) => warnings.push(m),
    );

    expect(warnings).toEqual([]);
  });

  test("the real SCHEMA_OVERRIDES are valid Zod schemas keyed by serverName_toolName", () => {
    for (const [id, schema] of Object.entries(SCHEMA_OVERRIDES)) {
      expect(id).toContain("_");
      expect(typeof (schema as z.ZodTypeAny).parse).toBe("function");
    }

    // The two ids the platform actually relies on today. Both are pinned: the
    // mcp-servers.yaml whitelist exposes scrape AND search, so losing either
    // override must fail this test rather than surface as a runtime rejection.
    expect(Object.keys(SCHEMA_OVERRIDES)).toContain("firecrawl_firecrawl_scrape");
    expect(Object.keys(SCHEMA_OVERRIDES)).toContain("firecrawl_firecrawl_search");

    // Each override must accept the smallest useful payload for its tool.
    expect(SCHEMA_OVERRIDES.firecrawl_firecrawl_scrape.parse({ url: "https://x.com" })).toEqual({
      url: "https://x.com",
    });
    expect(SCHEMA_OVERRIDES.firecrawl_firecrawl_search.parse({ query: "mcp" })).toEqual({
      query: "mcp",
    });
  });
});
/**
 * Clean input schemas for MCP tools whose server-provided JSON Schema declares
 * draft 2020-12 ($schema: ".../2020-12/schema"). Mastra's tool-input validator
 * can't resolve that meta-schema and rejects every call locally ("no schema with
 * key or ref ...2020-12/schema"). Overriding inputSchema with a plain Zod schema
 * sidesteps the broken validation — the MCP server still validates server-side.
 *
 * Keyed by the registered tool id (serverName_toolName). The value type is
 * spelled out because a bare `Record` is not a valid type (it requires two type
 * arguments).
 *
 * NOTE(review): `z.object` drops keys it does not declare when parsing. If the
 * validator forwards the parsed value rather than the raw input, any tool
 * argument not listed here never reaches the server — confirm before relying on
 * additional Firecrawl options.
 */
export const SCHEMA_OVERRIDES: Record<string, z.ZodTypeAny> = {
  firecrawl_firecrawl_scrape: z.object({
    url: z.string().describe("The URL to scrape"),
    formats: z.array(z.string()).optional().describe('Output formats, e.g. ["markdown"]'),
    onlyMainContent: z.boolean().optional().describe("Strip nav/footer boilerplate (default true)"),
  }),
  firecrawl_firecrawl_search: z.object({
    query: z.string().describe("The search query"),
    limit: z.number().optional().describe("Max results to return"),
  }),
};
/**
 * Apply schema overrides to a loaded tool map, mutating each matching tool's
 * `inputSchema` in place and leaving the map's entries pointing at the same
 * objects. The original author's rationale for mutating rather than copying:
 * `Tool.execute` validates against `this.inputSchema` at call time and is bound
 * to the instance, so a spread copy would never take effect.
 *
 * If an override's server looks loaded but the specific tool id is absent (e.g.
 * the server renamed it, or the `serverName_toolName` join changed), the
 * override cannot apply and calls regress to draft-2020-12 validation failures,
 * so a warning is emitted. When no tool from that server is loaded at all, the
 * override is skipped silently.
 *
 * The server name is taken as the text before the FIRST underscore of the id.
 * Two servers that share that leading segment (e.g. `my_a`, `my_b`) therefore
 * cannot be told apart, and a warning may fire for the wrong one.
 *
 * @param tools - Loaded tools keyed by registered id; matching entries are mutated.
 * @param overrides - Replacement schemas keyed by tool id. Defaults to `SCHEMA_OVERRIDES`.
 * @param warn - Sink for "override not applied" messages. Defaults to `console.warn`.
 * @returns The ids whose schema was actually replaced, in override iteration order.
 */
export function applySchemaOverrides(
  tools: Record<string, { inputSchema: unknown }>,
  overrides: Record<string, z.ZodTypeAny> = SCHEMA_OVERRIDES,
  warn: (msg: string) => void = (m) => console.warn(m),
): string[] {
  const applied: string[] = [];
  // The set of loaded ids does not change while overrides are applied.
  const loadedIds = Object.keys(tools);

  for (const [id, schema] of Object.entries(overrides)) {
    // Own-property lookup only: `id in tools` also matches inherited members
    // such as `toString`, which would end up with an `inputSchema` written onto
    // an Object.prototype function.
    const tool = Object.prototype.hasOwnProperty.call(tools, id) ? tools[id] : undefined;
    if (tool != null) {
      tool.inputSchema = schema;
      applied.push(id);
      continue;
    }

    // Without a `server_` prefix there is no server to look for. Slicing with
    // indexOf's -1 would chop the last character and invent a bogus prefix.
    const separatorAt = id.indexOf("_");
    if (separatorAt <= 0) {
      continue;
    }

    const serverPrefix = id.slice(0, separatorAt + 1);
    if (loadedIds.some((loadedId) => loadedId.startsWith(serverPrefix))) {
      warn(`[mcp] schema override for "${id}" not applied — no tool with that id loaded (renamed?). Calls may fail draft-2020-12 validation.`);
    }
  }

  return applied;
}
{ headers } : undefined, }, ]; @@ -129,6 +188,9 @@ export async function initMCPClient( console.log(`[mcp] filtered ${Object.keys(allTools).length} → ${Object.keys(rawTools).length} tool(s) via whitelist`); } + // Replace broken (draft-2020-12) input schemas with clean Zod ones. + applySchemaOverrides(rawTools as Record); + // Wrap MCP tools to cap result size (prevent context overflow) const MAX_RESULT_CHARS = 30_000; mcpTools = Object.fromEntries(