From 339fd82850b4e6a6d10a5f86b9cbcaf21cd734ef Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 23 Jun 2025 16:43:17 +0000 Subject: [PATCH 1/2] feat: expand MDX components in markdown routes - Add expandMdxComponents function to convert custom components to markdown - Handle TSFetchCodeBlock, CodeGroup, and Template components - Apply template variable replacement using existing applyTemplates function - Process components before stripping MDX features in convertToLlmTxtMarkdown - Support global template variables like FREE_MODEL_CREDITS_THRESHOLD - Maintain backward compatibility with existing markdown processing Co-Authored-By: sam --- .../fern-docs/bundle/src/server/llm-txt-md.ts | 159 +++++++++++++++++- 1 file changed, 158 insertions(+), 1 deletion(-) diff --git a/packages/fern-docs/bundle/src/server/llm-txt-md.ts b/packages/fern-docs/bundle/src/server/llm-txt-md.ts index 94044a4273..fae6143c5c 100644 --- a/packages/fern-docs/bundle/src/server/llm-txt-md.ts +++ b/packages/fern-docs/bundle/src/server/llm-txt-md.ts @@ -6,6 +6,7 @@ import { toTree, visit, } from "@fern-docs/mdx"; +import { applyTemplates } from "../mdx/components/code/Template"; export function convertToLlmTxtMarkdown( markdown: string, @@ -22,12 +23,168 @@ export function convertToLlmTxtMarkdown( return [ `# ${title}`, description != null ? `> ${description}` : undefined, - stripMdxFeatures(content, format), + stripMdxFeatures(expandMdxComponents(content, format), format), ] .filter(isNonNullish) .join("\n\n"); } +/** + * Expands custom MDX components to their semantic markdown equivalents + * before stripping MDX features. This ensures components like TSFetchCodeBlock + * and Template are converted to readable content for LLM consumption. + */ +function expandMdxComponents(markdown: string, format: "mdx" | "md"): string { + if (format !== "mdx") { + return markdown; + } + + const { mdast } = toTree(markdown, { + format, + sanitize: true, + }); + + visit(mdast, (node, idx, parent) => { + if (parent == null || idx == null) { + return; + } + + if (isMdxJsxElementHast(node)) { + if (node.name === "TSFetchCodeBlock") { + const codeContent = extractCodeFromTSFetchCodeBlock(node); + if (codeContent) { + const codeBlock = { + type: "code", + lang: "typescript", + value: codeContent, + }; + parent.children[idx] = codeBlock; + } + return; + } + + if (node.name === "CodeGroup") { + const codeBlocks = extractCodeFromCodeGroup(node); + if (codeBlocks.length > 0) { + parent.children.splice(idx, 1, ...codeBlocks); + return idx + codeBlocks.length - 1; + } + return; + } + + if (node.name === "Template") { + const templateData = extractTemplateData(node); + if (templateData && node.children) { + const childrenMarkdown = mdastToMarkdown({ type: "root", children: node.children }); + const expandedContent = applyTemplates(childrenMarkdown, templateData); + + const { mdast: expandedMdast } = toTree(expandedContent, { format: "md", sanitize: true }); + if (expandedMdast.children) { + parent.children.splice(idx, 1, ...expandedMdast.children); + return idx + expandedMdast.children.length - 1; + } + } + return; + } + } + + return; + }); + + let expandedMarkdown = mdastToMarkdown(mdast); + + const templateData = extractGlobalTemplateData(markdown); + if (templateData && Object.keys(templateData).length > 0) { + expandedMarkdown = applyTemplates(expandedMarkdown, templateData); + } + + return expandedMarkdown; +} + +/** + * Extract code content from TSFetchCodeBlock component + */ +function extractCodeFromTSFetchCodeBlock(node: any): string | null { + if (node.children && node.children.length > 0) { + const codeChild = node.children.find((child: any) => child.type === "text" || child.type === "code"); + if (codeChild) { + return codeChild.value || codeChild.children?.[0]?.value || ""; + } + } + + const srcAttr = node.attributes?.find((attr: any) => attr.name === "src"); + const contentAttr = node.attributes?.find((attr: any) => attr.name === "content"); + + if (contentAttr?.value) { + return contentAttr.value; + } + + if (srcAttr?.value) { + return `// Code from: ${srcAttr.value}`; + } + + return null; +} + +/** + * Extract code blocks from CodeGroup component + */ +function extractCodeFromCodeGroup(node: any): any[] { + const codeBlocks: any[] = []; + + if (node.children) { + node.children.forEach((child: any, index: number) => { + if (child.type === "code" || (child.type === "element" && child.tagName === "code")) { + codeBlocks.push({ + type: "code", + lang: child.lang || "text", + value: child.value || child.children?.[0]?.value || "", + }); + } else if (isMdxJsxElementHast(child) && child.name === "Code") { + const lang = child.attributes?.find((attr: any) => attr.name === "language")?.value || "text"; + const content = child.children?.[0]?.value || ""; + codeBlocks.push({ + type: "code", + lang, + value: content, + }); + } + }); + } + + return codeBlocks; +} + +/** + * Extract template data from Template component attributes + */ +function extractTemplateData(node: any): Record | null { + const dataAttr = node.attributes?.find((attr: any) => attr.name === "data"); + if (dataAttr?.value && typeof dataAttr.value === "object") { + return dataAttr.value; + } + return null; +} + +/** + * Extract global template variables from markdown content + * This handles common OpenRouter template variables + */ +function extractGlobalTemplateData(markdown: string): Record { + const templateData: Record = { + FREE_MODEL_CREDITS_THRESHOLD: "10", // Example value + API_KEY_REF: "your-api-key", + BASE_URL: "https://openrouter.ai/api/v1", + }; + + const { data: frontmatter } = getFrontmatter(markdown); + if (frontmatter.templateData) { + Object.assign(templateData, frontmatter.templateData); + } + + return templateData; +} + /** * This is a living list of mdx features that we don't want to include in the LLM TXT format: * - esm imports From aeca96cba6664b804402ff8e7f3b917d94c3fa8d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 23 Jun 2025 20:12:12 +0000 Subject: [PATCH 2/2] test: add comprehensive tests for MDX component expansion - Test expandMdxComponents function with various MDX inputs - Test TSFetchCodeBlock, CodeGroup, and Template component expansion - Test global template variable replacement - Test edge cases and malformed components - Test backward compatibility with non-MDX content - Cover integration with convertToLlmTxtMarkdown function Co-Authored-By: sam --- .../bundle/src/server/llm-txt-md.test.ts | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 packages/fern-docs/bundle/src/server/llm-txt-md.test.ts diff --git a/packages/fern-docs/bundle/src/server/llm-txt-md.test.ts b/packages/fern-docs/bundle/src/server/llm-txt-md.test.ts new file mode 100644 index 0000000000..77e3d407f6 --- /dev/null +++ b/packages/fern-docs/bundle/src/server/llm-txt-md.test.ts @@ -0,0 +1,186 @@ +import { convertToLlmTxtMarkdown } from "./llm-txt-md"; + +describe("llm-txt-md", () => { + describe("convertToLlmTxtMarkdown", () => { + it("should handle regular markdown without MDX components", () => { + const markdown = "# Test\n\nThis is regular markdown."; + const result = convertToLlmTxtMarkdown(markdown, "Test Page", "md"); + + expect(result).toContain("# Test Page"); + expect(result).toContain("This is regular markdown."); + }); + + it("should expand TSFetchCodeBlock components to code blocks", () => { + const markdown = `# Test + + +console.log("Hello, world!"); +`; + + const result = convertToLlmTxtMarkdown(markdown, "Test Page", "mdx"); + + expect(result).toContain("# Test Page"); + expect(result).toContain("```typescript"); + expect(result).toContain('console.log("Hello, world!");'); + expect(result).not.toContain(""); + }); + + it("should expand CodeGroup components to multiple code blocks", () => { + const markdown = `# Test + + + +console.log("JS code"); + + +print("Python code") + +`; + + const result = convertToLlmTxtMarkdown(markdown, "Test Page", "mdx"); + + expect(result).toContain("# Test Page"); + expect(result).toContain("```javascript"); + expect(result).toContain('console.log("JS code");'); + expect(result).toContain("```python"); + expect(result).toContain('print("Python code")'); + expect(result).not.toContain(""); + expect(result).not.toContain(" { + const markdown = `# Test + +`; + + const result = convertToLlmTxtMarkdown(markdown, "Test Page", "mdx"); + + expect(result).toContain("# Test Page"); + expect(result).toContain("Use your API key: test-key-123"); + expect(result).toContain("Base URL: https://api.example.com"); + expect(result).not.toContain(" { + const markdown = `# Test + +Your free credits threshold is {{FREE_MODEL_CREDITS_THRESHOLD}}. +Use API key: {{API_KEY_REF}}`; + + const result = convertToLlmTxtMarkdown(markdown, "Test Page", "mdx"); + + expect(result).toContain("# Test Page"); + expect(result).toContain("Your free credits threshold is 10"); + expect(result).toContain("Use API key: your-api-key"); + expect(result).not.toContain("{{FREE_MODEL_CREDITS_THRESHOLD}}"); + expect(result).not.toContain("{{API_KEY_REF}}"); + }); + + it("should handle mixed content with multiple component types", () => { + const markdown = `# Mixed Content Test + +Regular markdown paragraph. + + +const apiKey = "{{API_KEY_REF}}"; + + + + + + +curl -H "Authorization: Bearer {{API_KEY_REF}}" + + + +More regular content with {{FREE_MODEL_CREDITS_THRESHOLD}} credits.`; + + const result = convertToLlmTxtMarkdown(markdown, "Mixed Test", "mdx"); + + expect(result).toContain("# Mixed Test"); + expect(result).toContain("Regular markdown paragraph."); + expect(result).toContain("```typescript"); + expect(result).toContain('const apiKey = "your-api-key";'); + expect(result).toContain("Hello Alice!"); + expect(result).toContain("```bash"); + expect(result).toContain('curl -H "Authorization: Bearer your-api-key"'); + expect(result).toContain("More regular content with 10 credits."); + + expect(result).not.toContain(""); + expect(result).not.toContain(""); + expect(result).not.toContain("{{"); + }); + + it("should handle empty or malformed components gracefully", () => { + const markdown = `# Edge Cases + + + + + + + +Regular content continues.`; + + const result = convertToLlmTxtMarkdown(markdown, "Edge Cases", "mdx"); + + expect(result).toContain("# Edge Cases"); + expect(result).toContain("Regular content continues."); + expect(result).not.toContain(""); + expect(result).not.toContain(""); + expect(result).not.toContain("