Skip to content

Commit aaba5a5

Browse files
committed
feat(mcp): Improve builtin-functions docs extraction
1 parent f3845af commit aaba5a5

File tree

1 file changed

+117
-106
lines changed

1 file changed

+117
-106
lines changed

mcp/extract-builtin-functions.ts

Lines changed: 117 additions & 106 deletions
Original file line number | Diff line number | Diff line change
@@ -43,122 +43,133 @@ async function extractBuiltinFunctions(
43 43

44 44
const builtins: BuiltinFunction[] = [];
45 45

46-
$("h3[id]").each((_, h3Element) => {
47-
const h3 = $(h3Element);
48-
const func = h3.find("a").first().text();
49-
50-
if (!func.startsWith("@")) {
51-
return;
52-
}
53-
54-
const signature = h3.next("pre").text().trim();
55-
const descriptionParts: string[] = [];
56-
const seeAlsoLinks: string[] = [];
57-
58-
let current = h3.next("pre").next();
59-
while (current.length && current.prop("tagName")?.toLowerCase() !== "h3") {
60-
if (current.is("p")) {
61-
const pHtml = current.html() || "";
62-
const $p = cheerio.load(pHtml, {
63-
xml: { xmlMode: false, decodeEntities: false },
64-
});
46+
// Find "Builtin Functions" section
47+
const builtinFunctionsSection = $('h2[id="Builtin-Functions"]');
48+
if (builtinFunctionsSection.length === 0) {
49+
throw new Error("Could not find Builtin Functions section in HTML");
50+
}
65 51

66-
$p("a").each((_, a) => {
67-
const link = $p(a);
68-
const href = link.attr("href") || "";
69-
const text = link.text();
70-
let markdownLink: string;
71-
if (href.startsWith("#")) {
72-
markdownLink = `[${text}](https://ziglang.org/documentation/${zigVersion}/${href})`;
73-
} else {
74-
markdownLink = `[${text}](${href})`;
52+
let current = builtinFunctionsSection.next();
53+
while (current.length && current.prop("tagName")?.toLowerCase() !== "h2") {
54+
if (current.is("h3[id]")) {
55+
const h3 = current;
56+
const func = h3.find("a").first().text();
57+
58+
if (func.startsWith("@")) {
59+
const signature = h3.next("pre").text().trim();
60+
const descriptionParts: string[] = [];
61+
const seeAlsoLinks: string[] = [];
62+
63+
let descCurrent = h3.next("pre").next();
64+
while (
65+
descCurrent.length &&
66+
!["h2", "h3"].includes(descCurrent.prop("tagName")?.toLowerCase() || "")
67+
) {
68+
if (descCurrent.is("p")) {
69+
const pHtml = descCurrent.html() || "";
70+
const $p = cheerio.load(pHtml, {
71+
xml: { xmlMode: false, decodeEntities: false },
72+
});
73+
74+
$p("a").each((_, a) => {
75+
const link = $p(a);
76+
const href = link.attr("href") || "";
77+
const text = link.text();
78+
let markdownLink: string;
79+
if (href.startsWith("#")) {
80+
markdownLink = `[${text}](https://ziglang.org/documentation/${zigVersion}/${href})`;
81+
} else {
82+
markdownLink = `[${text}](${href})`;
83+
}
84+
link.replaceWith(markdownLink);
85+
});
86+
87+
$p("code").each((_, code) => {
88+
const el = $p(code);
89+
el.replaceWith(`\`${el.text()}\``);
90+
});
91+
92+
const pText = $p.root().text();
93+
descriptionParts.push(pText.replace(/\s+/g, " ").trim());
94+
} else if (descCurrent.is("ul")) {
95+
// Convert each <li> to Markdown, handling <a> and <code> tags
96+
descCurrent.children("li").each((_, li) => {
97+
const liHtml = $(li).html() || "";
98+
const $li = cheerio.load(liHtml, {
99+
xml: { xmlMode: false, decodeEntities: false },
100+
});
101+
102+
$li("a").each((_, a) => {
103+
const link = $li(a);
104+
const href = link.attr("href") || "";
105+
const text = link.text();
106+
let markdownLink: string;
107+
if (href.startsWith("#")) {
108+
markdownLink = `[${text}](https://ziglang.org/documentation/${zigVersion}/${href})`;
109+
} else {
110+
markdownLink = `[${text}](${href})`;
111+
}
112+
link.replaceWith(markdownLink);
113+
});
114+
115+
$li("code").each((_, code) => {
116+
const el = $li(code);
117+
el.replaceWith(`\`${el.text()}\``);
118+
});
119+
120+
const liText = $li.root().text().replace(/\s+/g, " ").trim();
121+
if (liText.length > 0) {
122+
descriptionParts.push(`* ${liText}`);
123+
}
124+
});
125+
} else if (descCurrent.is("figure")) {
126+
// Extract <figcaption> and <pre> content
127+
const figcaption = descCurrent.find("figcaption").first().text().trim();
128+
const pre = descCurrent.find("pre").first();
129+
const code = pre.text();
130+
let lang = "";
131+
let label = "";
132+
if (figcaption) {
133+
label = `**${figcaption}**\n`;
134+
if (figcaption.endsWith(".zig")) {
135+
lang = "zig";
136+
} else if (figcaption.toLowerCase().includes("shell")) {
137+
lang = "sh";
138+
}
139+
}
140+
if (code) {
141+
// Format as Markdown code block
142+
const codeBlock = `${label}\n\`\`\`${lang}\n${code.trim()}\n\`\`\``;
143+
descriptionParts.push(codeBlock.trim());
144+
}
75 145
}
76-
link.replaceWith(markdownLink);
77-
});
78-
79-
$p("code").each((_, code) => {
80-
const el = $p(code);
81-
el.replaceWith(`\`${el.text()}\``);
82-
});
146+
descCurrent = descCurrent.next();
147+
}
83 148

84-
const pText = $p.root().text();
85-
descriptionParts.push(pText.replace(/\s+/g, " ").trim());
86-
} else if (current.is("ul")) {
87-
// Convert each <li> to Markdown, handling <a> and <code> tags
88-
current.children("li").each((_, li) => {
89-
const liHtml = $(li).html() || "";
90-
const $li = cheerio.load(liHtml, {
91-
xml: { xmlMode: false, decodeEntities: false },
92-
});
93-
94-
$li("a").each((_, a) => {
95-
const link = $li(a);
96-
const href = link.attr("href") || "";
97-
const text = link.text();
98-
let markdownLink: string;
99-
if (href.startsWith("#")) {
100-
markdownLink = `[${text}](https://ziglang.org/documentation/${zigVersion}/${href})`;
101-
} else {
102-
markdownLink = `[${text}](${href})`;
103-
}
104-
link.replaceWith(markdownLink);
105-
});
149+
// Join doc blocks with a single newline and collapse multiple newlines
150+
let docs = descriptionParts.join("\n");
151+
docs = docs.replace(/\n{2,}/g, "\n").replace(/\n+$/g, "");
106 152

107-
$li("code").each((_, code) => {
108-
const el = $li(code);
109-
el.replaceWith(`\`${el.text()}\``);
110-
});
153+
if (docs.toLowerCase().endsWith("see also:")) {
154+
docs = docs.slice(0, -"see also:".length).trim();
155+
}
111 156

112-
const liText = $li.root().text().replace(/\s+/g, " ").trim();
113-
if (liText.length > 0) {
114-
descriptionParts.push(`* ${liText}`);
115-
}
116-
});
117-
} else if (current.is("figure")) {
118-
// Extract <figcaption> and <pre> content
119-
const figcaption = current.find("figcaption").first().text().trim();
120-
const pre = current.find("pre").first();
121-
const code = pre.text();
122-
let lang = "";
123-
let label = "";
124-
if (figcaption) {
125-
label = `**${figcaption}**\n`;
126-
if (figcaption.endsWith(".zig")) {
127-
lang = "zig";
128-
} else if (figcaption.toLowerCase().includes("shell")) {
129-
lang = "sh";
157+
if (seeAlsoLinks.length > 0) {
158+
if (docs.length > 0) {
159+
docs += "\n";
130 160
}
161+
docs += `See also:\n* ${seeAlsoLinks.join("\n* ")}`;
131 162
}
132-
if (code) {
133-
// Format as Markdown code block
134-
const codeBlock = `${label}\n\`\`\`${lang}\n${code.trim()}\n\`\`\``;
135-
descriptionParts.push(codeBlock.trim());
136-
}
137-
}
138-
current = current.next();
139-
}
140 163

141-
// Join doc blocks with a single newline and collapse multiple newlines
142-
let docs = descriptionParts.join("\n");
143-
docs = docs.replace(/\n{2,}/g, "\n").replace(/\n+$/g, "");
144-
145-
if (docs.toLowerCase().endsWith("see also:")) {
146-
docs = docs.slice(0, -"see also:".length).trim();
147-
}
148-
149-
if (seeAlsoLinks.length > 0) {
150-
if (docs.length > 0) {
151-
docs += "\n";
164+
builtins.push({
165+
func,
166+
signature,
167+
docs,
168+
});
152 169
}
153-
docs += `See also:\n* ${seeAlsoLinks.join("\n* ")}`;
154 170
}
155-
156-
builtins.push({
157-
func,
158-
signature,
159-
docs,
160-
});
161-
});
171+
current = current.next();
172+
}
162 173

163 174
if (!fs.existsSync(versionCacheDir)) {
164 175
fs.mkdirSync(versionCacheDir, { recursive: true });

0 commit comments

Comments
 (0)