Skip to content

Commit

Permalink
feat: strip (#111)
Browse files Browse the repository at this point in the history
  • Loading branch information
crowlKats authored Mar 8, 2024
1 parent 700b143 commit c1a3902
Show file tree
Hide file tree
Showing 8 changed files with 196 additions and 19 deletions.
148 changes: 138 additions & 10 deletions mod.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,13 @@ export class Renderer extends Marked.Renderer {
}
}

// Block math: `$$`, one whitespace character, a lazily captured body (group 1),
// one whitespace character, then the closing `$$`. `.` does not match line
// breaks, so the captured body stays on a single line; the surrounding `\s`
// may itself be a newline.
const BLOCK_MATH_REGEXP = /\$\$\s(.+?)\s\$\$/g;
// Inline math: one whitespace character, `$`, a lazily captured body (group 1),
// then the closing `$`. The opening `$` must be followed immediately by a
// non-whitespace character. The leading whitespace is part of the match, so a
// replacement has to re-insert it if it should be kept.
const INLINE_MATH_REGEXP = /\s\$((?=\S).*?(?=\S))\$/g;

/** Convert inline and block math to katex */
function mathify(markdown: string) {
// Deal with block math
markdown = markdown.replace(/\$\$\s(.+?)\s\$\$/g, (match, p1) => {
markdown = markdown.replace(BLOCK_MATH_REGEXP, (match, p1) => {
try {
return katex.renderToString(p1.trim(), { displayMode: true });
} catch (e) {
Expand All @@ -102,7 +105,7 @@ function mathify(markdown: string) {
});

// Deal with inline math
markdown = markdown.replace(/\s\$((?=\S).*?(?=\S))\$/g, (match, p1) => {
markdown = markdown.replace(INLINE_MATH_REGEXP, (match, p1) => {
try {
return " " + katex.renderToString(p1, { displayMode: false });
} catch (e) {
Expand All @@ -115,6 +118,17 @@ function mathify(markdown: string) {
return markdown;
}

/**
 * Translate the public render options into the options object handed to
 * Marked.
 *
 * `gfm` is always on, `mangle` and `async` are always off. `breaks` defaults
 * to `false`, and a fresh `Renderer` built from `opts` is used when the caller
 * did not supply one.
 */
function getOpts(opts: RenderOptions) {
  const renderer = opts.renderer || new Renderer(opts);
  const breaks = opts.breaks ?? false;

  return {
    baseUrl: opts.baseUrl,
    breaks,
    gfm: true,
    mangle: false,
    renderer,
    async: false,
  };
}

export interface RenderOptions {
baseUrl?: string;
mediaBaseUrl?: string;
Expand All @@ -136,14 +150,7 @@ export function render(markdown: string, opts: RenderOptions = {}): string {
markdown = mathify(markdown);
}

const marked_opts = {
baseUrl: opts.baseUrl,
breaks: opts.breaks ?? false,
gfm: true,
mangle: false,
renderer: opts.renderer ? opts.renderer : new Renderer(opts),
async: false,
};
const marked_opts = getOpts(opts);

const html =
(opts.inline
Expand Down Expand Up @@ -336,3 +343,124 @@ function mergeAttributes(
}
return merged;
}

/**
 * Flatten a list of Marked tokens into plain text.
 *
 * For every token the text of its nested `tokens` (if any) is emitted first,
 * followed by whatever the token type itself contributes:
 *
 * - `space`: its raw text
 * - `code`: its text, unless the block's language is `math`
 * - `heading`: a blank line after the heading text
 * - `table`: each cell followed by a space, each row followed by a newline
 * - `list`: the text of its items; every `list_item` ends with a newline
 * - `html`: the text with all tags removed, followed by a blank line
 * - `text`: its raw text, but only when it has no nested tokens
 * - `image`: the title when present, otherwise the alt text
 * - `codespan`: its text
 *
 * All other handled token types add nothing of their own.
 *
 * @param tokens Tokens to flatten.
 * @returns The concatenated plain text (not trimmed).
 */
function stripTokens(tokens: Marked.Token[]): string {
  const pieces: string[] = [];

  for (const token of tokens) {
    // Nested content always comes before the token's own contribution.
    const children = "tokens" in token ? token.tokens : undefined;
    if (children) {
      pieces.push(stripTokens(children));
    }

    switch (token.type) {
      case "space":
      case "text":
        // A `text` token with children was already emitted through them.
        if (token.type === "space" || !children) {
          pieces.push(token.raw);
        }
        break;

      case "code":
        if (token.lang !== "math") {
          pieces.push(token.text);
        }
        break;

      case "codespan":
        pieces.push(token.text);
        break;

      case "heading":
        pieces.push("\n\n");
        break;

      case "list_item":
        pieces.push("\n");
        break;

      case "list":
        pieces.push(stripTokens(token.items));
        break;

      case "table": {
        for (const headerCell of token.header) {
          pieces.push(stripTokens(headerCell.tokens), " ");
        }
        pieces.push("\n");
        for (const row of token.rows) {
          for (const cell of row) {
            pieces.push(stripTokens(cell.tokens), " ");
          }
          pieces.push("\n");
        }
        break;
      }

      case "html": {
        // TODO: extract alt from img
        const bare = sanitizeHtml(token.text, {
          allowedTags: [],
          allowedAttributes: {},
        }).trim();
        pieces.push(bare + "\n\n");
        break;
      }

      case "image":
        pieces.push(token.title ? token.title : token.text);
        break;

      // These types contribute nothing themselves; where they wrap other
      // tokens, that content was already collected above.
      case "hr":
      case "blockquote":
      case "paragraph":
      case "def":
      case "escape":
      case "link":
      case "strong":
      case "em":
      case "br":
      case "del":
        break;
    }
  }

  return pieces.join("");
}

/**
 * Tokenizer used when stripping markdown: it replaces the `codespan` rule so
 * that the code span text is kept verbatim instead of being escaped.
 */
class StripTokenizer extends Marked.Tokenizer {
  /**
   * Tokenize an inline code span at the start of `src`.
   *
   * @param src Remaining inline source.
   * @returns A codespan token, or `undefined` when `src` does not start with
   * a code span.
   */
  codespan(src: string): Marked.Tokens.Codespan | undefined {
    // copied & modified from Marked to remove escaping
    const match = this.rules.inline.code.exec(src);
    if (!match) {
      return undefined;
    }

    // Line breaks inside the span are treated as single spaces.
    const flattened = match[2].split("\n").join(" ");

    // One space of padding is dropped from each end, but only when the span
    // is padded on both sides and is not made up of spaces alone.
    const paddedBothEnds = flattened.startsWith(" ") &&
      flattened.endsWith(" ");
    const onlySpaces = !/[^ ]/.test(flattened);
    const text = paddedBothEnds && !onlySpaces
      ? flattened.substring(1, flattened.length - 1)
      : flattened;

    return { type: "codespan", raw: match[0], text };
  }
}

/**
 * Strip all markdown syntax to get a plaintext output.
 *
 * The input is passed through `emojify`, block and inline math are removed,
 * and the remainder is lexed with Marked (using `StripTokenizer`) and
 * flattened to text. The result is trimmed, runs of three or more newlines
 * are collapsed to a single newline, and exactly one trailing newline is
 * appended.
 *
 * @param markdown Markdown source.
 * @param opts Render options forwarded to the lexer.
 * @returns The plaintext, always ending in a newline.
 */
export function strip(markdown: string, opts: RenderOptions = {}): string {
  const withoutMath = emojify(markdown)
    .replace(BLOCK_MATH_REGEXP, "")
    .replace(INLINE_MATH_REGEXP, "");

  const lexerOptions = {
    ...getOpts(opts),
    tokenizer: new StripTokenizer(),
  };
  const tokens = Marked.marked.lexer(withoutMath, lexerOptions);

  const plain = stripTokens(tokens).trim().replace(/\n{3,}/g, "\n");
  return plain + "\n";
}
13 changes: 13 additions & 0 deletions test/fixtures/alerts.strip
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Alerts

Note
Highlights information that users should take into account, even when
skimming.
Tip
Optional information to help a user be more successful.
Important
Crucial information necessary for users to succeed.
Warning
Critical content demanding immediate user attention due to potential risks.
Caution
Negative potential consequences of an action.
5 changes: 5 additions & 0 deletions test/fixtures/basic.strip
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Heading

list1
list2
list3
8 changes: 8 additions & 0 deletions test/fixtures/detailsSummaryDel.strip
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Example

Shopping list

Vegetables
Fruits
Fish
tofu
Empty file added test/fixtures/lineBreaks.strip
Empty file.
2 changes: 2 additions & 0 deletions test/fixtures/math.strip
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Block math:
Inline math:
3 changes: 3 additions & 0 deletions test/fixtures/taskList.strip
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Normal list
done
not done
36 changes: 27 additions & 9 deletions test/test.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import { assertEquals, assertStringIncludes } from "@std/assert";
import { DOMParser } from "https://deno.land/x/[email protected]/deno-dom-wasm.ts";
import { render, Renderer } from "../mod.ts";
import { render, Renderer, strip } from "../mod.ts";

Deno.test("Basic markdown", async () => {
const markdown = await Deno.readTextFile("./test/fixtures/basic.md");
const expected = await Deno.readTextFile("./test/fixtures/basic.html");
const expectedHTML = await Deno.readTextFile("./test/fixtures/basic.html");
const expectedStrip = await Deno.readTextFile("./test/fixtures/basic.strip");
const html = render(markdown);
assertEquals(html, expected);
assertEquals(html, expectedHTML);
assertEquals(strip(markdown), expectedStrip);

const document = new DOMParser().parseFromString(html, "text/html");
assertEquals(document?.querySelector("h1")?.textContent, "Heading");
Expand All @@ -16,8 +18,11 @@ Deno.test("Basic markdown", async () => {
Deno.test("Math rendering", async () => {
const math = await Deno.readTextFile("./test/fixtures/math.md");
const expected = await Deno.readTextFile("./test/fixtures/math.html");
const expectedStrip = await Deno.readTextFile("./test/fixtures/math.strip");
const html = render(math, { allowMath: true });
assertEquals(html, expected);
assertEquals(strip(math), expectedStrip);

const document = new DOMParser().parseFromString(html, "text/html");
assertEquals(
document?.querySelector(".katex-mathml")?.textContent,
Expand Down Expand Up @@ -101,9 +106,13 @@ Deno.test(
"alerts rendering",
async () => {
const markdown = await Deno.readTextFile("./test/fixtures/alerts.md");
const expected = await Deno.readTextFile("./test/fixtures/alerts.html");
const expectedHTML = await Deno.readTextFile("./test/fixtures/alerts.html");
const expectedStrip = await Deno.readTextFile(
"./test/fixtures/alerts.strip",
);
const html = render(markdown);
assertEquals(html, expected);
assertEquals(html, expectedHTML);
assertEquals(strip(html), expectedStrip);
},
);

Expand Down Expand Up @@ -349,12 +358,16 @@ Deno.test("details, summary, and del", () => {
</details>
`;
const expected = Deno.readTextFileSync(
const expectedHTML = Deno.readTextFileSync(
"./test/fixtures/detailsSummaryDel.html",
);
const expectedStrip = Deno.readTextFileSync(
"./test/fixtures/detailsSummaryDel.strip",
);

const html = render(markdown);
assertEquals(html, expected);
assertEquals(html, expectedHTML);
assertEquals(strip(markdown), expectedStrip);
});

Deno.test("del tag test", () => {
Expand All @@ -363,6 +376,7 @@ Deno.test("del tag test", () => {

const html = render(markdown);
assertEquals(html, result);
assertEquals(strip(markdown), "tofu\n");
});

Deno.test("h1 test", () => {
Expand All @@ -372,6 +386,7 @@ Deno.test("h1 test", () => {

const html = render(markdown);
assertEquals(html, result);
assertEquals(strip(markdown), "Hello\n");
});

Deno.test("svg test", () => {
Expand All @@ -380,16 +395,19 @@ Deno.test("svg test", () => {

const html = render(markdown);
assertEquals(html, result);
assertEquals(strip(markdown), "\n");
});

Deno.test("task list", () => {
const markdown = `- Normal list
- [x] done
- [ ] not done`;
const expected = Deno.readTextFileSync("./test/fixtures/taskList.html");
const expectedHTML = Deno.readTextFileSync("./test/fixtures/taskList.html");
const expectedStrip = Deno.readTextFileSync("./test/fixtures/taskList.strip");

const html = render(markdown);
assertEquals(html, expected);
assertEquals(html, expectedHTML);
assertEquals(strip(markdown), expectedStrip);
});

Deno.test("anchor test raw", () => {
Expand Down

0 comments on commit c1a3902

Please sign in to comment.