Skip to content

Commit c1a3902

Browse files
authored
feat: strip (#111)
1 parent 700b143 commit c1a3902

8 files changed

+196
-19
lines changed

mod.ts

+138-10
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,13 @@ export class Renderer extends Marked.Renderer {
8888
}
8989
}
9090

91+
// Block (display) math: `$$`, exactly one whitespace character, the expression
// (captured, matched lazily, and unable to span line terminators because `.`
// is used without the `s` flag), one whitespace character, then `$$`.
// Global so that `String.prototype.replace` rewrites every occurrence.
const BLOCK_MATH_REGEXP = /\$\$\s(.+?)\s\$\$/g;
92+
// Inline math: a whitespace character, `$`, the expression (captured), `$`.
// The leading whitespace character is part of the match, so a replacement
// that wants to keep it has to re-insert it. The captured expression must
// start with a non-whitespace character (leading lookahead).
// NOTE(review): the trailing `(?=\S)` sits directly before `\$`, so it appears
// to be satisfied by the closing `$` itself and would not reject content that
// ends in whitespace — confirm whether a lookbehind was intended.
const INLINE_MATH_REGEXP = /\s\$((?=\S).*?(?=\S))\$/g;
93+
9194
/** Convert inline and block math to katex */
9295
function mathify(markdown: string) {
9396
// Deal with block math
94-
markdown = markdown.replace(/\$\$\s(.+?)\s\$\$/g, (match, p1) => {
97+
markdown = markdown.replace(BLOCK_MATH_REGEXP, (match, p1) => {
9598
try {
9699
return katex.renderToString(p1.trim(), { displayMode: true });
97100
} catch (e) {
@@ -102,7 +105,7 @@ function mathify(markdown: string) {
102105
});
103106

104107
// Deal with inline math
105-
markdown = markdown.replace(/\s\$((?=\S).*?(?=\S))\$/g, (match, p1) => {
108+
markdown = markdown.replace(INLINE_MATH_REGEXP, (match, p1) => {
106109
try {
107110
return " " + katex.renderToString(p1, { displayMode: false });
108111
} catch (e) {
@@ -115,6 +118,17 @@ function mathify(markdown: string) {
115118
return markdown;
116119
}
117120

121+
/**
 * Translate the caller-facing render options into the option object that is
 * handed to Marked.
 *
 * `gfm` is always enabled and `mangle` / `async` are always disabled; only
 * `baseUrl`, `breaks` and `renderer` are taken from `opts`.
 *
 * @param opts Options supplied by the caller.
 * @returns A fresh options object; `breaks` falls back to `false` when it is
 *   nullish and `renderer` falls back to a new `Renderer` built from `opts`.
 */
function getOpts(opts: RenderOptions) {
  const baseUrl = opts.baseUrl;
  const breaks = opts.breaks ?? false;
  // Reuse the caller's renderer when one was given, otherwise build our own.
  const renderer = opts.renderer || new Renderer(opts);

  return {
    baseUrl,
    breaks,
    gfm: true,
    mangle: false,
    renderer,
    async: false,
  };
}
131+
118132
export interface RenderOptions {
119133
baseUrl?: string;
120134
mediaBaseUrl?: string;
@@ -136,14 +150,7 @@ export function render(markdown: string, opts: RenderOptions = {}): string {
136150
markdown = mathify(markdown);
137151
}
138152

139-
const marked_opts = {
140-
baseUrl: opts.baseUrl,
141-
breaks: opts.breaks ?? false,
142-
gfm: true,
143-
mangle: false,
144-
renderer: opts.renderer ? opts.renderer : new Renderer(opts),
145-
async: false,
146-
};
153+
const marked_opts = getOpts(opts);
147154

148155
const html =
149156
(opts.inline
@@ -336,3 +343,124 @@ function mergeAttributes(
336343
}
337344
return merged;
338345
}
346+
347+
/**
 * Flatten a Marked token tree into plain text.
 *
 * For every token, its nested `tokens` (when present) are flattened first by
 * recursion; the token's own contribution is then appended depending on its
 * type. Token types that are not handled in the switch add nothing of their
 * own.
 *
 * @param tokens Tokens to flatten, e.g. the result of the Marked lexer.
 * @returns The concatenated plain-text contribution of all tokens, in order.
 */
function stripTokens(tokens: Marked.Token[]): string {
  let text = "";

  for (const tok of tokens) {
    // Children come first so that wrappers (headings, emphasis, links, ...)
    // only have to append their own trailing separator, if any.
    if ("tokens" in tok && tok.tokens) {
      text += stripTokens(tok.tokens);
    }

    switch (tok.type) {
      // --- leaves that carry their own text -------------------------------
      case "space":
        text += tok.raw;
        break;
      case "text":
        // A text token with children was already emitted by the recursion
        // above; only leaf text contributes its raw source here.
        if (!("tokens" in tok) || !tok.tokens) {
          text += tok.raw;
        }
        break;
      case "codespan":
        text += tok.text;
        break;
      case "code":
        // Fenced blocks tagged as math are dropped from the output.
        if (tok.lang !== "math") {
          text += tok.text;
        }
        break;
      case "image":
        // Prefer the title; fall back to the image text.
        text += tok.title ? tok.title : tok.text;
        break;
      case "html":
        // TODO: extract alt from img
        // With no allowed tags or attributes only the textual content is kept.
        text += sanitizeHtml(tok.text, {
          allowedTags: [],
          allowedAttributes: {},
        }).trim() + "\n\n";
        break;

      // --- containers that only add a separator ---------------------------
      case "heading":
        text += "\n\n";
        break;
      case "list_item":
        text += "\n";
        break;

      // --- containers whose children are not stored under `tokens` --------
      case "list":
        text += stripTokens(tok.items);
        break;
      case "table":
        // Header row first, then the body rows: each cell is followed by a
        // space and each row is terminated by a newline.
        for (const row of [tok.header, ...tok.rows]) {
          for (const cell of row) {
            text += stripTokens(cell.tokens) + " ";
          }
          text += "\n";
        }
        break;

      // --- nothing to add beyond what the recursion already produced ------
      case "hr":
      case "blockquote":
      case "paragraph":
      case "def":
      case "escape":
      case "link":
      case "strong":
      case "em":
      case "br":
      case "del":
        break;
    }
  }

  return text;
}
432+
433+
/**
 * Tokenizer used when stripping markdown: identical to the stock Marked
 * tokenizer except that code spans keep their text unescaped.
 */
class StripTokenizer extends Marked.Tokenizer {
  /**
   * Tokenize an inline code span without escaping its contents.
   *
   * @param src Remaining inline source to match against.
   * @returns A codespan token, or `undefined` when `src` does not start one.
   */
  codespan(src: string): Marked.Tokens.Codespan | undefined {
    // copied & modified from Marked to remove escaping
    const match = this.rules.inline.code.exec(src);
    if (!match) {
      return undefined;
    }

    // Line breaks inside a code span are treated as single spaces.
    const flattened = match[2].replace(/\n/g, " ");

    // One space of padding is removed from each end, but only when both ends
    // are padded and the span is not made up of spaces alone.
    const paddedBothEnds = flattened.startsWith(" ") &&
      flattened.endsWith(" ");
    const onlySpaces = !/[^ ]/.test(flattened);
    const text = paddedBothEnds && !onlySpaces
      ? flattened.slice(1, -1)
      : flattened;

    return {
      type: "codespan",
      raw: match[0],
      text,
    };
  }
}
452+
453+
/**
 * Strip all markdown syntax to get a plaintext output.
 *
 * Emoji shortcodes are expanded first, then every block-math and inline-math
 * match is removed (no option is consulted for this). The remaining text is
 * lexed by Marked with a tokenizer that leaves code spans unescaped, and the
 * resulting tokens are flattened to text.
 *
 * @param markdown Markdown source to convert.
 * @param opts Render options; they are forwarded to the lexer configuration.
 * @returns The plain text, trimmed, with every run of three or more newlines
 *   collapsed into a single newline, and terminated by exactly one newline.
 */
export function strip(markdown: string, opts: RenderOptions = {}): string {
  const withoutMath = emojify(markdown)
    .replace(BLOCK_MATH_REGEXP, "")
    .replace(INLINE_MATH_REGEXP, "");

  const lexerOptions = {
    ...getOpts(opts),
    tokenizer: new StripTokenizer(),
  };
  const tokens = Marked.marked.lexer(withoutMath, lexerOptions);

  const plain = stripTokens(tokens).trim().replace(/\n{3,}/g, "\n");
  return plain + "\n";
}

test/fixtures/alerts.strip

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Alerts
2+
3+
Note
4+
Highlights information that users should take into account, even when
5+
skimming.
6+
Tip
7+
Optional information to help a user be more successful.
8+
Important
9+
Crucial information necessary for users to succeed.
10+
Warning
11+
Critical content demanding immediate user attention due to potential risks.
12+
Caution
13+
Negative potential consequences of an action.

test/fixtures/basic.strip

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Heading
2+
3+
list1
4+
list2
5+
list3

test/fixtures/detailsSummaryDel.strip

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Example
2+
3+
Shopping list
4+
5+
Vegetables
6+
Fruits
7+
Fish
8+
tofu

test/fixtures/lineBreaks.strip

Whitespace-only changes.

test/fixtures/math.strip

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Block math:
2+
Inline math:

test/fixtures/taskList.strip

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Normal list
2+
done
3+
not done

test/test.ts

+27-9
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import { assertEquals, assertStringIncludes } from "@std/assert";
22
import { DOMParser } from "https://deno.land/x/[email protected]/deno-dom-wasm.ts";
3-
import { render, Renderer } from "../mod.ts";
3+
import { render, Renderer, strip } from "../mod.ts";
44

55
Deno.test("Basic markdown", async () => {
66
const markdown = await Deno.readTextFile("./test/fixtures/basic.md");
7-
const expected = await Deno.readTextFile("./test/fixtures/basic.html");
7+
const expectedHTML = await Deno.readTextFile("./test/fixtures/basic.html");
8+
const expectedStrip = await Deno.readTextFile("./test/fixtures/basic.strip");
89
const html = render(markdown);
9-
assertEquals(html, expected);
10+
assertEquals(html, expectedHTML);
11+
assertEquals(strip(markdown), expectedStrip);
1012

1113
const document = new DOMParser().parseFromString(html, "text/html");
1214
assertEquals(document?.querySelector("h1")?.textContent, "Heading");
@@ -16,8 +18,11 @@ Deno.test("Basic markdown", async () => {
1618
Deno.test("Math rendering", async () => {
1719
const math = await Deno.readTextFile("./test/fixtures/math.md");
1820
const expected = await Deno.readTextFile("./test/fixtures/math.html");
21+
const expectedStrip = await Deno.readTextFile("./test/fixtures/math.strip");
1922
const html = render(math, { allowMath: true });
2023
assertEquals(html, expected);
24+
assertEquals(strip(math), expectedStrip);
25+
2126
const document = new DOMParser().parseFromString(html, "text/html");
2227
assertEquals(
2328
document?.querySelector(".katex-mathml")?.textContent,
@@ -101,9 +106,13 @@ Deno.test(
101106
"alerts rendering",
102107
async () => {
103108
const markdown = await Deno.readTextFile("./test/fixtures/alerts.md");
104-
const expected = await Deno.readTextFile("./test/fixtures/alerts.html");
109+
const expectedHTML = await Deno.readTextFile("./test/fixtures/alerts.html");
110+
const expectedStrip = await Deno.readTextFile(
111+
"./test/fixtures/alerts.strip",
112+
);
105113
const html = render(markdown);
106-
assertEquals(html, expected);
114+
assertEquals(html, expectedHTML);
115+
assertEquals(strip(html), expectedStrip);
107116
},
108117
);
109118

@@ -349,12 +358,16 @@ Deno.test("details, summary, and del", () => {
349358
350359
</details>
351360
`;
352-
const expected = Deno.readTextFileSync(
361+
const expectedHTML = Deno.readTextFileSync(
353362
"./test/fixtures/detailsSummaryDel.html",
354363
);
364+
const expectedStrip = Deno.readTextFileSync(
365+
"./test/fixtures/detailsSummaryDel.strip",
366+
);
355367

356368
const html = render(markdown);
357-
assertEquals(html, expected);
369+
assertEquals(html, expectedHTML);
370+
assertEquals(strip(markdown), expectedStrip);
358371
});
359372

360373
Deno.test("del tag test", () => {
@@ -363,6 +376,7 @@ Deno.test("del tag test", () => {
363376

364377
const html = render(markdown);
365378
assertEquals(html, result);
379+
assertEquals(strip(markdown), "tofu\n");
366380
});
367381

368382
Deno.test("h1 test", () => {
@@ -372,6 +386,7 @@ Deno.test("h1 test", () => {
372386

373387
const html = render(markdown);
374388
assertEquals(html, result);
389+
assertEquals(strip(markdown), "Hello\n");
375390
});
376391

377392
Deno.test("svg test", () => {
@@ -380,16 +395,19 @@ Deno.test("svg test", () => {
380395

381396
const html = render(markdown);
382397
assertEquals(html, result);
398+
assertEquals(strip(markdown), "\n");
383399
});
384400

385401
Deno.test("task list", () => {
386402
const markdown = `- Normal list
387403
- [x] done
388404
- [ ] not done`;
389-
const expected = Deno.readTextFileSync("./test/fixtures/taskList.html");
405+
const expectedHTML = Deno.readTextFileSync("./test/fixtures/taskList.html");
406+
const expectedStrip = Deno.readTextFileSync("./test/fixtures/taskList.strip");
390407

391408
const html = render(markdown);
392-
assertEquals(html, expected);
409+
assertEquals(html, expectedHTML);
410+
assertEquals(strip(markdown), expectedStrip);
393411
});
394412

395413
Deno.test("anchor test raw", () => {

0 commit comments

Comments
 (0)