Skip to content

Commit 10f42d4

Browse files
authored
feat: stripSplitBySections (#116)
1 parent 5830a85 commit 10f42d4

File tree

6 files changed

+511
-26
lines changed

6 files changed

+511
-26
lines changed

example/content.md

+13
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,19 @@ document.getElementsByTagName("head")[0].innerHTML +=
4747
</p>
4848
</details>
4949

50+
| Type | Description | example |
51+
| ---------------- | ------------------------------------------------ | --------------------------------- |
52+
| `string` | A string of characters. | `'Hello world'` |
53+
| `number` | A numeric value, either float or integer. | `42` |
54+
| `boolean` | A boolean value. | `true` |
55+
| `enum` | An enum value. | `'drama'` |
56+
| `geopoint` | A geopoint value. | `{ lat: 40.7128, lon: 74.0060 }` |
57+
| `string[]` | An array of strings. | `['red', 'green', 'blue']` |
58+
| `number[]` | An array of numbers. | `[42, 91, 28.5]` |
59+
| `boolean[]` | An array of booleans. | `[true, false, false]` |
60+
| `enum[]` | An array of enums. | `['comedy', 'action', 'romance']` |
61+
| `vector[<size>]` | A vector of numbers to perform vector search on. | `[0.403, 0.192, 0.830]` |
62+
5063
## Math rendering
5164

5265
We support code blocks with the "math" type!

mod.ts

+73-25
Original file line numberDiff line numberDiff line change
@@ -344,60 +344,81 @@ function mergeAttributes(
344344
return merged;
345345
}
346346

347-
function stripTokens(tokens: Marked.Token[]): string {
348-
let out = "";
347+
function stripTokens(
348+
tokens: Marked.Token[],
349+
sections: MarkdownSections[],
350+
header: boolean,
351+
) {
352+
let index = sections.length - 1;
353+
349354
for (const token of tokens) {
355+
if (token.type === "heading") {
356+
sections[index].header = sections[index].header.trim().replace(
357+
/\n{3,}/g,
358+
"\n",
359+
);
360+
sections[index].content = sections[index].content.trim().replace(
361+
/\n{3,}/g,
362+
"\n",
363+
);
364+
365+
sections.push({ header: "", depth: token.depth, content: "" });
366+
index += 1;
367+
}
368+
350369
if ("tokens" in token && token.tokens) {
351-
out += stripTokens(token.tokens);
370+
stripTokens(token.tokens, sections, token.type === "heading");
352371
}
353372

354373
switch (token.type) {
355374
case "space":
356-
out += token.raw;
375+
sections[index][header ? "header" : "content"] += token.raw;
357376
break;
358377
case "code":
359378
if (token.lang != "math") {
360-
out += token.text;
379+
sections[index][header ? "header" : "content"] += token.text;
361380
}
362381
break;
363382
case "heading":
364-
out += "\n\n";
365383
break;
366384
case "table":
367385
for (const cell of token.header) {
368-
out += stripTokens(cell.tokens) + " ";
386+
stripTokens(cell.tokens, sections, header);
387+
sections[index][header ? "header" : "content"] += " ";
369388
}
370-
out += "\n";
389+
sections[index][header ? "header" : "content"] += "\n";
371390
for (const row of token.rows) {
372391
for (const cell of row) {
373-
out += stripTokens(cell.tokens) + " ";
392+
stripTokens(cell.tokens, sections, header);
393+
sections[index][header ? "header" : "content"] += " ";
374394
}
375-
out += "\n";
395+
sections[index][header ? "header" : "content"] += "\n";
376396
}
377397
break;
378398
case "hr":
379399
break;
380400
case "blockquote":
381401
break;
382402
case "list":
383-
out += stripTokens(token.items);
403+
stripTokens(token.items, sections, header);
384404
break;
385405
case "list_item":
386-
out += "\n";
406+
sections[index][header ? "header" : "content"] += "\n";
387407
break;
388408
case "paragraph":
389409
break;
390410
case "html": {
391411
// TODO: extract alt from img
392-
out += sanitizeHtml(token.text, {
393-
allowedTags: [],
394-
allowedAttributes: {},
395-
}).trim() + "\n\n";
412+
sections[index][header ? "header" : "content"] +=
413+
sanitizeHtml(token.text, {
414+
allowedTags: [],
415+
allowedAttributes: {},
416+
}).trim() + "\n\n";
396417
break;
397418
}
398419
case "text":
399420
if (!("tokens" in token) || !token.tokens) {
400-
out += token.raw;
421+
sections[index][header ? "header" : "content"] += token.raw;
401422
}
402423
break;
403424
case "def":
@@ -408,26 +429,24 @@ function stripTokens(tokens: Marked.Token[]): string {
408429
break;
409430
case "image":
410431
if (token.title) {
411-
out += token.title;
432+
sections[index][header ? "header" : "content"] += token.title;
412433
} else {
413-
out += token.text;
434+
sections[index][header ? "header" : "content"] += token.text;
414435
}
415436
break;
416437
case "strong":
417438
break;
418439
case "em":
419440
break;
420441
case "codespan":
421-
out += token.text;
442+
sections[index][header ? "header" : "content"] += token.text;
422443
break;
423444
case "br":
424445
break;
425446
case "del":
426447
break;
427448
}
428449
}
429-
430-
return out;
431450
}
432451

433452
class StripTokenizer extends Marked.Tokenizer {
@@ -450,10 +469,22 @@ class StripTokenizer extends Marked.Tokenizer {
450469
}
451470
}
452471

472+
export interface MarkdownSections {
473+
/** Plain text of the section's heading. Empty string for the root section that precedes any heading. */
474+
header: string;
475+
/** Depth of the heading, taken from the heading token's `depth`. 0 for the root section. */
476+
depth: number;
477+
content: string; // Plain text accumulated for this section outside of its heading.
478+
}
479+
453480
/**
454-
* Strip all markdown syntax to get a plaintext output
481+
* Strip all markdown syntax to get a plaintext output, split into sections:
482+
* a root section (depth 0, empty header) followed by one section per heading
455483
*/
456-
export function strip(markdown: string, opts: RenderOptions = {}): string {
484+
export function stripSplitBySections(
485+
markdown: string,
486+
opts: RenderOptions = {},
487+
): MarkdownSections[] {
457488
markdown = emojify(markdown).replace(BLOCK_MATH_REGEXP, "").replace(
458489
INLINE_MATH_REGEXP,
459490
"",
@@ -462,5 +493,22 @@ export function strip(markdown: string, opts: RenderOptions = {}): string {
462493
...getOpts(opts),
463494
tokenizer: new StripTokenizer(),
464495
});
465-
return stripTokens(tokens).trim().replace(/\n{3,}/g, "\n") + "\n";
496+
497+
const sections: MarkdownSections[] = [{
498+
header: "",
499+
depth: 0,
500+
content: "",
501+
}];
502+
stripTokens(tokens, sections, false);
503+
504+
return sections;
505+
}
506+
507+
/**
508+
* Strip all markdown syntax to get a plaintext output. Each section from `stripSplitBySections` is emitted as its header, a blank line, then its content; sections are joined with blank lines, the result is trimmed and ends with exactly one newline.
509+
*/
510+
export function strip(markdown: string, opts: RenderOptions = {}): string {
511+
return stripSplitBySections(markdown, opts).map((section) =>
512+
section.header + "\n\n" + section.content
513+
).join("\n\n").trim().replace(/\n{3,}/g, "\n") + "\n"; // NOTE(review): runs of 3+ newlines collapse to a single newline while exactly two are kept — confirm this is intended.
466514
}

test/fixtures/example.html

+195
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
<pre><code>{
2+
"json": {
3+
"name": "Deno"
4+
}
5+
}</code></pre><pre><code>- hello
6+
+ world</code></pre><ul>
7+
<li>Buildscript</li>
8+
</ul>
9+
<pre><code>import { build } from "https://deno.land/x/esbuild/mod.ts";
10+
import sassPlugin from "https://deno.land/x/esbuild_plugin_sass_deno/mod.ts";
11+
12+
build({
13+
entryPoints: [
14+
"example/in.ts",
15+
],
16+
bundle: true,
17+
outfile: "example/out.js",
18+
plugins: [sassPlugin()],
19+
});</code></pre><ul>
20+
<li>Main Entrypoint File:</li>
21+
</ul>
22+
<pre><code>import styles from "./styles.scss";
23+
24+
document.getElementsByTagName("head")[0].innerHTML +=
25+
`&lt;style&gt;${styles}&lt;/style&gt;`;</code></pre><p><del>Some strikethrough <code>text</code></del></p>
26+
<details>
27+
<summary>Summary</summary>
28+
<p>Some Details
29+
30+
</p><p><strong>even more details</strong></p>
31+
<p></p>
32+
</details>
33+
34+
<table>
35+
<thead>
36+
<tr>
37+
<th>Type</th>
38+
<th>Description</th>
39+
<th>example</th>
40+
</tr>
41+
</thead>
42+
<tbody><tr>
43+
<td><code>string</code></td>
44+
<td>A string of characters.</td>
45+
<td><code>'Hello world'</code></td>
46+
</tr>
47+
<tr>
48+
<td><code>number</code></td>
49+
<td>A numeric value, either float or integer.</td>
50+
<td><code>42</code></td>
51+
</tr>
52+
<tr>
53+
<td><code>boolean</code></td>
54+
<td>A boolean value.</td>
55+
<td><code>true</code></td>
56+
</tr>
57+
<tr>
58+
<td><code>enum</code></td>
59+
<td>An enum value.</td>
60+
<td><code>'drama'</code></td>
61+
</tr>
62+
<tr>
63+
<td><code>geopoint</code></td>
64+
<td>A geopoint value.</td>
65+
<td><code>{ lat: 40.7128, lon: 74.0060 }</code></td>
66+
</tr>
67+
<tr>
68+
<td><code>string[]</code></td>
69+
<td>An array of strings.</td>
70+
<td><code>['red', 'green', 'blue']</code></td>
71+
</tr>
72+
<tr>
73+
<td><code>number[]</code></td>
74+
<td>An array of numbers.</td>
75+
<td><code>[42, 91, 28.5]</code></td>
76+
</tr>
77+
<tr>
78+
<td><code>boolean[]</code></td>
79+
<td>An array of booleans.</td>
80+
<td><code>[true, false, false]</code></td>
81+
</tr>
82+
<tr>
83+
<td><code>enum[]</code></td>
84+
<td>An array of enums.</td>
85+
<td><code>['comedy', 'action', 'romance']</code></td>
86+
</tr>
87+
<tr>
88+
<td><code>vector[&lt;size&gt;]</code></td>
89+
<td>A vector of numbers to perform vector search on.</td>
90+
<td><code>[0.403, 0.192, 0.830]</code></td>
91+
</tr>
92+
</tbody></table>
93+
<h2 id="math-rendering"><a class="anchor" aria-hidden="true" tabindex="-1" href="#math-rendering"><svg class="octicon octicon-link" viewBox="0 0 16 16" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a>Math rendering</h2>
94+
<p>We support code blocks with the "math" type!</p>
95+
<pre><code>G_{\mu v} = \frac{8 \pi G}{c^4} T_{\mu v}</code></pre><p>We also support math blocks and inline math blocks as well!</p>
96+
<p>When $a \ne 0$, there are two solutions to $(ax^2 + bx + c = 0)$ and they are</p>
97+
<p>$$ x = {-b \pm \sqrt{b^2-4ac} \over 2a} $$</p>
98+
<p>You can even typeset individual letters or whole sentences inline just like $x$
99+
or $Quadratic ; formula$. You can also use math blocks to typeset whole
100+
equations with $\LaTeX$:</p>
101+
<p>$$ \begin{aligned} \dot{x} &amp; = \sigma(y-x) \ \dot{y} &amp; = \rho x - y - xz \
102+
\dot{z} &amp; = -\beta z + xy \end{aligned} $$</p>
103+
<h1 id="deno"><a class="anchor" aria-hidden="true" tabindex="-1" href="#deno"><svg class="octicon octicon-link" viewBox="0 0 16 16" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a>Deno</h1>
104+
<p><a href="https://github.com/denoland/deno/actions" rel="noopener noreferrer"><img src="https://github.com/denoland/deno/workflows/ci/badge.svg?branch=main&amp;event=push" alt="Build Status - Cirrus" /></a> <a href="https://twitter.com/intent/follow?screen_name=deno_land" rel="noopener noreferrer"><img src="https://img.shields.io/twitter/follow/deno_land.svg?style=social&amp;label=Follow" alt="Twitter handle" /></a>
105+
<a href="https://discord.gg/deno" rel="noopener noreferrer"><img src="https://img.shields.io/discord/684898665143206084?logo=discord&amp;style=social" alt="Discord Chat" /></a></p>
106+
<img align="right" src="https://deno.land/logo.svg" height="150px" alt="the deno mascot dinosaur standing in the rain" />
107+
108+
<p>Deno is a <em>simple</em>, <em>modern</em> and <em>secure</em> runtime for <strong>JavaScript</strong> and
109+
<strong>TypeScript</strong> that uses V8 and is built in Rust.</p>
110+
<h3 id="features"><a class="anchor" aria-hidden="true" tabindex="-1" href="#features"><svg class="octicon octicon-link" viewBox="0 0 16 16" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a>Features</h3>
111+
<ul>
112+
<li>Secure by default. No file, network, or environment access, unless explicitly
113+
enabled.</li>
114+
<li>Supports TypeScript out of the box.</li>
115+
<li>Ships only a single executable file.</li>
116+
<li>Built-in utilities like a dependency inspector (deno info) and a code
117+
formatter (deno fmt).</li>
118+
<li>Set of reviewed standard modules that are guaranteed to work with
119+
<a href="https://deno.land/std/" rel="noopener noreferrer">Deno</a>.</li>
120+
</ul>
121+
<h3 id="install"><a class="anchor" aria-hidden="true" tabindex="-1" href="#install"><svg class="octicon octicon-link" viewBox="0 0 16 16" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a>Install</h3>
122+
<p>Shell (Mac, Linux):</p>
123+
<pre><code>curl -fsSL https://deno.land/x/install/install.sh | sh</code></pre><p>PowerShell (Windows):</p>
124+
<pre><code>iwr https://deno.land/x/install/install.ps1 -useb | iex</code></pre><p><a href="https://formulae.brew.sh/formula/deno" rel="noopener noreferrer">Homebrew</a> (Mac):</p>
125+
<pre><code>brew install deno</code></pre><p><a href="https://chocolatey.org/packages/deno" rel="noopener noreferrer">Chocolatey</a> (Windows):</p>
126+
<pre><code>choco install deno</code></pre><p><a href="https://scoop.sh/" rel="noopener noreferrer">Scoop</a> (Windows):</p>
127+
<pre><code>scoop install deno</code></pre><p>Build and install from source using <a href="https://crates.io/crates/deno" rel="noopener noreferrer">Cargo</a>:</p>
128+
<pre><code>cargo install deno --locked</code></pre><p>See
129+
<a href="https://github.com/denoland/deno_install/blob/master/README.md" rel="noopener noreferrer">deno_install</a>
130+
and <a href="https://github.com/denoland/deno/releases" rel="noopener noreferrer">releases</a> for other options.</p>
131+
<h3 id="getting-started"><a class="anchor" aria-hidden="true" tabindex="-1" href="#getting-started"><svg class="octicon octicon-link" viewBox="0 0 16 16" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a>Getting Started</h3>
132+
<p>Try running a simple program:</p>
133+
<pre><code>deno run https://deno.land/std/examples/welcome.ts</code></pre><p>Or a more complex one:</p>
134+
<pre><code>const listener = Deno.listen({ port: 8000 });
135+
console.log("http://localhost:8000/");
136+
137+
for await (const conn of listener) {
138+
serve(conn);
139+
}
140+
141+
async function serve(conn: Deno.Conn) {
142+
for await (const { respondWith } of Deno.serveHttp(conn)) {
143+
respondWith(new Response("Hello world"));
144+
}
145+
}</code></pre><p>You can find a deeper introduction, examples, and environment setup guides in
146+
the <a href="https://deno.land/manual" rel="noopener noreferrer">manual</a>.</p>
147+
148+
149+
<p>The complete API reference is available at the runtime
150+
<a href="https://doc.deno.land" rel="noopener noreferrer">documentation</a>.</p>
151+
<h3 id="contributing"><a class="anchor" aria-hidden="true" tabindex="-1" href="#contributing"><svg class="octicon octicon-link" viewBox="0 0 16 16" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a>Contributing</h3>
152+
<p>We appreciate your help!</p>
153+
<p>To contribute, please read our
154+
<a href="https://deno.land/manual/contributing" rel="noopener noreferrer">contributing instructions</a>.</p>
155+
<pre><code>/** @jsx h */
156+
import { h, IS_BROWSER, useState } from "../deps.ts";
157+
158+
export default function Home() {
159+
return (
160+
&lt;div&gt;
161+
&lt;p&gt;
162+
Welcome to `fresh`. Try update this message in the ./pages/index.tsx
163+
file, and refresh.
164+
&lt;/p&gt;
165+
&lt;Counter /&gt;
166+
&lt;p&gt;{IS_BROWSER ? "Viewing browser render." : "Viewing JIT render."}&lt;/p&gt;
167+
&lt;/div&gt;
168+
);
169+
}
170+
171+
function Counter() {
172+
const [count, setCount] = useState(0);
173+
return (
174+
&lt;div&gt;
175+
&lt;p&gt;{count}&lt;/p&gt;
176+
&lt;button
177+
onClick={() =&gt; setCount(count - 1)}
178+
disabled={!IS_BROWSER}
179+
&gt;
180+
-1
181+
&lt;/button&gt;
182+
&lt;button
183+
onClick={() =&gt; setCount(count + 1)}
184+
disabled={!IS_BROWSER}
185+
&gt;
186+
+1
187+
&lt;/button&gt;
188+
&lt;/div&gt;
189+
);
190+
}
191+
192+
export const config: PageConfig = { runtimeJS: true };</code></pre><figure>
193+
<img src="https://deno.land/logo.svg" />
194+
<figcaption><b>Figure 1.</b> The deno mascot dinosaur standing in the rain.</figcaption>
195+
</figure>

0 commit comments

Comments
 (0)