Skip to content

Commit c608b96

Browse files
Add support for lists
1 parent 543ecbd commit c608b96

File tree

3 files changed

+135
-73
lines changed

3 files changed

+135
-73
lines changed

node-zerox/src/openAI.ts

+3-73
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,6 @@
11
import { CompletionArgs, CompletionResponse } from "./types";
2-
import { convertKeysToSnakeCase, encodeImageToBase64 } from "./utils";
2+
import { convertKeysToSnakeCase, encodeImageToBase64, markdownToJson } from "./utils";
33
import axios from "axios";
4-
import { nanoid } from "nanoid";
5-
6-
const markdownToJson = async (markdownString: string) => {
7-
/**
8-
* Bypassing typescript transpiler using eval to use dynamic imports
9-
*
10-
* Source: https://stackoverflow.com/a/70546326
11-
*/
12-
const { unified } = await eval(`import('unified')`);
13-
const { default: remarkParse } = await eval(`import('remark-parse')`);
14-
const { remarkGfm } = await eval(`import('remark-gfm')`);
15-
16-
const parsedMd = unified()
17-
.use(remarkParse) // Parse Markdown to AST
18-
.use(remarkGfm)
19-
.parse(markdownString);
20-
21-
const parentIdManager: string[] = [];
22-
23-
let depths = [0];
24-
25-
const jsonObj = parsedMd.children.map((node: any) => {
26-
const isHeading = node.type === "heading";
27-
if (isHeading && node.depth <= (depths.at(-1) || 0)) {
28-
parentIdManager.pop();
29-
// TODO: keep removing depth number till it reaches the one less than node.depth
30-
depths.pop();
31-
}
32-
const processedNode = processNode(node, parentIdManager.at(-1));
33-
34-
if (isHeading) {
35-
parentIdManager.push(processedNode.id);
36-
if (depths.at(-1) !== node.depth) depths.push(node.depth);
37-
}
38-
39-
return processedNode;
40-
});
41-
42-
return jsonObj;
43-
};
44-
45-
const type: Record<string, string> = {
46-
heading: "heading",
47-
text: "text",
48-
};
49-
50-
const processNode = (node: any, parentId?: string) => {
51-
let value: any;
52-
53-
if (node.type === "heading") {
54-
value = node.children
55-
.map((childNode: any) => processText(childNode))
56-
.join(" ");
57-
} else if (node.type === "paragraph") {
58-
value = node.children
59-
.map((childNode: any) => processText(childNode))
60-
.join(" ");
61-
}
62-
63-
return {
64-
id: nanoid(),
65-
parentId,
66-
type: type[node.type as string] || type.text,
67-
value,
68-
};
69-
};
70-
71-
const processText = (node: any) => {
72-
return node.value;
73-
};
744

755
export const getCompletion = async ({
766
apiKey,
@@ -128,8 +58,8 @@ export const getCompletion = async ({
12858

12959
const data = response.data;
13060

131-
// const jsonOutput = await markdownToJson(data.choices[0].message.content);
132-
// console.log("====>>>>", JSON.stringify(jsonOutput, null, 2));
61+
const jsonOutput = await markdownToJson(data.choices[0].message.content);
62+
console.log("====>>>>", JSON.stringify(jsonOutput));
13363

13464
return {
13565
content: data.choices[0].message.content,

node-zerox/src/utils.ts

+131
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import fs from "fs-extra";
99
import mime from "mime-types";
1010
import path from "path";
1111
import sharp from "sharp";
12+
import { nanoid } from "nanoid";
1213

1314
const convertAsync = promisify(convert);
1415

@@ -311,3 +312,133 @@ export const convertKeysToSnakeCase = (
311312
Object.entries(obj).map(([key, value]) => [camelToSnakeCase(key), value])
312313
);
313314
};
315+
316+
/**
 * Flat JSON representation of a single markdown AST node, as produced by
 * `processNode` and `processListItem`.
 */
interface ProcessedNode {
  /** Unique identifier of this node (generated with `nanoid`). */
  id: string;
  /** Id of the node this one hangs under, or `undefined` when it has none. */
  parentId: string | undefined;
  /** Output node kind; one of the values of the `type` lookup table. */
  type: string;
  /**
   * Node payload: joined text for headings and paragraphs, an array of
   * `ProcessedNode` items for lists, `undefined` for unhandled node types.
   */
  value: any;
}
322+
/**
 * One entry of the heading stack kept by `markdownToJson`: the id of a
 * processed heading together with its markdown depth.
 *
 * NOTE(review): the lower-case name breaks the PascalCase convention for
 * types, but it is referenced by name elsewhere in this file, so it is kept.
 */
interface parentId {
  /** Id of the processed heading node. */
  id: string;
  /** Heading depth as reported by the markdown AST node. */
  depth: number;
}
326+
327+
/**
 * Parses a markdown string and flattens its top-level AST nodes into a list
 * of `ProcessedNode`s.
 *
 * Every node is given the id of the closest preceding heading that is still
 * "open" as its `parentId`. A heading closes every previously seen heading of
 * the same or a greater depth before it is linked itself, so an `h2` that
 * follows an `h3` attaches to the enclosing `h1` (if any).
 *
 * @param markdownString Markdown source to convert.
 * @returns The processed nodes in document order; nodes produced from nested
 *   lists are appended right after the list they came from.
 */
export const markdownToJson = async (
  markdownString: string
): Promise<ProcessedNode[]> => {
  /**
   * Bypassing typescript transpiler using eval to use dynamic imports
   *
   * Source: https://stackoverflow.com/a/70546326
   */
  const { unified } = await eval(`import('unified')`);
  const { default: remarkParse } = await eval(`import('remark-parse')`);
  // remark-gfm has no named exports: the plugin is the default export.
  // Destructuring `{ remarkGfm }` yields undefined, and `.use(undefined)` is
  // silently ignored, which left GFM syntax (tables, task lists, ...) unparsed.
  const { default: remarkGfm } = await eval(`import('remark-gfm')`);

  const parsedMd = unified()
    .use(remarkParse) // Parse Markdown to AST
    .use(remarkGfm)
    .parse(markdownString);

  // Stack of the headings that are currently open, outermost first.
  const parentIdManager: parentId[] = [];
  const jsonObj: ProcessedNode[] = [];

  parsedMd.children.forEach((node: any) => {
    const isHeading = node.type === "heading";

    if (isHeading) {
      // Close every open heading that is not strictly shallower than this one.
      while (
        parentIdManager.length > 0 &&
        (parentIdManager.at(-1)?.depth ?? 0) >= node.depth
      ) {
        parentIdManager.pop();
      }
    }

    const processedNode = processNode(node, parentIdManager.at(-1)?.id);

    if (isHeading) {
      // The first returned entry is the node itself; the rest are siblings.
      parentIdManager.push({ id: processedNode[0].id, depth: node.depth });
    }

    jsonObj.push(...processedNode);
  });

  return jsonObj;
};
369+
370+
/**
 * Maps markdown AST node types to the `type` written on a `ProcessedNode`.
 * Node types that are missing here are reported as `type.text` by
 * `processNode`.
 */
const type: Record<string, string> = {
  heading: "heading",
  text: "text",
  list: "list",
};
375+
376+
/**
 * Converts one markdown AST node into its flat JSON form.
 *
 * - heading / paragraph: `value` is the text of the children joined by " ".
 * - list: `value` is the array of list-item nodes; nodes produced by nested
 *   lists are returned as additional entries after the list node itself.
 * - any other node carrying a string `value` (e.g. fenced code, html): that
 *   string is kept so the content is not lost.
 * - everything else: `value` stays `undefined`.
 *
 * @param node Markdown AST node to convert.
 * @param parentId Id of the node the result should be attached to, if any.
 * @returns The processed node first, followed by any sibling nodes that were
 *   lifted out of nested lists.
 */
const processNode = (node: any, parentId?: string): ProcessedNode[] => {
  let value: any;
  const siblingNodes: ProcessedNode[] = [];

  if (node.type === "heading" || node.type === "paragraph") {
    value = node.children
      .map((childNode: any) => processText(childNode))
      .join(" ");
  } else if (node.type === "list") {
    const items: ProcessedNode[] = [];
    for (const listItem of node.children) {
      const { node: itemNodes, siblings } = processListItem(listItem);
      items.push(...itemNodes);
      siblingNodes.push(...siblings);
    }
    value = items;
  } else if (typeof node.value === "string") {
    // Literal nodes have no children but do carry their own text.
    value = node.value;
  }

  return [
    {
      id: nanoid(),
      parentId,
      // Unmapped node types are reported as plain text.
      type: type[node.type as string] ?? type.text,
      value,
    },
    ...siblingNodes,
  ];
};
409+
410+
/**
 * Extracts the plain text of an inline markdown AST node.
 *
 * Nodes that carry a string `value` (text, inline code, ...) return it as is.
 * Nodes that wrap other inline nodes (strong, emphasis, link, ...) have no
 * `value` of their own, so their children are read recursively and
 * concatenated; previously such nodes produced `undefined` and their text was
 * dropped from the output.
 *
 * @param node Inline markdown AST node.
 * @returns The node's text, or an empty string when it holds none.
 */
const processText = (node: any): string => {
  if (typeof node?.value === "string") {
    return node.value;
  }

  if (Array.isArray(node?.children)) {
    return node.children
      .map((childNode: any) => processText(childNode))
      .join("");
  }

  return "";
};
413+
414+
/**
 * Converts one list item of the markdown AST.
 *
 * All non-list children of the item are merged into a single text node; the
 * first such child supplies the node (and its id) and the text of later ones
 * is appended, separated by ", ". Nested lists are processed with that node's
 * id as their parent and returned as `siblings`, so the caller can emit them
 * next to the list instead of inside it.
 *
 * Fixes over the previous version: later text is no longer glued onto the
 * first fragment without a separator, and children without text no longer
 * leak the string "undefined" into the value.
 *
 * NOTE(review): the item node itself is created without a `parentId`.
 *
 * @param node `listItem` node of the markdown AST.
 * @returns `node`: zero or one processed item node; `siblings`: nodes coming
 *   from nested lists.
 */
const processListItem = (node: any) => {
  let itemNode: ProcessedNode | undefined;
  const siblings: ProcessedNode[] = [];

  const isNonEmptyText = (candidate: unknown): candidate is string =>
    typeof candidate === "string" && candidate.length > 0;

  node.children.forEach((childNode: any) => {
    if (childNode.type !== "list") {
      const processedNode = processNode(childNode);
      if (itemNode !== undefined) {
        itemNode.value = [
          itemNode.value,
          ...processedNode.map(({ value }) => value),
        ]
          .filter(isNonEmptyText)
          .join(", ");
      } else {
        itemNode = processedNode[0];
      }
      siblings.push(...processedNode.slice(1));
    } else {
      if (itemNode === undefined) {
        // The item starts with a nested list: create an empty placeholder so
        // the nested nodes still have a parent to point at.
        itemNode = {
          id: nanoid(),
          type: "text",
          value: "",
          parentId: undefined,
        };
      }
      const processedNode = processNode(childNode, itemNode.id);
      siblings.push(...processedNode);
    }
  });

  return { node: itemNode !== undefined ? [itemNode] : [], siblings };
};

package-lock.json

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)