Skip to content

Commit ca408f1

Browse files
authored
feat: paragraphize content blocks in script mode (#24)
1 parent 37976a2 commit ca408f1

File tree

10 files changed

+5694
-30
lines changed

10 files changed

+5694
-30
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ Legend for syntax support:
6666
| Line break | Break || 🚫 |
6767
| Smart quote | | 🚫 | 🚫 |
6868
| Symbol shorthand | | 🚫 | 🚫 |
69-
| Code expression | | 🚫 | 🚫 |
69+
| Code expression | | | |
7070
| Character escape | | 🚫 | 🚫 |
7171
| Comment | Comment || 🚫 |
7272

src/typstToTextlintAst.ts

Lines changed: 215 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,64 @@ const isTypstType = (type: string, pattern: RegExp): boolean => {
190190
return pattern.test(type);
191191
};
192192

193+
/**
 * Reports whether a node's type is exactly `Marked::Markup` or
 * `Marked::ContentBlock`.
 *
 * @param node - Node whose `type` is inspected.
 * @returns `true` only when `node.type` is a string matching one of the two
 *   type names above; a non-string `type` never matches.
 */
const isTypstMarkupOrContentBlock = (node: AstNode): boolean =>
  typeof node.type === "string" &&
  isTypstType(node.type, /^Marked::(Markup|ContentBlock)$/);
199+
200+
/**
 * Reports whether `content` is a bracket punctuation node, i.e. an AST node
 * typed `Punc::LeftBracket` or `Punc::RightBracket`.
 *
 * @param content - Any content entry; entries rejected by `isAstNode` are
 *   never brackets.
 * @returns `true` for the two bracket punctuation types, `false` otherwise.
 */
const isScriptBracketNode = (content: Content): boolean => {
  if (isAstNode(content)) {
    const { type: nodeType } = content as unknown as AstNode;
    return (
      nodeType === "Punc::LeftBracket" || nodeType === "Punc::RightBracket"
    );
  }
  return false;
};
207+
208+
/**
 * Recursively unwraps `Marked::Markup` / `Marked::ContentBlock` wrappers.
 *
 * Anything that is not an AST node with children of one of those two types is
 * returned unchanged as a single-element array. For a wrapper, bracket
 * punctuation children are dropped and every remaining child is flattened in
 * turn, preserving order.
 *
 * @param content - Entry to flatten.
 * @returns The flattened list of entries that stand in for `content`.
 */
const flattenTypstMarkupChildren = (content: Content): Content[] => {
  if (
    !isAstNode(content) ||
    !hasChildren(content) ||
    !isTypstMarkupOrContentBlock(content)
  ) {
    return [content];
  }

  const unwrapped: Content[] = [];
  for (const child of content.children) {
    if (isScriptBracketNode(child)) {
      // The `[` / `]` delimiters are not part of the block's content.
      continue;
    }
    unwrapped.push(...flattenTypstMarkupChildren(child));
  }
  return unwrapped;
};
219+
220+
/**
 * Rewrites, in place, every `Marked::ContentBlock` node in the subtree rooted
 * at `node` into a textlint `Paragraph`.
 *
 * Children are processed before the node itself. A converted node receives the
 * flattened AST-node children of the block (bracket punctuation removed) and,
 * when at least one child remains, a `loc`, `range` and `raw` spanning from the
 * first to the last remaining child. With no remaining children only `raw` is
 * reset to the empty string; `loc` and `range` are left untouched.
 *
 * @param node - Root of the subtree to rewrite; mutated in place.
 */
const convertScriptContentBlocksToParagraphs = (node: AstNode): void => {
  // Depth-first: nested blocks are rewritten before their parent.
  if (hasChildren(node)) {
    for (const child of node.children) {
      if (isAstNode(child)) {
        convertScriptContentBlocksToParagraphs(child);
      }
    }
  }

  if (node.type !== "Marked::ContentBlock") {
    return;
  }

  // Flatten BEFORE retagging: flattening keys off the Typst type string.
  // `flattenTypstMarkupChildren` hands back `[node]` itself when the node
  // fails `hasChildren`; dropping that entry prevents the node from becoming
  // its own child (a cycle that would make later tree walks recurse forever).
  const flattened = (
    flattenTypstMarkupChildren(node as unknown as Content).filter(
      isAstNode,
    ) as unknown as AstNode[]
  ).filter((candidate) => candidate !== node);

  node.type = ASTNodeTypes.Paragraph;
  node.children = flattened as unknown as Content[];

  if (flattened.length > 0) {
    const firstChild = flattened[0];
    const lastChild = flattened[flattened.length - 1];
    // Shrink the span to the content between the stripped brackets.
    node.loc = { start: firstChild.loc.start, end: lastChild.loc.end };
    node.range = [firstChild.range[0], lastChild.range[1]];
    node.raw = flattened.map((c) => c.raw).join("");
  } else {
    node.raw = "";
  }
};
250+
193251
type TxtNodeLineLocation = TxtNode["loc"];
194252

195253
/**
@@ -444,16 +502,7 @@ export const convertRawTypstAstObjectToTextlintAstObject = (
444502

445503
const flattenedContent: Content[] = [];
446504
for (const child of contentChildren) {
447-
if (
448-
typeof child.type === "string" &&
449-
isTypstType(child.type, /^Marked::Markup$/) &&
450-
isAstNode(child) &&
451-
hasChildren(child)
452-
) {
453-
flattenedContent.push(...child.children);
454-
} else {
455-
flattenedContent.push(child);
456-
}
505+
flattenedContent.push(...flattenTypstMarkupChildren(child));
457506
}
458507
const textContent: Content[] = [];
459508
const nestedListItems: Content[] = [];
@@ -730,6 +779,9 @@ export const convertRawTypstAstObjectToTextlintAstObject = (
730779
calculateOffsets(textlintAstObject);
731780
}
732781

782+
// Convert script-mode `[...]` blocks to Paragraph nodes (recursive markup mode).
783+
convertScriptContentBlocksToParagraphs(textlintAstObject);
784+
733785
// Root node is always `Document` node
734786
textlintAstObject.type = ASTNodeTypes.Document;
735787

@@ -744,11 +796,148 @@ export const convertRawTypstAstObjectToTextlintAstObject = (
744796
export const paragraphizeTextlintAstObject = (
745797
rootNode: TxtDocumentNode,
746798
): TxtDocumentNode => {
799+
const whitelist = new Set<string>();
800+
801+
/**
 * Reports whether `n` is an AST node whose type is one of the two hash
 * spellings matched here: `Kw::Hash` or `Fn::(Hash: &quot;#&quot;)`.
 * The `&quot;` entities are compared literally, exactly as written.
 */
const isHashNode = (n: Content): boolean => {
  if (!isAstNode(n)) {
    return false;
  }
  const hashTypes = ["Kw::Hash", "Fn::(Hash: &quot;#&quot;)"];
  return hashTypes.includes((n as unknown as AstNode).type);
};
806+
807+
/**
 * Reports whether `n` is a `Str` or `Break` node whose raw text contains a
 * newline character. Every other node type yields `false`.
 */
const includesLineBreak = (n: Content): boolean => {
  switch (n.type) {
    case ASTNodeTypes.Str:
    case ASTNodeTypes.Break:
      return n.raw.includes("\n");
    default:
      return false;
  }
};
816+
817+
/**
 * Reports whether position `index` begins a new line of input: either it is
 * the very first entry, or the entry immediately before it contains a line
 * break.
 *
 * @param arr - Sibling entries being scanned.
 * @param index - Position to test.
 */
const isStatementBoundaryBefore = (arr: Content[], index: number): boolean =>
  index === 0 || includesLineBreak(arr[index - 1]);
826+
827+
/**
 * Finds the first keyword or identifier name in `n`, searching depth-first.
 *
 * - A node typed `Kw::<Name>` (other than `Kw::Hash`) yields `<name>` in
 *   lower case.
 * - A node whose type contains `Ident:` and whose `value` is a string yields
 *   that value.
 * - Otherwise the node's children are searched in order and the first
 *   non-empty result wins.
 *
 * @param n - Entry to inspect.
 * @returns The name found, or `undefined` when `n` is not an AST node or
 *   nothing matches.
 */
const extractFirstIdentifier = (n: Content): string | undefined => {
  if (!isAstNode(n)) {
    return undefined;
  }

  const candidate = n as unknown as AstNode;
  const kind = candidate.type;

  if (typeof kind === "string") {
    const keywordPrefix = "Kw::";
    if (kind !== "Kw::Hash" && kind.startsWith(keywordPrefix)) {
      return kind.slice(keywordPrefix.length).toLowerCase();
    }
    if (typeof candidate.value === "string" && kind.includes("Ident:")) {
      return candidate.value;
    }
  }

  if (hasChildren(candidate)) {
    for (const child of candidate.children) {
      const nested = extractFirstIdentifier(child);
      if (nested) {
        return nested;
      }
    }
  }
  return undefined;
};
851+
852+
/**
 * Names the statement that follows the entry at `index`.
 *
 * Whitespace-only `Str` entries after `index` are skipped; the first other
 * entry is handed to `extractFirstIdentifier` and its result is returned
 * as-is (no further entries are examined).
 *
 * @param arr - Sibling entries being scanned.
 * @param index - Position of the hash entry.
 * @returns The extracted name, or `undefined` when nothing but blank text
 *   follows or no name can be extracted.
 */
const getHashStatementName = (
  arr: Content[],
  index: number,
): string | undefined => {
  let cursor = index + 1;
  while (cursor < arr.length) {
    const candidate = arr[cursor];
    cursor += 1;
    const isBlankText =
      candidate.type === ASTNodeTypes.Str && candidate.raw.trim() === "";
    if (!isBlankText) {
      return extractFirstIdentifier(candidate);
    }
  }
  return undefined;
};
865+
866+
/**
 * Reports whether a hash statement begins at `index`: the entry there is a
 * hash node, it sits at a statement boundary, and the statement's name (if
 * one can be extracted) is not present in `whitelist`.
 *
 * @param arr - Sibling entries being scanned.
 * @param index - Position to test.
 */
const isHashStatementStartAt = (arr: Content[], index: number): boolean => {
  const opensStatement =
    isHashNode(arr[index]) && isStatementBoundaryBefore(arr, index);
  if (!opensStatement) {
    return false;
  }

  const statementName = getHashStatementName(arr, index);
  const isWhitelisted = statementName ? whitelist.has(statementName) : false;
  return !isWhitelisted;
};
879+
880+
/**
 * Maps a punctuation node to its effect on nesting depth.
 *
 * @param n - Entry to inspect.
 * @returns `1` for `Punc::LeftParen` / `Punc::LeftBracket` / `Punc::LeftBrace`,
 *   `-1` for the matching right-hand types, and `0` for everything else
 *   (including non-AST entries and nodes with a non-string type).
 */
const punctuationDepthDelta = (n: Content): number => {
  if (!isAstNode(n)) {
    return 0;
  }
  const kind = (n as unknown as AstNode).type;
  if (typeof kind !== "string") {
    return 0;
  }

  const opens =
    kind === "Punc::LeftParen" ||
    kind === "Punc::LeftBracket" ||
    kind === "Punc::LeftBrace";
  if (opens) {
    return 1;
  }

  const closes =
    kind === "Punc::RightParen" ||
    kind === "Punc::RightBracket" ||
    kind === "Punc::RightBrace";
  return closes ? -1 : 0;
};
901+
902+
/**
 * Gathers the entries that make up one hash statement.
 *
 * Starting at `startIndex`, entries are collected until one that contains a
 * line break is reached while no paren/bracket/brace opened after the start
 * entry is still unclosed; that terminating entry is included. The start entry
 * itself never affects the depth and never terminates the statement. Depth is
 * clamped at zero so unmatched closers cannot make it negative.
 *
 * @param arr - Sibling entries being scanned.
 * @param startIndex - Position of the statement's first entry.
 * @returns The collected entries and the index of the first entry after them.
 */
const collectHashStatement = (
  arr: Content[],
  startIndex: number,
): { nodes: Content[]; nextIndex: number } => {
  const gathered: Content[] = [];
  let nesting = 0;

  for (let cursor = startIndex; cursor < arr.length; cursor++) {
    const current = arr[cursor];
    gathered.push(current);
    if (cursor === startIndex) {
      continue;
    }

    nesting = Math.max(0, nesting + punctuationDepthDelta(current));
    if (nesting === 0 && includesLineBreak(current)) {
      return { nodes: gathered, nextIndex: cursor + 1 };
    }
  }

  // Ran off the end (or never entered the loop): resume where scanning stopped.
  return { nodes: gathered, nextIndex: Math.max(startIndex, arr.length) };
};
926+
927+
const sourceChildren = rootNode.children;
928+
747929
const children: Content[] = [];
748930
let i = 0;
749931

750-
while (i < rootNode.children.length) {
751-
const node = rootNode.children[i];
932+
while (i < sourceChildren.length) {
933+
if (isHashStatementStartAt(sourceChildren, i)) {
934+
const { nodes, nextIndex } = collectHashStatement(sourceChildren, i);
935+
children.push(...nodes);
936+
i = nextIndex;
937+
continue;
938+
}
939+
940+
const node = sourceChildren[i];
752941

753942
// Collect consecutive ListItems into a single List node.
754943
if (node.type === ASTNodeTypes.ListItem) {
@@ -760,8 +949,8 @@ export const paragraphizeTextlintAstObject = (
760949
const isOrdered = /^\d+\./.test(node.raw?.trim() || "");
761950

762951
// Collect consecutive ListItems including those separated by line breaks.
763-
while (i < rootNode.children.length) {
764-
const currentNode = rootNode.children[i];
952+
while (i < sourceChildren.length) {
953+
const currentNode = sourceChildren[i];
765954

766955
if (currentNode.type === ASTNodeTypes.ListItem) {
767956
// Check if the current item matches the list type (ordered/unordered).
@@ -778,11 +967,11 @@ export const paragraphizeTextlintAstObject = (
778967
// Skip line breaks between ListItems.
779968
if (currentNode.type === ASTNodeTypes.Str && currentNode.raw === "\n") {
780969
if (
781-
i + 1 < rootNode.children.length &&
782-
rootNode.children[i + 1].type === ASTNodeTypes.ListItem
970+
i + 1 < sourceChildren.length &&
971+
sourceChildren[i + 1].type === ASTNodeTypes.ListItem
783972
) {
784973
const nextIsOrdered = /^\d+\./.test(
785-
rootNode.children[i + 1].raw?.trim() || "",
974+
sourceChildren[i + 1].raw?.trim() || "",
786975
);
787976
if (nextIsOrdered === isOrdered) {
788977
i++;
@@ -799,11 +988,11 @@ export const paragraphizeTextlintAstObject = (
799988
currentNode.children[0].raw === "\n"
800989
) {
801990
if (
802-
i + 1 < rootNode.children.length &&
803-
rootNode.children[i + 1].type === ASTNodeTypes.ListItem
991+
i + 1 < sourceChildren.length &&
992+
sourceChildren[i + 1].type === ASTNodeTypes.ListItem
804993
) {
805994
const nextIsOrdered = /^\d+\./.test(
806-
rootNode.children[i + 1].raw?.trim() || "",
995+
sourceChildren[i + 1].raw?.trim() || "",
807996
);
808997
if (nextIsOrdered === isOrdered) {
809998
i++;
@@ -887,13 +1076,10 @@ export const paragraphizeTextlintAstObject = (
8871076
// Use the children of Marked::Markup nodes if they exist.
8881077
const actualContent: AstNode[] = [];
8891078
for (const child of contentChildren) {
890-
if (
891-
typeof child.type === "string" &&
892-
isTypstType(child.type, /^Marked::Markup$/) &&
893-
isAstNode(child) &&
894-
hasChildren(child)
895-
) {
896-
actualContent.push(...child.children.filter(isAstNode));
1079+
const flattenedChildren =
1080+
flattenTypstMarkupChildren(child).filter(isAstNode);
1081+
if (flattenedChildren.length > 0) {
1082+
actualContent.push(...flattenedChildren);
8971083
} else if (isAstNode(child)) {
8981084
actualContent.push(child);
8991085
}
@@ -959,8 +1145,8 @@ export const paragraphizeTextlintAstObject = (
9591145
i++;
9601146

9611147
// Collect consecutive nodes for paragraph grouping.
962-
while (i < rootNode.children.length) {
963-
const currentNode = rootNode.children[i];
1148+
while (i < sourceChildren.length) {
1149+
const currentNode = sourceChildren[i];
9641150

9651151
if (
9661152
currentNode.type === ASTNodeTypes.Header ||
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#let (x, y) = (1, 2)
2+
The coordinates are #x, #y.
3+
4+
#let (a, .., b) = (1, 2, 3, 4)
5+
The first element is #a.
6+
The last element is #b.
7+
8+
#let books = (
9+
Shakespeare: "Hamlet",
10+
Homer: "The Odyssey",
11+
Austen: "Persuasion",
12+
)
13+
14+
#let (Austen,) = books
15+
Austen wrote #Austen.
16+
17+
#let (Homer: h) = books
18+
Homer wrote #h.
19+
20+
#let (Homer, ..other) = books
21+
#for (author, title) in other [
22+
#author wrote #title.
23+
]

0 commit comments

Comments
 (0)