Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/wdmock-cf/apps/main/src/services/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ export async function renderPage(
const expanded = resolveIncludes(source, (pageRef: PageRef) => {
return pageSourceMap.get(pageRef.page) ?? null;
});
const resolved = parse(expanded);
const { ast: resolved, diagnostics: _diagnostics = [] } = parse(expanded);

const { requirements, compiledListPagesTemplates } = extractDataRequirements(resolved);

Expand All @@ -66,7 +66,7 @@ export async function renderPage(
getPageTags: () => pageTags,
},
{
parse,
parse: (input: string) => parse(input).ast,
compiledListPagesTemplates,
requirements,
urlPath: options?.urlPath,
Expand Down
104 changes: 104 additions & 0 deletions packages/ast/src/diagnostic.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* Diagnostic types for reporting parse-time issues.
*
* When the parser encounters syntactically questionable or invalid markup
* (e.g. an unclosed `[[div]]` block), it records a {@link Diagnostic} rather
* than throwing an error. The parser is lenient: it always produces an AST,
* even when diagnostics are present.
*
* Diagnostics are returned alongside the AST via {@link ParseResult}.
*
* @since 2.0.0
* @module
*/

import type { Position } from "./position";
import type { SyntaxTree } from "./element";

/**
 * How serious a parse-time {@link Diagnostic} is.
 *
 * - `"error"` — the markup is structurally broken (e.g. inline `[[div]]`
 *   without a newline after `]]`).
 * - `"warning"` — the markup is likely unintentional but the parser can
 *   recover (e.g. a missing `[[/div]]` close tag).
 * - `"info"` — informational hints (e.g. deprecated syntax).
 *
 * @since 2.0.0
 * @group Diagnostics
 */
export type DiagnosticSeverity =
  | "error"
  | "warning"
  | "info";

/**
 * A single diagnostic emitted during parsing.
 *
 * Each diagnostic pinpoints a source location via {@link Position} and
 * carries a machine-readable {@link Diagnostic.code | code} string for
 * programmatic filtering (e.g. `"unclosed-block"`, `"inline-block-element"`).
 *
 * A diagnostic is plain data: it is recorded next to the AST instead of being
 * thrown, so consumers decide for themselves how to react to each severity.
 *
 * @example
 * ```ts
 * import { parse } from "@wdprlib/parser";
 *
 * const { ast, diagnostics } = parse("[[div]]\nHello");
 * for (const d of diagnostics) {
 *   console.log(`[${d.severity}] ${d.message} (line ${d.position.start.line})`);
 * }
 * ```
 *
 * @since 2.0.0
 * @group Diagnostics
 */
export interface Diagnostic {
  /** How severe the issue is. */
  severity: DiagnosticSeverity;

  /**
   * Machine-readable identifier for the diagnostic kind.
   *
   * Current codes:
   * - `"unclosed-block"` — a block element has no matching close tag.
   * - `"inline-block-element"` — a block element (e.g. `[[div]]`) is used
   *   inline without the required trailing newline.
   *
   * The field is typed as a plain `string`, so the list above is
   * documentation only: the compiler does not restrict which codes a rule
   * may emit, and consumers should tolerate codes they do not recognise.
   */
  code: string;

  /** Human-readable description of the issue. */
  message: string;

  /**
   * Source range where the issue was detected.
   *
   * NOTE(review): the block rules that report `"unclosed-block"` pass the
   * opening token's position here. Whether that position is meaningful when
   * position tracking is disabled in the parser options is not visible from
   * this file — confirm against the lexer.
   */
  position: Position;

  /**
   * An optional related source range that provides additional context
   * (e.g. the opening tag position when reporting a missing close tag).
   */
  relatedPosition?: Position;
}

/**
 * The result of parsing a Wikidot markup string.
 *
 * Contains both the parsed AST and any diagnostics emitted during parsing.
 * The AST is always produced, even when diagnostics are present — the parser
 * is lenient and recovers from errors.
 *
 * Both members are required, so callers can destructure the result without
 * null checks.
 *
 * @example
 * ```ts
 * import { parse } from "@wdprlib/parser";
 *
 * const result = parse("**bold** and //italic//");
 * console.log(result.ast.elements);  // AST nodes
 * console.log(result.diagnostics);   // [] (no issues)
 * ```
 *
 * @since 2.0.0
 * @group Diagnostics
 */
export interface ParseResult {
  /** The parsed syntax tree. */
  ast: SyntaxTree;

  /**
   * Diagnostics emitted during parsing (empty when the input is clean).
   *
   * This is always an array, never `undefined`; test `diagnostics.length`
   * to find out whether any issue was reported.
   */
  diagnostics: Diagnostic[];
}
3 changes: 3 additions & 0 deletions packages/ast/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ export {
isParagraphSafe,
} from "./element";

// Diagnostics
export type { Diagnostic, DiagnosticSeverity, ParseResult } from "./diagnostic";

// Constants
export { STYLE_SLOT_PREFIX } from "./constants";

Expand Down
6 changes: 5 additions & 1 deletion packages/parser/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* ```ts
* import { parse } from "@wdprlib/parser";
*
* const ast = parse("**bold** and //italic//");
* const { ast, diagnostics } = parse("**bold** and //italic//");
* ```
*
* For server-side module resolution, see {@link extractDataRequirements},
Expand Down Expand Up @@ -58,6 +58,10 @@ export type {
DateItem,
Embed,
TocEntry,
// Diagnostics
Diagnostic,
DiagnosticSeverity,
ParseResult,
} from "@wdprlib/ast";
export {
createPoint,
Expand Down
37 changes: 29 additions & 8 deletions packages/parser/src/parser/parse.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import type { Token } from "../lexer";
import { tokenize } from "../lexer";
import { preprocess } from "./preprocess";
import type { Element, SyntaxTree, WikitextSettings } from "@wdprlib/ast";
import type { Element, SyntaxTree, WikitextSettings, ParseResult } from "@wdprlib/ast";
import { DEFAULT_SETTINGS } from "@wdprlib/ast";
import { blockRules, blockFallbackRule, inlineRules, type ParseContext } from "./rules";
import { canApplyBlockRule } from "./rules/block/utils";
import { mergeSpanStripParagraphs, cleanInternalFlags } from "./postprocess";
import {
mergeSpanStripParagraphs,
cleanInternalFlags,
suppressDivAdjacentParagraphs,
} from "./postprocess";
import { buildTableOfContents } from "./toc";

/**
Expand Down Expand Up @@ -69,6 +73,8 @@ export class Parser {
// State flags
footnoteBlockParsed: false,
bibcites: [],
// Diagnostics
diagnostics: [],
// Rules (injected to avoid circular dependency)
blockRules,
blockFallbackRule,
Expand All @@ -77,9 +83,12 @@ export class Parser {
}

/**
* Parse tokens into SyntaxTree
* Parse tokens into a {@link ParseResult} containing the AST and
* any diagnostics emitted during parsing.
*
* @since 2.0.0
*/
parse(): SyntaxTree {
parse(): ParseResult {
const children: Element[] = [];

while (!this.isAtEnd()) {
Expand All @@ -90,8 +99,11 @@ export class Parser {
// Post-process: merge paragraphs that contain span_ (paragraph strip mode)
const mergedChildren = mergeSpanStripParagraphs(children);

// Wikidot: paragraphs directly adjacent to div blocks lose <p> wrapping
const divProcessed = suppressDivAdjacentParagraphs(mergedChildren);

// Clean internal flags from AST
const cleanedChildren = cleanInternalFlags(mergedChildren);
const cleanedChildren = cleanInternalFlags(divProcessed);

// Add footnote-block at the end if not present
const hasFootnoteBlock = cleanedChildren.some((el) => el.element === "footnote-block");
Expand Down Expand Up @@ -126,7 +138,7 @@ export class Parser {
result["html-blocks"] = this.ctx.htmlBlocks;
}

return result;
return { ast: result, diagnostics: this.ctx.diagnostics };
}

/**
Expand Down Expand Up @@ -211,9 +223,18 @@ export class Parser {
}

/**
* Parse source string into SyntaxTree
* Parse a Wikidot markup string into an AST with diagnostics.
*
* @example
* ```ts
* import { parse } from "@wdprlib/parser";
*
* const { ast, diagnostics } = parse("**bold** and //italic//");
* ```
*
* @since 2.0.0
*/
export function parse(source: string, options?: ParserOptions): SyntaxTree {
export function parse(source: string, options?: ParserOptions): ParseResult {
const preprocessed = preprocess(source);
const tokens = tokenize(preprocessed, { trackPositions: options?.trackPositions });
return new Parser(tokens, options).parse();
Expand Down
76 changes: 76 additions & 0 deletions packages/parser/src/parser/postprocess/divAdjacentParagraph.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/**
 * Post-processing pass: suppress paragraph wrapping adjacent to div containers.
 *
 * In Wikidot, when a paragraph is a direct sibling of a `<div>` block (no other
 * block elements between them), the `<p>` wrapping is removed and the inner
 * elements are promoted to the parent level.
 *
 * When the unwrapped paragraph follows a div, a line-break element is prepended
 * to represent the newline between the closing `</div>` and the bare text.
 * No line-break is added when the paragraph only precedes a div.
 *
 * Examples:
 *   `[[div]]inline[[/div]]\n[[div]]\n[[/div]]` → no `<p>` (adjacent to div)
 *   `[[div]]inline[[/div]]\n> a\n[[div]]\n[[/div]]` → has `<p>` (blockquote between)
 *
 * @module
 */
import type { Element, ContainerData } from "@wdprlib/ast";

/**
 * Tells whether `el` is a container element whose data type is `"paragraph"`.
 *
 * A missing element (`undefined`) and any non-container element yield `false`.
 */
function isParagraphContainer(el: Element | undefined): boolean {
  return (
    el?.element === "container" &&
    (el.data as ContainerData).type === "paragraph"
  );
}

/**
 * Tells whether `el` is a container element whose data type is `"div"`.
 *
 * A missing element (`undefined`) and any non-container element yield `false`.
 */
function isDivContainer(el: Element | undefined): boolean {
  return (
    el?.element === "container" &&
    (el.data as ContainerData).type === "div"
  );
}

/**
 * Within one sibling list, unwrap every paragraph container whose immediate
 * previous or next sibling is a div container.
 *
 * The unwrapped paragraph's inner elements take its place in the output. When
 * the paragraph's previous sibling is a div, a `line-break` element is emitted
 * just before those inner elements.
 *
 * Adjacency is always judged against the input list, so unwrapping one
 * paragraph never changes the decision for another.
 *
 * @returns the input array itself when it holds fewer than two elements,
 *   otherwise a newly built array.
 */
function suppressAtLevel(elements: Element[]): Element[] {
  // With zero or one element nothing can be adjacent to anything.
  if (elements.length < 2) return elements;

  const lastIndex = elements.length - 1;
  const output: Element[] = [];

  for (const [index, current] of elements.entries()) {
    if (!current) continue;

    // Anything that is not a paragraph container is passed through untouched.
    if (current.element !== "container" || !isParagraphContainer(current)) {
      output.push(current);
      continue;
    }

    const followsDiv = index > 0 && isDivContainer(elements[index - 1]);
    const precedesDiv = index < lastIndex && isDivContainer(elements[index + 1]);

    if (!followsDiv && !precedesDiv) {
      output.push(current);
      continue;
    }

    // Only a paragraph that comes after a div gets the leading line-break.
    if (followsDiv) {
      output.push({ element: "line-break" });
    }
    const paragraph = current.data as ContainerData;
    output.push(...paragraph.elements);
  }

  return output;
}

/**
 * Remove the paragraph wrapper from paragraphs that sit directly next to a
 * div container in the given element list.
 *
 * Only the list passed in is processed; the pass does not descend into
 * nested containers. Inside div containers, paragraphs adjacent to nested
 * divs therefore retain their `<p>` wrapping (matching Wikidot behavior).
 *
 * @param elements - sibling elements of a single level.
 * @returns the processed sibling list.
 */
export function suppressDivAdjacentParagraphs(elements: Element[]): Element[] {
  const processed = suppressAtLevel(elements);
  return processed;
}
1 change: 1 addition & 0 deletions packages/parser/src/parser/postprocess/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
*/

export { mergeSpanStripParagraphs, cleanInternalFlags } from "./spanStrip";
export { suppressDivAdjacentParagraphs } from "./divAdjacentParagraph";
13 changes: 12 additions & 1 deletion packages/parser/src/parser/rules/block/align.ts
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,19 @@ export const alignRule: BlockRule = {
consumed += bodyResult.consumed;
pos += bodyResult.consumed;

// Consume closing tag
// Check for missing close tag
const directionSymbol = { left: "<", right: ">", center: "=", justify: "==" }[direction];
const closeCheck = isAlignClose({ ...ctx, pos }, direction);
if (!closeCheck.match) {
ctx.diagnostics.push({
severity: "warning",
code: "unclosed-block",
message: `Missing closing tag [[/${directionSymbol}]] for [[${directionSymbol}]]`,
position: openToken.position,
});
}

// Consume closing tag
if (closeCheck.match) {
consumed += closeCheck.consumed;
pos += closeCheck.consumed;
Expand Down
6 changes: 6 additions & 0 deletions packages/parser/src/parser/rules/block/bibliography.ts
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,12 @@ export const bibliographyRule: BlockRule = {

// Require closing tag - without it, fail to prevent consuming entire document
if (!foundClose) {
ctx.diagnostics.push({
severity: "warning",
code: "unclosed-block",
message: "Missing closing tag [[/bibliography]] for [[bibliography]]",
position: openToken.position,
});
return { success: false };
}

Expand Down
Loading