@@ -5,17 +5,78 @@ const { spawnSync } = require("node:child_process");
55const path = require ( "node:path" ) ;
66
// Customization section
// Default file locations, relative paths under grammar/.
// NOTE(review): where these defaults are consumed is outside this excerpt.
const DEFAULT_INPUT_ABNF = "grammar/JSONC.abnf";
const DEFAULT_PROCESSED_ABNF = "grammar/jsonc-processed.abnf";
const DEFAULT_OUTPUT_HTML = "grammar/railroad-diagram.html";

// Heading text that postProcessGeneratedHtml writes into the first <h1> element.
const FORCED_HTML_HEADER = "JSONC GRAMMAR";
1112
// Rule names whose %x... definitions get inlined as literal ABNF strings.
// Append a name to this list to give another rule the same treatment.
const INLINE_HEX_RULES = [
  "multi-line-comment-start",
  "multi-line-comment-end",
  "asterisk",
  "escape",
  "single-line-comment-start",
  "quotation-mark",
  "decimal-point",
  "minus",
  "plus",
  "zero",
];

// For each entry, references to `referencedRules` inside `targetRule` are
// inlined, after which the referenced rule definitions are dropped.
// Append an entry to reuse the transformation for another target rule.
const INLINE_LITERAL_REFS = [
  { targetRule: "value", referencedRules: ["false", "true", "null"] },
];

// Each entry moves the definition of `ruleName` so that it directly follows
// the definition of `afterRule` in the processed ABNF. Entries are applied
// in order, so later entries may refer to positions set by earlier ones.
const REPOSITION_RULES_AFTER = [
  { ruleName: "begin-array", afterRule: "array" },
  { ruleName: "end-array", afterRule: "begin-array" },
  { ruleName: "begin-object", afterRule: "object" },
  { ruleName: "end-object", afterRule: "begin-object" },
  { ruleName: "name-separator", afterRule: "member" },
  { ruleName: "value-separator", afterRule: "value" },
  { ruleName: "digit", afterRule: "unescaped" },
  { ruleName: "digit1-9", afterRule: "digit" },
  { ruleName: "hexdigit", afterRule: "digit1-9" },
  { ruleName: "four-hexdigits", afterRule: "hexdigit" },
];
2081
2182function escapeRegExp ( value ) {
@@ -36,6 +97,24 @@ function decodeAbnfHexSequence(value) {
3697 return String . fromCodePoint ( ...bytes ) ;
3798}
3899
/**
 * Looks up a rule defined as a hex value and returns that hex value verbatim.
 *
 * The rule must have the form `name = %xHH` or `name = %xHH.HH...`; anything
 * after the hex value on the same line is ignored.
 *
 * @param {string} source - ABNF text to search.
 * @param {string} ruleName - Name of the rule to look up.
 * @returns {string} The hex value as written, e.g. `%x2F.2A`.
 * @throws {Error} If no definition of that form exists for `ruleName`.
 */
function getHexRuleSequence(source, ruleName) {
  const definitionPattern = new RegExp(
    `^\\s*${escapeRegExp(ruleName)}\\s*=\\s*(%x[0-9A-Fa-f]+(?:\\.[0-9A-Fa-f]+)*)\\b.*$`,
    "m",
  );

  const found = source.match(definitionPattern);
  if (found) {
    // Capture group 1 is the hex value itself.
    const [, hexSequence] = found;
    return hexSequence;
  }

  throw new Error(`Rule ${ruleName} was not found.`);
}
113+
/**
 * Returns the decoded characters of a rule that is defined as a hex value.
 *
 * @param {string} source - ABNF text to search.
 * @param {string} ruleName - Name of the rule to look up.
 * @returns {string} Result of decoding the rule's hex value.
 * @throws {Error} If getHexRuleSequence cannot find the rule.
 */
function getHexRuleLiteral(source, ruleName) {
  const hexSequence = getHexRuleSequence(source, ruleName);
  return decodeAbnfHexSequence(hexSequence);
}
117+
39118function inlineHexRuleAsLiteral ( source , ruleName ) {
40119 const escapedRuleName = escapeRegExp ( ruleName ) ;
41120 const ruleRegex = new RegExp (
@@ -50,10 +129,10 @@ function inlineHexRuleAsLiteral(source, ruleName) {
50129 const hexSequence = ruleMatch [ 1 ] ;
51130 const literalChars = decodeAbnfHexSequence ( hexSequence ) ;
52131
53- // For backslash or other problematic characters, keep them as hex format
54- // ABNF doesn't support backslash escaping in quoted strings
132+ // Keep hex format for characters that cannot be represented safely
133+ // as a single ABNF quoted string literal.
55134 let replacement ;
56- if ( literalChars === "\\" ) {
135+ if ( literalChars === "\\" || literalChars === '"' ) {
57136 replacement = hexSequence ;
58137 } else {
59138 // For other characters, escape only double quotes (not backslashes)
@@ -90,16 +169,104 @@ function inlineHexRuleAsLiteral(source, ruleName) {
90169 . join ( "\n" ) ;
91170}
92171
/**
 * Replaces references to the given rules, inside one target rule, with the
 * hex value each referenced rule is defined as.
 *
 * Only the first line of the target rule's right-hand side is rewritten.
 * Text inside double-quoted ABNF strings and `;` comments is left untouched,
 * so a quoted literal such as "true" is never mistaken for a rule reference.
 *
 * @param {string} source - ABNF text.
 * @param {string} targetRule - Rule whose right-hand side is rewritten.
 * @param {Iterable<string>} referencedRules - Rule names to inline.
 * @returns {string} ABNF text with the target rule rewritten.
 * @throws {Error} If the target rule, or the hex definition of any
 *   referenced rule, cannot be found.
 */
function inlineLiteralRefsInTargetRule(source, targetRule, referencedRules) {
  const targetRuleRegex = new RegExp(
    `^(\\s*${escapeRegExp(targetRule)}\\s*=\\s*)(.*)$`,
    "m",
  );
  const match = source.match(targetRuleRegex);
  if (!match) {
    throw new Error(`Rule ${targetRule} was not found.`);
  }

  const [, targetRulePrefix, targetRuleRhs] = match;

  let updatedRhs = targetRuleRhs;
  for (const referencedRule of referencedRules) {
    const hexSequence = getHexRuleSequence(source, referencedRule);

    // The first two alternatives consume quoted strings and trailing comments
    // so the rule name is matched only where it is an actual reference.
    // The lookarounds keep the name from matching inside a longer rule name.
    const tokenRegex = new RegExp(
      `"[^"]*"|;.*|(?<![A-Za-z0-9-])(${escapeRegExp(referencedRule)})(?![A-Za-z0-9-])`,
      "g",
    );
    updatedRhs = updatedRhs.replace(tokenRegex, (token, reference) =>
      reference === undefined ? token : hexSequence,
    );
  }

  // A replacer function is used so "$" sequences in the grammar text are
  // inserted literally instead of being read as replacement patterns.
  return source.replace(targetRuleRegex, () => `${targetRulePrefix}${updatedRhs}`);
}
195+
/**
 * Removes the definitions of the named rules from ABNF text.
 *
 * A definition is the line that starts the rule (`name =` or `name =/`)
 * plus any indented, non-blank continuation lines that directly follow it.
 * Removing the continuation lines as well prevents them from being left
 * behind and silently attaching to the preceding rule.
 *
 * @param {string} source - ABNF text; LF and CRLF line endings are accepted.
 * @param {Iterable<string>} ruleNames - Names of the rules to remove.
 * @returns {string} Remaining lines joined with "\n".
 */
function removeRuleDefinitions(source, ruleNames) {
  const removalSet = new Set(ruleNames);
  const ruleStartRegex = /^\s*([A-Za-z][A-Za-z0-9-]*)\s*=/;
  const continuationRegex = /^[ \t]+\S/;

  // True while the lines being scanned belong to a rule that is being removed.
  let removingBlock = false;

  return source
    .split(/\r?\n/)
    .filter((line) => {
      const match = line.match(ruleStartRegex);
      if (match) {
        removingBlock = removalSet.has(match[1]);
        return !removingBlock;
      }

      if (removingBlock && continuationRegex.test(line)) {
        return false;
      }

      // A blank or unindented line ends the removed rule's block.
      removingBlock = false;
      return true;
    })
    .join("\n");
}
210+
/**
 * Locates the lines that make up one rule definition.
 *
 * The block starts at the first line matching `ruleName =` and extends over
 * every directly following line that begins with whitespace.
 *
 * @param {string[]} lines - ABNF text split into lines.
 * @param {string} ruleName - Name of the rule to locate.
 * @returns {{startIndex: number, endIndex: number, blockLines: string[]}}
 *   Index of the first line, index one past the last line, and a copy of
 *   the lines in between.
 * @throws {Error} If no line starts a definition of `ruleName`.
 */
function findRuleBlock(lines, ruleName) {
  const definitionPattern = new RegExp(`^\\s*${escapeRegExp(ruleName)}\\s*=`);

  const first = lines.findIndex((candidate) => definitionPattern.test(candidate));
  if (first === -1) {
    throw new Error(`Rule ${ruleName} was not found.`);
  }

  // Advance past every continuation line, i.e. one with leading whitespace.
  let pastLast = first + 1;
  for (; pastLast < lines.length; pastLast += 1) {
    if (!/^\s/.test(lines[pastLast])) {
      break;
    }
  }

  return {
    startIndex: first,
    endIndex: pastLast,
    blockLines: lines.slice(first, pastLast),
  };
}
229+
/**
 * Moves rule definitions so each one directly follows another rule.
 *
 * Entries are applied one after another to the same working copy, so a
 * later entry sees the positions produced by earlier ones.
 *
 * @param {string} source - ABNF text; LF and CRLF line endings are accepted.
 * @param {Iterable<{ruleName: string, afterRule: string}>} reorderings -
 *   Moves to perform, in order.
 * @returns {string} Reordered lines joined with "\n".
 * @throws {Error} If findRuleBlock cannot find a rule named in an entry.
 */
function repositionRulesAfter(source, reorderings) {
  const workingLines = source.split(/\r?\n/);

  for (const { ruleName, afterRule } of reorderings) {
    // Take the rule out first so the anchor's position is computed
    // against the list without it.
    const { startIndex, endIndex, blockLines } = findRuleBlock(workingLines, ruleName);
    workingLines.splice(startIndex, endIndex - startIndex);

    const { endIndex: insertAt } = findRuleBlock(workingLines, afterRule);
    workingLines.splice(insertAt, 0, ...blockLines);
  }

  return workingLines.join("\n");
}
243+
/**
 * Runs every configured transformation over the ABNF text, in a fixed order:
 * hex-rule inlining, literal-reference inlining (dropping the referenced
 * rule definitions afterwards), then rule repositioning.
 *
 * @param {string} source - Original ABNF text.
 * @returns {string} Transformed ABNF text.
 */
function processAbnfSource(source) {
  const withHexRulesInlined = INLINE_HEX_RULES.reduce(
    (abnf, ruleName) => inlineHexRuleAsLiteral(abnf, ruleName),
    source,
  );

  const withLiteralRefsInlined = INLINE_LITERAL_REFS.reduce(
    (abnf, { targetRule, referencedRules }) => {
      const inlined = inlineLiteralRefsInTargetRule(abnf, targetRule, referencedRules);
      return removeRuleDefinitions(inlined, referencedRules);
    },
    withHexRulesInlined,
  );

  return repositionRulesAfter(withLiteralRefsInlined, REPOSITION_RULES_AFTER);
}
102260
/**
 * Overwrites the text of the first attribute-less `<h1>` element in the
 * generated HTML file with FORCED_HTML_HEADER.
 *
 * The file is rewritten only when the replacement actually changes it.
 *
 * @param {string} htmlPath - Path of the HTML file to update in place.
 */
function postProcessGeneratedHtml(htmlPath) {
  const originalHtml = fs.readFileSync(htmlPath, "utf8");
  const forcedHeading = `<h1>${FORCED_HTML_HEADER}</h1>`;
  const rewrittenHtml = originalHtml.replace(/<h1>[^<]*<\/h1>/, forcedHeading);

  // Nothing matched, or the heading already had the forced text.
  if (rewrittenHtml === originalHtml) {
    return;
  }

  fs.writeFileSync(htmlPath, rewrittenHtml, "utf8");
}
269+
103270const args = process . argv . slice ( 2 ) ;
104271const titleIndex = args . indexOf ( "--title" ) ;
105272
@@ -173,4 +340,15 @@ if (result.error) {
173340 process . exit ( 1 ) ;
174341}
175342
176- process . exit ( result . status === null ? 1 : result . status ) ;
// Propagate a failing child process: reuse its exit status, or 1 when the
// status is null.
if (result.status !== 0) {
  const exitCode = result.status === null ? 1 : result.status;
  process.exit(exitCode);
}

// The child succeeded, so the generated HTML can be post-processed.
// A failure here is reported and turned into exit code 1.
try {
  postProcessGeneratedHtml(outputPath);
} catch (error) {
  const reason = error.message;
  console.error(`Failed to post-process generated HTML: ${reason}`);
  process.exit(1);
}

process.exit(0);
0 commit comments