JSONC-org
diff --git a/generate-railroad.js b/generate-railroad.js
Lines changed: 184 additions & 6 deletions
diff --git a/grammar/jsonc.abnf b/grammar/JSONC.abnf
grammar/jsonc.abnf renamed to grammar/JSONC.abnf
Lines changed: 26 additions & 29 deletions
diff --git a/grammar/README.md b/grammar/README.md
Lines changed: 4 additions & 4 deletions
@@ -5,17 +5,78 @@ const { spawnSync } = require("node:child_process");
 const path = require("node:path");
 
 // Customization section
-const DEFAULT_INPUT_ABNF = "grammar/jsonc.abnf";
+const DEFAULT_INPUT_ABNF = "grammar/JSONC.abnf";
 const DEFAULT_PROCESSED_ABNF = "grammar/jsonc-processed.abnf";
 const DEFAULT_OUTPUT_HTML = "grammar/railroad-diagram.html";
+// Heading text that postProcessGeneratedHtml writes into the first plain
+// <h1>...</h1> element of the generated HTML, replacing its previous text.
+const FORCED_HTML_HEADER = "JSONC GRAMMAR";
 
 // Rules to inline from their %x... definitions as literal ABNF strings.
 // Add more rule names here to apply the same transformation.
+// A rule that decodes to a single backslash or double quote is kept in its
+// %x form by inlineHexRuleAsLiteral instead of being turned into a quoted string.
 const INLINE_HEX_RULES = [
   "multi-line-comment-start",
   "multi-line-comment-end",
   "asterisk",
-  "escape"
+  "escape",
+  "single-line-comment-start",
+  "quotation-mark",
+  "decimal-point",
+  "minus",
+  "plus",
+  "zero",
+];
+
+// Inline selected rule references into specific target rules. Each reference
+// is replaced with the referenced rule's %x hex sequence (not a quoted string),
+// and the referenced rule definitions are then removed from the processed ABNF.
+// Add more mappings here to reuse this transformation pattern.
+const INLINE_LITERAL_REFS = [
+  {
+    targetRule: "value",
+    referencedRules: ["false", "true", "null"],
+  },
+];
+
+// Move selected rule definitions after another rule in the processed ABNF.
+// Add more entries here to control rule ordering in generated output.
+// Entries are applied one by one in array order, so an entry may name a rule
+// moved by an earlier entry as its afterRule (e.g. end-array after begin-array).
+// Both ruleName and afterRule must exist in the processed ABNF, otherwise
+// processing throws.
+const REPOSITION_RULES_AFTER = [
+  {
+    ruleName: "begin-array",
+    afterRule: "array",
+  },
+  {
+    ruleName: "end-array",
+    afterRule: "begin-array",
+  },
+  {
+    ruleName: "begin-object",
+    afterRule: "object",
+  },
+  {
+    ruleName: "end-object",
+    afterRule: "begin-object",
+  },
+  {
+    ruleName: "name-separator",
+    afterRule: "member",
+  },
+  {
+    ruleName: "value-separator",
+    afterRule: "value",
+  },
+  {
+    ruleName: "digit",
+    afterRule: "unescaped",
+  },
+  {
+    ruleName: "digit1-9",
+    afterRule: "digit",
+  },
+  {
+    ruleName: "hexdigit",
+    afterRule: "digit1-9",
+  },
+  {
+    ruleName: "four-hexdigits",
+    afterRule: "hexdigit",
+  }
 ];
 
 function escapeRegExp(value) {
@@ -36,6 +97,24 @@ function decodeAbnfHexSequence(value) {
   return String.fromCodePoint(...bytes);
 }
 
+/**
+ * Return the raw `%x...` hex sequence that defines a rule.
+ *
+ * Only rules whose whole right-hand side is a single hex sequence (for example
+ * `%x2F.2F`), optionally followed by a `;` comment, are accepted. Ranges
+ * (`%x30-39`) and alternations (`%x65 / %x45`) are rejected instead of being
+ * silently truncated to their first element.
+ *
+ * @param {string} source - ABNF text to search.
+ * @param {string} ruleName - Name of the rule to look up.
+ * @returns {string} The hex sequence exactly as written, e.g. `%x66.61.6C.73.65`.
+ * @throws {Error} If the rule has no such single-sequence definition in `source`.
+ */
+function getHexRuleSequence(source, ruleName) {
+  const escapedRuleName = escapeRegExp(ruleName);
+  // After the sequence only trailing blanks and an optional comment may follow;
+  // anything else (`-39`, `/ %x45`, ...) means the rule is not a plain sequence.
+  const ruleRegex = new RegExp(
+    `^\\s*${escapedRuleName}\\s*=\\s*(%x[0-9A-Fa-f]+(?:\\.[0-9A-Fa-f]+)*)[ \\t]*(?:;.*)?$`,
+    "m",
+  );
+  const ruleMatch = source.match(ruleRegex);
+  if (!ruleMatch) {
+    throw new Error(
+      `Rule ${ruleName} was not found as a single %x hex sequence definition.`,
+    );
+  }
+
+  return ruleMatch[1];
+}
+
+/**
+ * Look up a rule's `%x...` definition and decode it.
+ *
+ * @param {string} source - ABNF text to search.
+ * @param {string} ruleName - Name of the rule to look up.
+ * @returns {string} Whatever decodeAbnfHexSequence produces for the rule's hex sequence.
+ * @throws {Error} If getHexRuleSequence cannot find the rule.
+ */
+function getHexRuleLiteral(source, ruleName) {
+  const hexSequence = getHexRuleSequence(source, ruleName);
+  return decodeAbnfHexSequence(hexSequence);
+}
+
 function inlineHexRuleAsLiteral(source, ruleName) {
   const escapedRuleName = escapeRegExp(ruleName);
   const ruleRegex = new RegExp(
@@ -50,10 +129,10 @@ function inlineHexRuleAsLiteral(source, ruleName) {
   const hexSequence = ruleMatch[1];
   const literalChars = decodeAbnfHexSequence(hexSequence);
 
-  // For backslash or other problematic characters, keep them as hex format
-  // ABNF doesn't support backslash escaping in quoted strings
+  // Keep hex format for characters that cannot be represented safely
+  // as a single ABNF quoted string literal.
   let replacement;
-  if (literalChars === "\\") {
+  if (literalChars === "\\" || literalChars === '"') {
     replacement = hexSequence;
   } else {
     // For other characters, escape only double quotes (not backslashes)
@@ -90,16 +169,104 @@ function inlineHexRuleAsLiteral(source, ruleName) {
     .join("\n");
 }
 
+/**
+ * Replace references to selected rules inside one target rule with the
+ * referenced rules' `%x...` hex sequences.
+ *
+ * Only the first line of the target rule (the line holding `name =`) is
+ * rewritten. A reference is matched as a whole rule name, so `true` is not
+ * replaced inside `true-color` or `untrue`.
+ *
+ * @param {string} source - ABNF text to transform.
+ * @param {string} targetRule - Name of the rule whose right-hand side is rewritten.
+ * @param {string[]} referencedRules - Rule names to replace with their hex sequences.
+ * @returns {string} The ABNF text with the target rule line rewritten.
+ * @throws {Error} If the target rule is missing, or getHexRuleSequence cannot
+ *   resolve one of the referenced rules.
+ */
+function inlineLiteralRefsInTargetRule(source, targetRule, referencedRules) {
+  const escapedTargetRule = escapeRegExp(targetRule);
+  const targetRuleRegex = new RegExp(`^(\\s*${escapedTargetRule}\\s*=\\s*)(.*)$`, "m");
+  const match = source.match(targetRuleRegex);
+  if (!match) {
+    throw new Error(`Rule ${targetRule} was not found.`);
+  }
+
+  const [, targetRulePrefix, targetRuleRhs] = match;
+
+  let updatedRhs = targetRuleRhs;
+  for (const referencedRule of referencedRules) {
+    const hexSequence = getHexRuleSequence(source, referencedRule);
+    const referencedRuleRegex = new RegExp(
+      `(?<![A-Za-z0-9-])${escapeRegExp(referencedRule)}(?![A-Za-z0-9-])`,
+      "g",
+    );
+    updatedRhs = updatedRhs.replace(referencedRuleRegex, () => hexSequence);
+  }
+
+  // Replacer functions insert their result verbatim. A plain replacement string
+  // would expand `$&`, `$1`, `$$`, ... if the rule line (for example its
+  // trailing comment) happened to contain them.
+  return source.replace(targetRuleRegex, () => `${targetRulePrefix}${updatedRhs}`);
+}
+
+/**
+ * Remove the definitions of the given rules from ABNF text.
+ *
+ * A definition is the line holding `name =` plus the continuation lines that
+ * follow it. A continuation line is any line starting with whitespace that is
+ * not itself a rule definition (the same convention findRuleBlock uses), so
+ * multi-line rules are removed whole instead of leaving orphaned lines behind.
+ *
+ * @param {string} source - ABNF text to filter.
+ * @param {Iterable<string>} ruleNames - Names of the rules to remove.
+ * @returns {string} The remaining lines joined with "\n".
+ */
+function removeRuleDefinitions(source, ruleNames) {
+  const removalSet = new Set(ruleNames);
+  const keptLines = [];
+  let isDroppingRule = false;
+
+  for (const line of source.split(/\r?\n/)) {
+    const match = line.match(/^\s*([A-Za-z][A-Za-z0-9-]*)\s*=/);
+    if (match) {
+      // A new rule starts here: drop it only if it was requested.
+      isDroppingRule = removalSet.has(match[1]);
+    } else if (!/^\s/.test(line)) {
+      // An empty or non-indented line (e.g. a comment) ends the current rule.
+      isDroppingRule = false;
+    }
+
+    if (!isDroppingRule) {
+      keptLines.push(line);
+    }
+  }
+
+  return keptLines.join("\n");
+}
+
+/**
+ * Locate a rule definition inside an array of ABNF lines.
+ *
+ * The block starts at the first line matching `name =` and extends over every
+ * directly following line that begins with whitespace (continuation lines).
+ *
+ * @param {string[]} lines - ABNF text split into lines.
+ * @param {string} ruleName - Name of the rule to locate.
+ * @returns {{startIndex: number, endIndex: number, blockLines: string[]}}
+ *   Start index (inclusive), end index (exclusive) and a copy of the block's lines.
+ * @throws {Error} If no line defines the rule.
+ */
+function findRuleBlock(lines, ruleName) {
+  const definitionPattern = new RegExp(`^\\s*${escapeRegExp(ruleName)}\\s*=`);
+  const startIndex = lines.findIndex((candidate) => definitionPattern.test(candidate));
+  if (startIndex === -1) {
+    throw new Error(`Rule ${ruleName} was not found.`);
+  }
+
+  // Offset, counted from the line after the definition, of the first line
+  // that is not a continuation; -1 means the block runs to the end.
+  const firstBodyIndex = startIndex + 1;
+  const continuationCount = lines
+    .slice(firstBodyIndex)
+    .findIndex((candidate) => !/^\s/.test(candidate));
+  const endIndex =
+    continuationCount === -1 ? lines.length : firstBodyIndex + continuationCount;
+
+  const blockLines = lines.slice(startIndex, endIndex);
+  return { startIndex, endIndex, blockLines };
+}
+
+/**
+ * Move rule definitions so that each one directly follows another rule.
+ *
+ * Entries are processed in order. For each entry the rule's block is cut out
+ * first, and the anchor rule is looked up afterwards in the remaining lines.
+ *
+ * @param {string} source - ABNF text to reorder.
+ * @param {Iterable<{ruleName: string, afterRule: string}>} reorderings -
+ *   Which rule to move and which rule it should follow.
+ * @returns {string} The reordered ABNF text, lines joined with "\n".
+ * @throws {Error} If findRuleBlock cannot locate a rule or its anchor.
+ */
+function repositionRulesAfter(source, reorderings) {
+  const lines = source.split(/\r?\n/);
+
+  for (const { ruleName, afterRule } of reorderings) {
+    const { startIndex, endIndex, blockLines } = findRuleBlock(lines, ruleName);
+    const blockLength = endIndex - startIndex;
+    lines.splice(startIndex, blockLength);
+
+    const { endIndex: insertionIndex } = findRuleBlock(lines, afterRule);
+    lines.splice(insertionIndex, 0, ...blockLines);
+  }
+
+  return lines.join("\n");
+}
+
 function processAbnfSource(source) {
+  // Transformations run in a fixed order: hex rules are inlined first, then
+  // literal references are inlined and their definitions removed, and finally
+  // rules are reordered. Returns the transformed ABNF text.
   let processed = source;
 
   for (const ruleName of INLINE_HEX_RULES) {
     processed = inlineHexRuleAsLiteral(processed, ruleName);
   }
 
+  for (const { targetRule, referencedRules } of INLINE_LITERAL_REFS) {
+    // Inline before removing: inlining reads the referenced rules' %x
+    // definitions, so they must still be present in the text at that point.
+    processed = inlineLiteralRefsInTargetRule(processed, targetRule, referencedRules);
+    processed = removeRuleDefinitions(processed, referencedRules);
+  }
+
+  // Throws if any rule named in REPOSITION_RULES_AFTER is missing from the text.
+  processed = repositionRulesAfter(processed, REPOSITION_RULES_AFTER);
+
   return processed;
 }
 
+/**
+ * Overwrite the first plain `<h1>...</h1>` heading of the generated HTML file
+ * with FORCED_HTML_HEADER.
+ *
+ * Only an attribute-less `<h1>` whose content has no nested tags is matched.
+ * The file is rewritten only when the replacement changes its content.
+ *
+ * @param {string} htmlPath - Path of the HTML file to update in place.
+ * @throws {Error} If reading or writing the file fails.
+ */
+function postProcessGeneratedHtml(htmlPath) {
+  const html = fs.readFileSync(htmlPath, "utf8");
+  // A replacer function inserts the header verbatim. A replacement string would
+  // expand `$&`, `$1`, `$$`, ... if a customized header contained them.
+  const updated = html.replace(/<h1>[^<]*<\/h1>/, () => `<h1>${FORCED_HTML_HEADER}</h1>`);
+
+  if (updated !== html) {
+    fs.writeFileSync(htmlPath, updated, "utf8");
+  }
+}
+
 const args = process.argv.slice(2);
 const titleIndex = args.indexOf("--title");
 
@@ -173,4 +340,15 @@ if (result.error) {
   process.exit(1);
 }
 
-process.exit(result.status === null ? 1 : result.status);
+// The upstream CLI did not exit cleanly: propagate its exit code (or 1 when
+// no status is available) and skip HTML post-processing.
+if (result.status !== 0) {
+  process.exit(result.status === null ? 1 : result.status);
+}
+
+// Post-processing runs only after a successful generation; a failure here is
+// reported on stderr and turns into exit code 1.
+try {
+  postProcessGeneratedHtml(outputPath);
+} catch (error) {
+  console.error(`Failed to post-process generated HTML: ${error.message}`);
+  process.exit(1);
+}
+
+process.exit(0);
@@ -1,38 +1,29 @@
 ; JSONC grammar with comments support (RFC 8259 extended with JavaScript-style comments)
 ;
 ; Notes:
-; - Rule names and structure follow RFC 8259 ABNF snippets.
-; - DIGIT and HEXDIG are core rules from RFC 5234.
-; - comments are an extension not in RFC 8259.
+; - Rule names and structure follow RFC 8259 ABNF.
+; - Comments are an extension not in RFC 8259.
 ; - Trailing commas are NOT supported in this grammar.
 
 ; A JSONC-text is a serialized value surrounded by optional whitespace and comments.
 ; Comments can appear anywhere insignificant whitespace is allowed in JSON.
 JSONC-text = wsc value wsc
 
 ; Whitespace with Comments: zero or more whitespace characters or comments
-wsc = *(ws-char / comment)
+wsc = *(ws-char / comment)   ; Whitespace and/or comments
 
 ; Single whitespace character (space, tab, line feed, carriage return)
 ws-char = %x20 / %x09 / %x0A / %x0D  ; space / tab / LF / CR
 
 ; Comments: single-line or multi-line
 comment = single-line-comment / multi-line-comment
 
-; Source character: any Unicode code point, as per ECMAScript.
-source-character = %x00-10FFFF
-
-; Comment terminators and sequences (based on ECMAScript line terminators)
-comment-terminator = %x0A / %x0D / %x2028 / %x2029          ; LF / CR / LS / PS
-comment-terminator-sequence = %x0D.0A / %x0A / %x0D / %x2028 / %x2029 
-
 ; Single-line comment: starts with //, continues until line ending
-; Terminator is not part of the comment body.
 ; Note that the single-line-comment-end is optional, allowing comments to end at the end of the file without a line terminator.
 single-line-comment-start = %x2F.2F             ; // double solidus
-single-line-comment-end = comment-terminator-sequence
+single-line-comment-end = %x0D.0A / %x0A / %x0D
 single-line-comment = single-line-comment-start *single-line-comment-char [ single-line-comment-end ]
-single-line-comment-char = %x00-09 / %x0B-0C / %x0E-2027 / %x202A-10FFFF ; Any source character except comment terminators
+single-line-comment-char = %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except CR and LF (line terminator)
 
 ; Multi-line comment: /* ... */
 ; Cannot be nested. The first */ closes the comment.
@@ -58,7 +49,7 @@ name-separator  = wsc %x3A wsc  ; : colon
 value-separator = wsc %x2C wsc  ; , comma
 
 ; Any JSON value
-value = false / null / true / object / array / number / string
+value = object / array / number / string / true / false / null
 
 ; Literal names (boolean values and null)
 false = %x66.61.6C.73.65   ; false
@@ -73,31 +64,37 @@ member = string name-separator value
 array = begin-array [ value *( value-separator value ) ] end-array
 
 ; Numbers
-number = [ minus ] int [ frac ] [ exp ]
+number = [ minus ] ( zero / ( digit1-9 *digit ) ) [ decimal-point 1*digit ] [ ( %x65 / %x45 ) [ minus / plus ] 1*digit ]
 decimal-point = %x2E        ; .
+digit = %x30-39          ; 0-9
 digit1-9 = %x31-39          ; 1-9
-e = %x65 / %x45             ; e E
-exp = e [ minus / plus ] 1*DIGIT
-frac = decimal-point 1*DIGIT
-int = zero / ( digit1-9 *DIGIT )
+
 minus = %x2D                ; -
 plus = %x2B                 ; +
 zero = %x30                 ; 0
+hexdigit = digit / 
+  %x41 / %x61 /             ; A a
+  %x42 / %x62 /             ; B b
+  %x43 / %x63 /             ; C c
+  %x44 / %x64 /             ; D d
+  %x45 / %x65 /             ; E e
+  %x46 / %x66               ; F f
+four-hexdigits = 4hexdigit
 
 ; Strings
 string = quotation-mark *char quotation-mark
 
 char = unescaped /
  escape (
- %x22 /             ; "    quotation mark  U+0022
- %x5C /             ; \    reverse solidus U+005C
- %x2F /             ; /    solidus         U+002F
- %x62 /             ; b    backspace       U+0008
- %x66 /             ; f    form feed       U+000C
- %x6E /             ; n    line feed       U+000A
- %x72 /             ; r    carriage return U+000D
- %x74 /             ; t    tab             U+0009
- %x75 4HEXDIG       ; uXXXX                U+XXXX
+ %x22 /              ; "    quotation mark  U+0022
+ %x5C /              ; \    reverse solidus U+005C
+ %x2F /              ; /    solidus         U+002F
+ %x62 /              ; b    backspace       U+0008
+ %x66 /              ; f    form feed       U+000C
+ %x6E /              ; n    line feed       U+000A
+ %x72 /              ; r    carriage return U+000D
+ %x74 /              ; t    tab             U+0009
+ %x75 four-hexdigits ; uXXXX                U+XXXX
  )
 
 escape = %x5C               ; \
 
@@ -4,7 +4,7 @@ This directory contains the ABNF grammar for JSONC, along with plans for generat
 
 ## Railroad Diagram Generation Plan
 
-Generate railroad diagrams from `grammar/jsonc.abnf` using a simple one-file Node.js script.
+Generate railroad diagrams from `grammar/JSONC.abnf` using a simple one-file Node.js script.
 
 Instead of building a custom ABNF parser and converter to Tab Atkins constructor calls, use:
 
@@ -18,7 +18,7 @@ The wrapper script should:
 
 1. Accept input ABNF path and optional output HTML path.
 2. Default to:
-   - input: `grammar/jsonc.abnf`
+   - input: `grammar/JSONC.abnf`
    - output: `grammar/railroad-diagram.html`
 3. Optionally accept `--title` to set the HTML title.
 4. Execute the upstream CLI from our installed dependency.
@@ -53,13 +53,13 @@ npm run railroad
 Generate from a specific input and output:
 
 ```bash
-npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html
+npm run railroad -- grammar/JSONC.abnf grammar/railroad-diagram.html
 ```
 
 Generate with a custom title:
 
 ```bash
-npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html --title "JSONC Grammar"
+npm run railroad -- grammar/JSONC.abnf grammar/railroad-diagram.html --title "JSONC Grammar"
 ```
 
 ### Notes on EOF for single-line comments