|
| 1 | +## JSON-Z Grammar |
| 2 | + |
| 3 | +Features which exceed the JSON specification are flagged with the minimal feature set required for parsing them, in a hierarchy from least to most permissive: JSONC, JSON5, JSON-Z. |
| 4 | + |
| 5 | +### Overview |
| 6 | + |
| 7 | +```plantuml |
| 8 | +@startebnf |
| 9 | +
|
| 10 | +value = inert-content, ( primitive | array | object | extended-type (* JSON-Z *) ), inert-content ; |
| 11 | +
|
| 12 | +inert-content = { whitespace | comment (* JSONC *) } ; |
| 13 | +
|
| 14 | +primitive = "null" | "undefined" (* JSON-Z *) | "true" | "false" | number | string ; |
| 15 | +
|
| 16 | +array = "[", ( inert-content | ( (simple-array-content | sparse-array-content (* JSON-Z *) ), [ "," (* JSON5 *) ] ) ), "]"; |
| 17 | +
|
| 18 | +simple-array-content = value, { ",", value } ; |
| 19 | +
|
| 20 | +sparse-array-content (* JSON-Z *) = value, { { "," }, value }; |
| 21 | +
|
| 22 | +(* Note: The experimental hidden array properties feature is not diagrammed above. *) |
| 23 | +
|
| 24 | +object = "{", ( inert-content | ( object-content, [ "," (* JSON5 *) ] ) ), "}" ; |
| 25 | +
|
| 26 | +object-content = key-value-pair, { ",", key-value-pair } ; |
| 27 | +
|
| 28 | +key-value-pair = ( string | identifier (* JSON5 *) ), inert-content, ":", value ; |
| 29 | +
|
| 30 | +identifier = identifier-start, { identifier-character }; |
| 31 | +
|
| 32 | +(* Identifiers are in accord with the ECMAScript 5.1 specification: |
| 33 | + https://262.ecma-international.org/5.1/#sec-7.6 |
| 34 | +
|
| 35 | +Note: Identifiers can also use \uXXXX-style escapes with codepoints corresponding |
| 36 | +to valid identifier characters. For example: |
| 37 | +
|
| 38 | +{\u0061: 200} is the same as {a: 200} *) |
| 39 | +
|
| 40 | +@endebnf |
| 41 | +``` |
| 42 | + |
| 43 | +### Basic character classes |
| 44 | + |
| 45 | +```plantuml |
| 46 | +@startebnf |
| 47 | +
|
| 48 | +whitespace = { "\t" (* tab *) | "\n" (* newline *) | "\f" (* form-feed / JSON5 *) | "\r" (* return *) | " " (* space *) | "\v" (* vertical tab / JSON5 *) | ? Unicode whitespace character ? (* JSON5 *) }- ; |
| 49 | +
|
| 50 | +(* Unicode whitespace: https://www.compart.com/en/unicode/category/Zs *) |
| 51 | +
|
| 52 | +sign = "+" | "-" ; |
| 53 | +
|
| 54 | +binary-digit = 0-1 ; |
| 55 | +
|
| 56 | +octal-digit = 0-7 ; |
| 57 | +
|
| 58 | +decimal-digit = 0-9 ; |
| 59 | +
|
| 60 | +non-zero-digit = 1-9; |
| 61 | +
|
| 62 | +non-octal-digit = 8-9 ; |
| 63 | +
|
| 64 | +hex-digit = 0-9A-Fa-f ; |
| 65 | +
|
| 66 | +safe-string-character = ? All Unicode matching /[^\x00\x0A\x0D\x22\x27\x5C\x60\u2028\u2029]/ ? ; |
| 67 | +
|
| 68 | +(* Note: The above regex matches all characters other than NUL, LF, CR, quote, |
| 69 | +single-quote, backslash, backtick, LINE SEPARATOR, and PARAGRAPH SEPARATOR. *) |
| 70 | +
|
| 71 | +identifier-start = A-Za-z | "_" | "$" | ? Unicode letter ? ; |
| 72 | +
|
| 73 | +(* Unicode letter (comprising five categories): |
| 74 | + https://www.compart.com/en/unicode/category/Ll |
| 75 | + https://www.compart.com/en/unicode/category/Lm |
| 76 | + https://www.compart.com/en/unicode/category/Lo |
| 77 | + https://www.compart.com/en/unicode/category/Lt |
| 78 | + https://www.compart.com/en/unicode/category/Lu |
| 79 | +*) |
| 80 | +
|
| 81 | +identifier-character = identifier-start | decimal-digit | "\u200C" (* ZERO WIDTH NON-JOINER *) | "\u200D" (* ZERO WIDTH JOINER *) | ? Unicode spacing combining mark ? | ? Unicode non-spacing mark ? | ? Unicode decimal digit number ? | ? Unicode connector punctuation ? ; |
| 82 | +
|
| 83 | +(* Unicode spacing combining mark: https://www.compart.com/en/unicode/category/Mc |
| 84 | +Unicode non-spacing mark: https://www.compart.com/en/unicode/category/Mn |
| 85 | +Unicode decimal digit number: https://www.compart.com/en/unicode/category/Nd |
| 86 | +Unicode connector punctuation: https://www.compart.com/en/unicode/category/Pc *) |
| 87 | +
|
| 88 | +@endebnf |
| 89 | +``` |
| 90 | + |
| 91 | +### Numbers |
| 92 | + |
| 93 | +```plantuml |
| 94 | +@startebnf |
| 95 | +
|
| 96 | +number = [sign], unsigned-number ; |
| 97 | +
|
| 98 | +unsigned-number = integer | floating | symbolic-number (* JSON5 *) ; |
| 99 | +
|
| 100 | +integer = ( binary (* JSON-Z *) | octal (* JSON-Z *) | decimal | hex ), [ "n" ] (* JSON-Z *) ; |
| 101 | +
|
| 102 | +binary = "0b", binary-digit, { [ "_" ], binary-digit } ; |
| 103 | +
|
| 104 | +octal = explicit-octal | implied-octal ; |
| 105 | +
|
| 106 | +explicit-octal = "0o", octal-digit, { [ "_" ], octal-digit } ; |
| 107 | +
|
| 108 | +implied-octal = "0", octal-digit, { [ "_" ], octal-digit } ; |
| 109 | +
|
| 110 | +decimal = "0", [ decimal-sequence, [ "_" ] ], { non-octal-sequence }-, [ "_" ], [ decimal-sequence ] | non-zero-digit, [ [ "_" ], decimal-sequence ] ; |
| 111 | +
|
| 112 | +decimal-sequence = decimal-digit, { [ "_" ], decimal-digit } ; |
| 113 | +
|
| 114 | +non-octal-sequence = non-octal-digit, { [ "_" ], non-octal-digit } ; |
| 115 | +
|
| 116 | +hex = "0x", hex-digit, { [ "_" ], hex-digit } ; |
| 117 | +
|
| 118 | +floating = ( ( ( decimal-sequence, ".", [ decimal-sequence ] | ".", decimal-sequence ), [ exponent ] ) | decimal-sequence, exponent ), [ "d" (* JSON-Z *) | "n" (* JSON-Z / integer only *) | "m" (* JSON-Z *) ] ; |
| 119 | +
|
| 120 | +(* "n" suffix only allowed if the preceding value resolves to an integer, e.g. 1.2e10n *) |
| 121 | +
|
| 122 | +(* Note: leading or trailing decimal point requires JSON5 or JSON-Z *) |
| 123 | +
|
| 124 | +exponent = ("E" | 'e'), [ sign ], { decimal-digit }- ; |
| 125 | +
|
| 126 | +symbolic-number = ("NaN" | "Infinity"), [ "_d" (* JSON-Z *) | "_m" (* JSON-Z *) ] ; |
| 127 | +
|
| 128 | +@endebnf |
| 129 | +``` |
| 130 | + |
| 131 | +### Simplified numbers |
| 132 | + |
| 133 | +_This is what the number productions look like when the complications of underscore separators, leading/trailing decimal points, and implicit octal numbers are removed._ |
| 134 | + |
| 135 | +```plantuml |
| 136 | +@startebnf |
| 137 | +
|
| 138 | +binary (* JSON-Z *) = "0b", { binary-digit}- ; |
| 139 | +
|
| 140 | +octal (* JSON-Z *) = "0o", { octal-digit }- ; |
| 141 | +
|
| 142 | +decimal = { decimal-digit }- ; |
| 143 | +
|
| 144 | +hex = "0x", { hex-digit }- ; |
| 145 | +
|
| 146 | +floating = { decimal-digit }-, ( ( ".", { decimal-digit }-, [ exponent ]) | ( exponent ) ), [ "d" (* JSON-Z *) | "n" (* JSON-Z / integer only *) | "m" (* JSON-Z *) ] ; |
| 147 | +
|
| 148 | +exponent = ("E" | 'e'), [ sign ], { decimal-digit }- ; |
| 149 | +
|
| 150 | +@endebnf |
| 151 | +``` |
| 152 | + |
| 153 | +### Strings |
| 154 | + |
| 155 | +```plantuml |
| 156 | +@startebnf |
| 157 | +
|
| 158 | +string = double-quoted-string | single-quoted-string | backtick-quoted-string (* JSON-Z *) ; |
| 159 | +
|
| 160 | +double-quoted-string = '"', { safe-string-character | "'" | "`" | escape }, '"'; |
| 161 | +
|
| 162 | +single-quoted-string = "'", { safe-string-character | '"' | "`" | escape }, "'"; |
| 163 | +
|
| 164 | +backtick-quoted-string = "`", { safe-string-character (* Note: the sequence ${ must be escaped as $\{ *) | "'" | '"' | escape }, "`"; |
| 165 | +
|
| 166 | +escape = "\", ( simple-escape | short-escape (* JSON5 *) | unicode-escape ) ; |
| 167 | +
|
| 168 | +simple-escape = '"' | "'" | "`" | "0" | "b" | "f" | "n" | "r" | "t" | "v" | "\" | "\n" | "\r\n" | "\r" ; |
| 169 | +
|
| 170 | +(* quote, single-quote, backtick, null, backspace, form feed, newline, return, tab, |
| 171 | +vertical tab, backslash, LF, CRLF, CR *) |
| 172 | +
|
| 173 | +(* Note: \0 cannot precede a 0-9 digit in a string. Use \x00 or \u0000 instead. *) |
| 174 | +
|
| 175 | +short-escape = "x", 2 * hex-digit ; |
| 176 | +
|
| 177 | +unicode-escape = "u", 4 * hex-digit ; |
| 178 | +@endebnf |
| 179 | +``` |
| 180 | + |
| 181 | +### Comments |
| 182 | + |
| 183 | +```plantuml |
| 184 | +@startebnf |
| 185 | +
|
| 186 | +comment (* JSONC *) = block-comment | line-comment ; |
| 187 | +
|
| 188 | +block-comment = "/*", ? any characters not containing the sequence "*/" ?, "*/"; |
| 189 | +
|
| 190 | +line-comment = "//", ? any characters other than \n or \r ?, ( "\n" | "\r\n" | "\r" ) ; |
| 191 | +
|
| 192 | +@endebnf |
| 193 | +``` |
| 194 | + |
| 195 | +### Extended types |
| 196 | + |
| 197 | +The form of an extended type mirrors that of a JavaScript function call, allowing these values to be parsed as JSONP, via the JavaScript `eval` function, or via the safer JavaScript `new Function(...)` technique, so long as evaluation takes place in a context where the named functions are defined. |
| 198 | + |
| 199 | +The `JSONZ.globalizeTypeHandlers()` function can be used to establish such a context. |
| 200 | + |
| 201 | +```plantuml |
| 202 | +@startebnf |
| 203 | +
|
| 204 | +extended-type (* JSON-Z *) = type-prefix, (built-in | identifier), inert-content, "(", value, ")"; |
| 205 | +
|
| 206 | +type-prefix = "_" ; |
| 207 | +
|
| 208 | +(* Note: A single underscore is the default type-prefix, but this can be customized. *) |
| 209 | +
|
| 210 | +custom-type-prefix = "_", { "_" | "$" | 0-9A-Za-z }, "_" ; |
| 211 | +
|
| 212 | +built-in = "BigDecimal" | "BigInt" | "Date" | "Decimal" | "Map" | "RegExp" | "Set" | "Uint8Array" ; |
| 213 | +
|
| 214 | +@endebnf |
| 215 | +``` |
0 commit comments