@@ -172,29 +172,37 @@ function __hextodec(h) {
172172 return 256 * __HEX[substr(h, 1, 2)] + __HEX[substr(h, 3)]
173173}
174174
175- function __unescape(s, i, s2, c, u, h) {
176- i = match(s, /\\([bfnrt"\\\/]|u[0-9a-fA-F]{4})/)
177- if (!i) return s
178- s2 = ""
179- while (i) {
180- c = substr(s, RSTART, RLENGTH)
175+ function __error(msg) { # report a fatal error: print "<__ARGV0>: <msg>" and exit
176+ printf "%s: %s\n", __ARGV0, msg >"/dev/stderr" # diagnostics go to /dev/stderr, not stdout
177+ exit 1 # exit with status 1
178+ }
179+
180+ function __unescape(s, i, out, c, u, h, l) { # decode JSON backslash escapes in s and return the result; i, out, c, u, h, l are scratch variables
181+ out = "" # decoded text accumulated so far
182+ while ((i = match(s, /\\/))) { # i = position of the next backslash in s, 0 when none is left
183+ c = substr(s, i, 2) # candidate two-character escape (backslash plus one character)
181184 if (c in __UNESCAPE) u = __UNESCAPE[c]
182- else {
185+ else if (match(substr(s, i), /^\\u[0-9a-fA-F]{4}/)) {
186+ c = substr(s, i, RLENGTH) # c now holds the whole \uXXXX escape
183187 h = __hextodec(substr(c, 3))
184188 if (h >= 55296 && h <= 56319) {
185- c = substr(s, RSTART + RLENGTH, 6)
186- RLENGTH += 6
187- h = 65536 + ((h - 55296) * 1024) + \
188- (__hextodec(substr(c, 3)) - 56320)
189+ if (!match( \
190+ substr(s, i+length(c)),
191+ /^\\u[0-9a-fA-F]{4}/ \
192+ ))
193+ __error("unpaired high surrogate " c) # h is in 0xD800..0xDBFF but no \uXXXX escape follows it
194+ c = c substr(s, i+length(c), RLENGTH) # c now holds both escapes back to back
195+ l = __hextodec(substr(c, 9)) # hex digits of the second escape start at character 9 of c
196+ if (l < 56320 || l > 57343) # second half must lie in 0xDC00..0xDFFF
197+ __error("invalid surrogate pair " c)
198+ h = 65536 + ((h - 55296) * 1024) + (l - 56320) # combine both halves into one code point at or above 0x10000
189199 }
190200 u = __utf8enc(h)
191- }
192- s2 = s2 substr(s, 1, RSTART - 1) u
193- s = substr(s, RSTART + RLENGTH)
194- i = match(s, /\\([bfnrt"\\\/]|u[0-9a-fA-F]{4})/)
201+ } else __error("invalid json escape sequence " c) # a backslash that starts no known escape is fatal
202+ out = out substr(s, 1, i-1) u # keep the text before the escape, then the decoded replacement
203+ s = substr(s, i+length(c)) # continue scanning after the consumed escape
195204 }
196- s2 = s2 s
197- return s2
205+ return out s # whatever remains in s contains no backslash
198206}
199207
200208function keys(a, o, n, i) {
@@ -219,20 +227,13 @@ function keys(a, o, n, i) {
219227 return n
220228}
221229
222- function __error(t) {
223- printf "%s: unexpected token %s\n", __ARGV0, t >"/dev/stderr"
224- exit 1
230+ function __terror(t) { # report an unexpected token t
231+ __error("unexpected token " t) # delegates to __error with the message "unexpected token <t>"
225232}
226233
227234function __get_token(t) {
228- if (getline t == -1) {
229- printf "%s: read error\n", __ARGV0 >"/dev/stderr"
230- exit 1
231- }
232- if (t == "") {
233- printf "%s: unexpected EOF\n", __ARGV0 >"/dev/stderr"
234- exit 1
235- }
235+ if (getline t == -1) __error("read error") # __get_token reads the next input record into t; a getline result of -1 is reported as a read error
236+ if (t == "") __error("unexpected EOF") # an empty t after the read is reported as unexpected end of input
236237 return t
237238}
238239
@@ -243,13 +244,13 @@ function __parse_array(path, i, sep, raw_value, value) {
243244 while (sep != "]") {
244245 value = __get_token()
245246 if (value == "]") {
246- if (sep) __error (value)
247+ if (sep) __terror (value)
247248 raw_value = raw_value value
248249 break
249250 }
250251 value = __parse_value(value, __getpath(path, ++i))
251252 sep = __get_token()
252- if (sep != "," && sep != "]") __error (sep)
253+ if (sep != "," && sep != "]") __terror (sep)
253254 raw_value = raw_value value sep
254255 }
255256 _[__getpath(path, "length")] = i
@@ -288,7 +289,7 @@ function __parse_value(value, path, raw_value, start, type) {
288289 type = "number"
289290 }
290291 else {
291- __error (value)
292+ __terror (value)
292293 }
293294 if (path == "" && path == 0)
294295 _[0] = value
@@ -311,18 +312,18 @@ function __parse_object(path, sep, i, raw_value, key, colon, value, raw_key) {
311312 while (sep != "}") {
312313 key = __get_token()
313314 if (key == "}") {
314- if (sep) __error (key)
315+ if (sep) __terror (key)
315316 raw_value = raw_value key
316317 break
317318 }
318- if (length(key) < 2 || substr(key, 1, 1) != "\"") __error (key)
319+ if (length(key) < 2 || substr(key, 1, 1) != "\"") __terror (key)
319320 raw_key = key
320321 key = substr(key, 2, length(key) - 2)
321322 colon = __get_token()
322- if (colon != ":") __error (colon)
323+ if (colon != ":") __terror (colon)
323324 value = __parse_value(__get_token(), __getpath(path, key))
324325 sep = __get_token()
325- if (sep != "," && sep != "}") __error (sep)
326+ if (sep != "," && sep != "}") __terror (sep)
326327 raw_value = raw_value raw_key colon value sep
327328 ++i
328329 _[__getpath(path, __KEYS SUBSEP i)] = key
0 commit comments