Fixes in parsing of malformed JSON

lahmatiy · lahmatiy · commit 739ac54cbc8b · 2026-02-27T13:53:05.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,13 @@
+## next
+
+- Refactored `parseChunked()` from a class-based to a function-based implementation, which makes it a bit smaller and faster
+- `parseChunked()`:
+  - Fixed OOM on very long arrays (corner case, millions of elements)
+  - Fixed crash on malformed top-level closing tokens
+  - Fixed handling of empty input and whitespace-only input
+  - Fixed re-parsing of a second top-level value; extra non-whitespace after a complete root value now fails gracefully with a parse error
+  - Fixed handling of trailing whitespace after a complete root value
+
 ## 0.6.3 (2024-10-24)
 
 - Fixed an issue with `types` in the `exports` of `package.json` that introduced in version `0.6.2`
diff --git a/src/parse-chunked.js b/src/parse-chunked.js
@@ -172,13 +172,19 @@ function createChunkParser() {
             if (flushDepth > 0) {
                 parseAndAppend(prepareAddition(fragment), true);
             } else {
-                // That's an entire value on a top level
-                value = JSON.parse(fragment);
-                valueStack = {
-                    value,
-                    key: null,
-                    prev: null
-                };
+                if (valueStack === null) {
+                    // That's an entire value on a top level
+                    value = JSON.parse(fragment);
+                    valueStack = {
+                        value,
+                        key: null,
+                        prev: null
+                    };
+                } else if (/\S/.test(fragment)) {
+                    // Extra non-whitespace after a complete root value must fail to parse; the offset is shifted by 3, presumably to compensate for the 3-char `[[]` prefix
+                    jsonParseOffset -= 3;
+                    JSON.parse('[[]' + fragment);
+                }
             }
         } else if (flushDepth > lastFlushDepth) {
             // Add missed closing brackets/parentheses
@@ -346,6 +352,13 @@ function createChunkParser() {
                     flushPoint = i + 1;
                     flushDepth--;
 
+                    // Unmatched closing bracket/brace at top level
+                    if (flushDepth < 0) {
+                        flushDepth = lastFlushDepth;
+                        flush(chunk, lastFlushPoint, flushPoint);
+                        return;
+                    }
+
                     if (flushDepth < lastFlushDepth) {
                         flush(chunk, lastFlushPoint, flushPoint);
                         lastFlushPoint = flushPoint;
diff --git a/src/parse-chunked.test.js b/src/parse-chunked.test.js
@@ -128,6 +128,30 @@ describe('parseChunked()', () => {
     });
 
     describe('errors', () => {
+        it('unmatched closing bracket at start', () =>
+            assert.rejects(
+                () => parseChunked([']']),
+                /Unexpected token ] in JSON at position 0|Unexpected token ']'(, "]" is not valid JSON)?/
+            )
+        );
+        it('unmatched closing brace at start', () =>
+            assert.rejects(
+                () => parseChunked(['}']),
+                /Unexpected token } in JSON at position 0|Unexpected token '}'(, "}" is not valid JSON)?/
+            )
+        );
+        it('extra token after complete value', () =>
+            assert.rejects(
+                () => parseChunked(['[] true']),
+                /(Unexpected token t in JSON at position 3|Unexpected token t in JSON at position 6|Unexpected non-whitespace character after JSON at position 2|Expected ',' or ']' after array element in JSON at position 3)/
+            )
+        );
+        it('extra opening after root', () =>
+            assert.rejects(
+                () => parseChunked(['{}[']),
+                /(Unexpected token \[ in JSON at position 2|Unexpected non-whitespace character after JSON at position 2)/
+            )
+        );
         it('abs pos across chunks', () =>
             assert.rejects(
                 async () => await parse(['{"test":"he', 'llo",}']),
@@ -172,6 +196,49 @@ describe('parseChunked()', () => {
         );
     });
 
+    describe('trailing whitespace after full value', () => {
+        it('spaces and newlines after array', async () => {
+            const actual = await parse(['[1,2]\n\n  \t  ']);
+            assert.deepStrictEqual(actual, [1, 2]);
+        });
+        it('split chunks with trailing whitespace', async () => {
+            const actual = await parse(['[1,2]', '   ', '\n\t']);
+            assert.deepStrictEqual(actual, [1, 2]);
+        });
+    });
+
+    describe('chunk boundary for escapes and multi-byte utf-8', () => {
+        it('escaped quote split', async () => {
+            const actual = await parse(['"hello \\"', 'world"']);
+            assert.deepStrictEqual(actual, 'hello "world');
+        });
+        it('backslash escape split across chunks', async () => {
+            // JSON text "foo \"bar" is split right after the escaped quote (\"), so the escape sequence ends exactly at the chunk boundary
+            const chunks = ['"foo \\"', 'bar"'];
+            const actual = await parse(chunks);
+            assert.deepStrictEqual(actual, 'foo "bar');
+        });
+        it('multi-byte emoji split across chunks', async () => {
+            const json = JSON.stringify('a😅b');
+            // split inside surrogate pair intentionally
+            const first = json.slice(0, 4); // "a
+            const middle = json.slice(4, 6); // first part of surrogate maybe
+            const rest = json.slice(6);
+            const actual = await parse([first, middle, rest]);
+            assert.deepStrictEqual(actual, 'a😅b');
+        });
+        it('multi-byte via Uint8Array boundary', async () => {
+            const str = '"start 🤓 end"';
+            const enc = new TextEncoder().encode(str);
+            // slice across multi-byte boundary of 🤓 (U+1F913)
+            const idx = enc.indexOf(0xF0); // start of 4-byte sequence
+            const part1 = enc.slice(0, idx + 2); // cut in middle of sequence
+            const part2 = enc.slice(idx + 2);
+            const actual = await parseChunked([part1, part2]);
+            assert.deepStrictEqual(actual, 'start 🤓 end');
+        });
+    });
+
     describe('use with buffers', () => {
         const input = '[1234,{"🤓\\uD800\\uDC00":"🤓\\uD800\\uDC00\\u006f\\ufffd\\uffff\\ufffd"}]';
         const expected = [1234, { '🤓\uD800\uDC00': '🤓\uD800\uDC00\u006f\ufffd\uffff\ufffd' }];