Skip to content

Commit 739ac54

Browse files
committed
Fixes in parsing of malformed JSON
1 parent de26ba0 commit 739ac54

3 files changed

Lines changed: 97 additions & 7 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
## next
2+
3+
- Refactored `parseChunked()` from class-based to function-based implementation, as a result a bit smaller and faster
4+
- `parseChunked()`:
5+
- Fixed OOM on very long arrays (corner case, millions of elements)
6+
- Fixed crash on malformed top-level closing tokens
7+
- Fixed handling of empty input and input with only whitespaces
8+
- Fixed re-parsing a second top-level value; now extra non-whitespace after a complete root fails gracefully as parse error
9+
- Fixed handling of trailing whitespace after a complete root value
10+
111
## 0.6.3 (2024-10-24)
212

313
- Fixed an issue with `types` in the `exports` of `package.json` that introduced in version `0.6.2`

src/parse-chunked.js

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -172,13 +172,19 @@ function createChunkParser() {
172172
if (flushDepth > 0) {
173173
parseAndAppend(prepareAddition(fragment), true);
174174
} else {
175-
// That's an entire value on a top level
176-
value = JSON.parse(fragment);
177-
valueStack = {
178-
value,
179-
key: null,
180-
prev: null
181-
};
175+
if (valueStack === null) {
176+
// That's an entire value on a top level
177+
value = JSON.parse(fragment);
178+
valueStack = {
179+
value,
180+
key: null,
181+
prev: null
182+
};
183+
} else if (/\S/.test(fragment)) {
184+
// Extra non-whitespace after complete root value should fail to parse
185+
jsonParseOffset -= 3;
186+
JSON.parse('[[]' + fragment);
187+
}
182188
}
183189
} else if (flushDepth > lastFlushDepth) {
184190
// Add missed closing brackets/parentheses
@@ -346,6 +352,13 @@ function createChunkParser() {
346352
flushPoint = i + 1;
347353
flushDepth--;
348354

355+
// Unmatched closing bracket/brace at top level
356+
if (flushDepth < 0) {
357+
flushDepth = lastFlushDepth;
358+
flush(chunk, lastFlushPoint, flushPoint);
359+
return;
360+
}
361+
349362
if (flushDepth < lastFlushDepth) {
350363
flush(chunk, lastFlushPoint, flushPoint);
351364
lastFlushPoint = flushPoint;

src/parse-chunked.test.js

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,30 @@ describe('parseChunked()', () => {
128128
});
129129

130130
describe('errors', () => {
131+
it('unmatched closing bracket at start', () =>
132+
assert.rejects(
133+
() => parseChunked([']']),
134+
/Unexpected token ] in JSON at position 0|Unexpected token ']'(, "]" is not valid JSON)?/
135+
)
136+
);
137+
it('unmatched closing brace at start', () =>
138+
assert.rejects(
139+
() => parseChunked(['}']),
140+
/Unexpected token } in JSON at position 0|Unexpected token '}'(, "}" is not valid JSON)?/
141+
)
142+
);
143+
it('extra token after complete value', () =>
144+
assert.rejects(
145+
() => parseChunked(['[] true']),
146+
/(Unexpected token t in JSON at position 3|Unexpected token t in JSON at position 6|Unexpected non-whitespace character after JSON at position 2|Expected ',' or ']' after array element in JSON at position 3)/
147+
)
148+
);
149+
it('extra opening after root', () =>
150+
assert.rejects(
151+
() => parseChunked(['{}[']),
152+
/(Unexpected token \[ in JSON at position 2|Unexpected non-whitespace character after JSON at position 2)/
153+
)
154+
);
131155
it('abs pos across chunks', () =>
132156
assert.rejects(
133157
async () => await parse(['{"test":"he', 'llo",}']),
@@ -172,6 +196,49 @@ describe('parseChunked()', () => {
172196
);
173197
});
174198

199+
describe('trailing whitespace after full value', () => {
200+
it('spaces and newlines after array', async () => {
201+
const actual = await parse(['[1,2]\n\n \t ']);
202+
assert.deepStrictEqual(actual, [1, 2]);
203+
});
204+
it('split chunks with trailing whitespace', async () => {
205+
const actual = await parse(['[1,2]', ' ', '\n\t']);
206+
assert.deepStrictEqual(actual, [1, 2]);
207+
});
208+
});
209+
210+
describe('chunk boundary for escapes and multi-byte utf-8', () => {
211+
it('escaped quote split', async () => {
212+
const actual = await parse(['"hello \\"', 'world"']);
213+
assert.deepStrictEqual(actual, 'hello "world');
214+
});
215+
it('backslash escape split across chunks', async () => {
216+
// create a string with a literal backslash then a quote and more text: "foo \"bar"
217+
const chunks = ['"foo \\"', 'bar"'];
218+
const actual = await parse(chunks);
219+
assert.deepStrictEqual(actual, 'foo "bar');
220+
});
221+
it('multi-byte emoji split across chunks', async () => {
222+
const json = JSON.stringify('a😅b');
223+
// split inside surrogate pair intentionally
224+
const first = json.slice(0, 4); // "a
225+
const middle = json.slice(4, 6); // first part of surrogate maybe
226+
const rest = json.slice(6);
227+
const actual = await parse([first, middle, rest]);
228+
assert.deepStrictEqual(actual, 'a😅b');
229+
});
230+
it('multi-byte via Uint8Array boundary', async () => {
231+
const str = '"start 🤓 end"';
232+
const enc = new TextEncoder().encode(str);
233+
// slice across multi-byte boundary of 🤓 (U+1F913)
234+
const idx = enc.indexOf(0xF0); // start of 4-byte sequence
235+
const part1 = enc.slice(0, idx + 2); // cut in middle of sequence
236+
const part2 = enc.slice(idx + 2);
237+
const actual = await parseChunked([part1, part2]);
238+
assert.deepStrictEqual(actual, 'start 🤓 end');
239+
});
240+
});
241+
175242
describe('use with buffers', () => {
176243
const input = '[1234,{"🤓\\uD800\\uDC00":"🤓\\uD800\\uDC00\\u006f\\ufffd\\uffff\\ufffd"}]';
177244
const expected = [1234, { '🤓\uD800\uDC00': '🤓\uD800\uDC00\u006f\ufffd\uffff\ufffd' }];

0 commit comments

Comments
 (0)