Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 64 additions & 54 deletions Parser.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,8 @@
'use strict';

const Utf8Stream = require('./utils/Utf8Stream');

// Table of regular expressions, one per parser state (the `case` labels of the
// parsing switch look them up by name). Every pattern is anchored with ^ and
// every repeated run (whitespace, plain string characters, digits) is capped
// at 256 characters per match.
const patterns = {
// start of a value: ", {, [, ], -, a digit, one of the literals true/false/null, or whitespace
value1: /^(?:[\"\{\[\]\-\d]|true\b|false\b|null\b|\s{1,256})/,
// inside a string: a run of plain characters, a simple escape, a \uXXXX escape, or the closing quote
string: /^(?:[^\"\\]{1,256}|\\[bfnrt\"\\\/]|\\u[\da-fA-F]{4}|\")/,
// start of an object key: ", }, or whitespace
key1: /^(?:[\"\}]|\s{1,256})/,
// a colon or whitespace
colon: /^(?:\:|\s{1,256})/,
// a comma, a closing ] or }, or whitespace
comma: /^(?:[\,\]\}]|\s{1,256})/,
// a whitespace run
ws: /^\s{1,256}/,
// pieces of a number, matched one part at a time
numberStart: /^\d/,
numberDigit: /^\d{0,256}/,
numberFraction: /^[\.eE]/,
numberExponent: /^[eE]/,
numberExpSign: /^[-+]/
};
const MAX_PATTERN_SIZE = 16;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This constant's name is now confusing (compared to DEFAULT_PATTERN_SIZE), but I couldn't find a better one.

const DEFAULT_PATTERN_SIZE = 256;

let noSticky = true;
try {
Expand All @@ -25,17 +12,35 @@ try {
// suppress
}

// Unless `noSticky` is set, replace every entry of `patterns` with a sticky
// ('y') regular expression built from the same source text.
if (!noSticky) {
  for (const name of Object.keys(patterns)) {
    // lop off ^
    const unanchored = patterns[name].source.slice(1);
    // A pattern wrapped as (?:...) is unwrapped as well.
    const isWrapped = unanchored.slice(0, 3) === '(?:' && unanchored.slice(-1) === ')';
    patterns[name] = new RegExp(isWrapped ? unanchored.slice(3, -1) : unanchored, 'y');
  }
}
/**
 * Builds the table of regular expressions used by a parser instance.
 *
 * Each repeated run (whitespace, plain string characters, digits) is capped at
 * `patternSize` characters per match. Unless the module-level `noSticky` flag
 * is set, every pattern is rebuilt as a sticky ('y') regular expression.
 *
 * @param {number|string} [patternSize=DEFAULT_PATTERN_SIZE] - maximum length of
 *   a repeated run; must be (or convert to) a positive safe integer.
 * @returns {Object<string, RegExp>} patterns keyed by parser state name.
 * @throws {RangeError} if `patternSize` is not a positive safe integer.
 */
const getPatterns = (patternSize = DEFAULT_PATTERN_SIZE) => {
  // The size is spliced into regex source text. Anything that is not a plain
  // positive integer would either make RegExp throw an opaque SyntaxError
  // (e.g. {1,0}) or, worse, be silently read as literal "{1,x}" characters.
  const size = Number(patternSize);
  if (!Number.isSafeInteger(size) || size < 1) {
    throw new RangeError(`patternSize must be a positive integer, got: ${String(patternSize)}`);
  }

  const patterns = {
    value1: new RegExp(`^(?:[\"\\{\\[\\]\\-\\d]|true\\b|false\\b|null\\b|\\s{1,${size}})`),
    string: new RegExp(`^(?:[^\"\\\\]{1,${size}}|\\\\[bfnrt\"\\\\\\/]|\\\\u[\\da-fA-F]{4}|\\")`),
    key1: new RegExp(`^(?:[\"\\}]|\\s{1,${size}})`),
    colon: new RegExp(`^(?:\\:|\\s{1,${size}})`),
    comma: new RegExp(`^(?:[\\,\\]\\}]|\\s{1,${size}})`),
    ws: new RegExp(`^\\s{1,${size}}`),
    numberStart: /^\d/,
    numberDigit: new RegExp(`^\\d{0,${size}}`),
    numberFraction: /^[\.eE]/,
    numberExponent: /^[eE]/,
    numberExpSign: /^[-+]/
  };

  if (!noSticky) {
    for (const key of Object.keys(patterns)) {
      let src = patterns[key].source.slice(1); // lop off ^
      if (src.slice(0, 3) === '(?:' && src.slice(-1) === ')') {
        src = src.slice(3, -1);
      }
      patterns[key] = new RegExp(src, 'y');
    }
  }

  // The fraction and exponent parts reuse the integer-part digit patterns.
  patterns.numberFracStart = patterns.numberExpStart = patterns.numberStart;
  patterns.numberFracDigit = patterns.numberExpDigit = patterns.numberDigit;

  return patterns;
};

// Lookup table from the literal keywords true/false/null to their JavaScript values.
const values = {true: true, false: false, null: null};

// Next parser state, keyed by the kind of enclosing container
// ('' is used when there is no parent).
const expected = {object: 'objectStop', array: 'arrayStop', '': 'done'};
Expand All @@ -55,6 +60,7 @@ class Parser extends Utf8Stream {
super(Object.assign({}, options, {readableObjectMode: true}));

this._packKeys = this._packStrings = this._packNumbers = this._streamKeys = this._streamStrings = this._streamNumbers = true;
this._patternSize = DEFAULT_PATTERN_SIZE;
if (options) {
'packValues' in options && (this._packKeys = this._packStrings = this._packNumbers = options.packValues);
'packKeys' in options && (this._packKeys = options.packKeys);
Expand All @@ -64,6 +70,8 @@ class Parser extends Utf8Stream {
'streamKeys' in options && (this._streamKeys = options.streamKeys);
'streamStrings' in options && (this._streamStrings = options.streamStrings);
'streamNumbers' in options && (this._streamNumbers = options.streamNumbers);
'patternSize' in options && (this._patternSize = options.patternSize);

this._jsonStreaming = options.jsonStreaming;
}
!this._packKeys && (this._streamKeys = true);
Expand All @@ -76,6 +84,8 @@ class Parser extends Utf8Stream {
this._parent = '';
this._open_number = false;
this._accumulator = '';

this._patterns = getPatterns(this._patternSize);
}

_flush(callback) {
Expand Down Expand Up @@ -104,8 +114,8 @@ class Parser extends Utf8Stream {
switch (this._expect) {
case 'value1':
case 'value':
patterns.value1.lastIndex = index;
match = patterns.value1.exec(this._buffer);
this._patterns.value1.lastIndex = index;
match = this._patterns.value1.exec(this._buffer);
if (!match) {
if (this._done || index + MAX_PATTERN_SIZE < this._buffer.length) {
if (index < this._buffer.length) return callback(new Error('Parser cannot parse input: expected a value'));
Expand Down Expand Up @@ -197,8 +207,8 @@ class Parser extends Utf8Stream {
break;
case 'keyVal':
case 'string':
patterns.string.lastIndex = index;
match = patterns.string.exec(this._buffer);
this._patterns.string.lastIndex = index;
match = this._patterns.string.exec(this._buffer);
if (!match) {
if (index < this._buffer.length && (this._done || this._buffer.length - index >= 6))
return callback(new Error('Parser cannot parse input: escaped characters'));
Expand Down Expand Up @@ -246,8 +256,8 @@ class Parser extends Utf8Stream {
break;
case 'key1':
case 'key':
patterns.key1.lastIndex = index;
match = patterns.key1.exec(this._buffer);
this._patterns.key1.lastIndex = index;
match = this._patterns.key1.exec(this._buffer);
if (!match) {
if (index < this._buffer.length || this._done) return callback(new Error('Parser cannot parse input: expected an object key'));
break main; // wait for more input
Expand All @@ -269,8 +279,8 @@ class Parser extends Utf8Stream {
}
break;
case 'colon':
patterns.colon.lastIndex = index;
match = patterns.colon.exec(this._buffer);
this._patterns.colon.lastIndex = index;
match = this._patterns.colon.exec(this._buffer);
if (!match) {
if (index < this._buffer.length || this._done) return callback(new Error("Parser cannot parse input: expected ':'"));
break main; // wait for more input
Expand All @@ -285,8 +295,8 @@ class Parser extends Utf8Stream {
break;
case 'arrayStop':
case 'objectStop':
patterns.comma.lastIndex = index;
match = patterns.comma.exec(this._buffer);
this._patterns.comma.lastIndex = index;
match = this._patterns.comma.exec(this._buffer);
if (!match) {
if (index < this._buffer.length || this._done) return callback(new Error("Parser cannot parse input: expected ','"));
break main; // wait for more input
Expand Down Expand Up @@ -318,8 +328,8 @@ class Parser extends Utf8Stream {
break;
// number chunks
case 'numberStart': // [0-9]
patterns.numberStart.lastIndex = index;
match = patterns.numberStart.exec(this._buffer);
this._patterns.numberStart.lastIndex = index;
match = this._patterns.numberStart.exec(this._buffer);
if (!match) {
if (index < this._buffer.length || this._done) return callback(new Error('Parser cannot parse input: expected a starting digit'));
break main; // wait for more input
Expand All @@ -335,8 +345,8 @@ class Parser extends Utf8Stream {
}
break;
case 'numberDigit': // [0-9]*
patterns.numberDigit.lastIndex = index;
match = patterns.numberDigit.exec(this._buffer);
this._patterns.numberDigit.lastIndex = index;
match = this._patterns.numberDigit.exec(this._buffer);
if (!match) {
if (index < this._buffer.length || this._done) return callback(new Error('Parser cannot parse input: expected a digit'));
break main; // wait for more input
Expand All @@ -363,8 +373,8 @@ class Parser extends Utf8Stream {
}
break;
case 'numberFraction': // [\.eE]?
patterns.numberFraction.lastIndex = index;
match = patterns.numberFraction.exec(this._buffer);
this._patterns.numberFraction.lastIndex = index;
match = this._patterns.numberFraction.exec(this._buffer);
if (!match) {
if (index < this._buffer.length || this._done) {
this._expect = expected[this._parent];
Expand All @@ -383,8 +393,8 @@ class Parser extends Utf8Stream {
}
break;
case 'numberFracStart': // [0-9]
patterns.numberFracStart.lastIndex = index;
match = patterns.numberFracStart.exec(this._buffer);
this._patterns.numberFracStart.lastIndex = index;
match = this._patterns.numberFracStart.exec(this._buffer);
if (!match) {
if (index < this._buffer.length || this._done) return callback(new Error('Parser cannot parse input: expected a fractional part of a number'));
break main; // wait for more input
Expand All @@ -400,8 +410,8 @@ class Parser extends Utf8Stream {
}
break;
case 'numberFracDigit': // [0-9]*
patterns.numberFracDigit.lastIndex = index;
match = patterns.numberFracDigit.exec(this._buffer);
this._patterns.numberFracDigit.lastIndex = index;
match = this._patterns.numberFracDigit.exec(this._buffer);
value = match[0];
if (value) {
this._streamNumbers && this.push({name: 'numberChunk', value: value});
Expand All @@ -424,8 +434,8 @@ class Parser extends Utf8Stream {
}
break;
case 'numberExponent': // [eE]?
patterns.numberExponent.lastIndex = index;
match = patterns.numberExponent.exec(this._buffer);
this._patterns.numberExponent.lastIndex = index;
match = this._patterns.numberExponent.exec(this._buffer);
if (!match) {
if (index < this._buffer.length) {
this._expect = expected[this._parent];
Expand All @@ -448,8 +458,8 @@ class Parser extends Utf8Stream {
}
break;
case 'numberExpSign': // [-+]?
patterns.numberExpSign.lastIndex = index;
match = patterns.numberExpSign.exec(this._buffer);
this._patterns.numberExpSign.lastIndex = index;
match = this._patterns.numberExpSign.exec(this._buffer);
if (!match) {
if (index < this._buffer.length) {
this._expect = 'numberExpStart';
Expand All @@ -469,8 +479,8 @@ class Parser extends Utf8Stream {
}
break;
case 'numberExpStart': // [0-9]
patterns.numberExpStart.lastIndex = index;
match = patterns.numberExpStart.exec(this._buffer);
this._patterns.numberExpStart.lastIndex = index;
match = this._patterns.numberExpStart.exec(this._buffer);
if (!match) {
if (index < this._buffer.length || this._done) return callback(new Error('Parser cannot parse input: expected an exponent part of a number'));
break main; // wait for more input
Expand All @@ -486,8 +496,8 @@ class Parser extends Utf8Stream {
}
break;
case 'numberExpDigit': // [0-9]*
patterns.numberExpDigit.lastIndex = index;
match = patterns.numberExpDigit.exec(this._buffer);
this._patterns.numberExpDigit.lastIndex = index;
match = this._patterns.numberExpDigit.exec(this._buffer);
value = match[0];
if (value) {
this._streamNumbers && this.push({name: 'numberChunk', value: value});
Expand All @@ -506,8 +516,8 @@ class Parser extends Utf8Stream {
}
break;
case 'done':
patterns.ws.lastIndex = index;
match = patterns.ws.exec(this._buffer);
this._patterns.ws.lastIndex = index;
match = this._patterns.ws.exec(this._buffer);
if (!match) {
if (index < this._buffer.length) {
if (this._jsonStreaming) {
Expand Down
50 changes: 50 additions & 0 deletions tests/test_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@ const emit = require('../utils/emit');
const ReadString = require('./ReadString');
const Counter = require('./Counter');

/**
 * Produces a random string drawn from the 64-character base64 alphabet
 * (A-Z, a-z, 0-9, '+', '/').
 *
 * @param {number} length - number of characters to generate.
 * @returns {string} the generated string; empty when `length` is not positive.
 */
const generateRandomBase64String = (length) => {
  const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
  const picked = [];

  // Draw one uniformly chosen alphabet character per iteration.
  while (picked.length < length) {
    const position = Math.floor(Math.random() * alphabet.length);
    picked.push(alphabet.charAt(position));
  }

  return picked.join('');
};

const survivesRoundtrip = (t, object) => {
const async = t.startAsync('survivesRoundtrip: ' + object);

Expand Down Expand Up @@ -375,5 +387,43 @@ unit.add(module, [
eval(t.ASSERT('result.length === 0'));
async.done();
});
},
// Pipes a random 1,000,000-character JSON string through a Parser created
// without a patternSize option (so the default pattern size applies) and with
// packStrings disabled, then checks how many tokens were emitted.
function test_parser_long_string_default_pattern_size(t) {
const string = generateRandomBase64String(1000000);
const async = t.startAsync('test_parser_long_string_default_pattern_size');

const input = string,
pipeline = new ReadString(JSON.stringify(input)).pipe(
new Parser({packStrings: false})
),
result = [];

// collect every token the parser emits
pipeline.on('data', function (chunk) {
result.push({name: chunk.name, val: chunk.value});
});
pipeline.on('end', function () {
// 3909 taken from test run
// NOTE(review): empirical value; presumably depends on how ReadString chunks its input - confirm
eval(t.ASSERT('result.length === 3909'));
async.done();
});
},
// Pipes a random 1,000,000-character JSON string through a Parser configured
// with patternSize 10000 and packStrings disabled, then checks how many tokens
// were emitted.
function test_parser_long_string_long_pattern_size(t) {
const string = generateRandomBase64String(1000000);
const async = t.startAsync('test_parser_long_string_long_pattern_size');

const input = string,
pipeline = new ReadString(JSON.stringify(input)).pipe(
new Parser({packStrings: false, patternSize: 10000})
),
result = [];

// collect every token the parser emits
pipeline.on('data', function (chunk) {
result.push({name: chunk.name, val: chunk.value});
});
pipeline.on('end', function () {
// 102 = startString + 100 x stringChunk + endString
// (1,000,000 characters / patternSize 10,000 = 100 chunks; presumably the
// whole input reaches the parser in one piece - confirm against ReadString)
eval(t.ASSERT('result.length === 102'));
async.done();
});
}
]);