Skip to content

Commit 1091051

Browse files
committed
buffer: optimize writing short strings
PR-URL: nodejs#54310
1 parent 298ff4f commit 1091051

File tree

4 files changed

+309
-324
lines changed

4 files changed

+309
-324
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
'use strict';
2+
3+
const common = require('../common.js');
4+
// Benchmark parameter matrix handed to common.createBenchmark() together
// with `main`: `encoding` is the encoding argument given to buf.write(),
// `len` is the size of both the target buffer and the written string, and
// `n` is the number of write calls that are timed.
// NOTE(review): the '' entry presumably exercises the default-encoding
// path of buf.write() - confirm.
const bench = common.createBenchmark(main, {
5+
encoding: [
6+
'', 'utf8', 'ascii', 'latin1',
7+
],
8+
len: [0, 1, 8, 16, 32],
9+
n: [1e6],
10+
});
11+
12+
// Benchmark body: times `n` calls to buf.write() that write a string of
// `len` 'a' characters at offset 0 of a `len`-byte buffer using `encoding`.
//   len      - size of the target buffer and length of the written string
//   n        - number of buf.write() calls performed between start and end
//   encoding - encoding argument passed straight through to buf.write()
function main({ len, n, encoding }) {
  const buf = Buffer.allocUnsafe(len);
  // The string is produced by round-tripping through a Buffer, exactly as
  // the benchmark was written; only the missing semicolon is added.
  const string = Buffer.from('a'.repeat(len)).toString();
  bench.start();
  for (let i = 0; i < n; ++i) {
    buf.write(string, 0, encoding);
  }
  bench.end(n);
}

lib/buffer.js

+110-45
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@
2323

2424
const {
2525
Array,
26-
ArrayBufferIsView,
26+
ArrayFrom,
2727
ArrayIsArray,
2828
ArrayPrototypeForEach,
29+
ArrayPrototypeIndexOf,
2930
MathFloor,
3031
MathMin,
3132
MathTrunc,
32-
NumberIsInteger,
3333
NumberIsNaN,
3434
NumberMAX_SAFE_INTEGER,
3535
NumberMIN_SAFE_INTEGER,
@@ -43,10 +43,10 @@ const {
4343
StringPrototypeTrim,
4444
SymbolSpecies,
4545
SymbolToPrimitive,
46-
TypedArrayPrototypeFill,
4746
TypedArrayPrototypeGetBuffer,
4847
TypedArrayPrototypeGetByteLength,
4948
TypedArrayPrototypeGetByteOffset,
49+
TypedArrayPrototypeFill,
5050
TypedArrayPrototypeGetLength,
5151
TypedArrayPrototypeSet,
5252
TypedArrayPrototypeSlice,
@@ -58,7 +58,6 @@ const {
5858
byteLengthUtf8,
5959
compare: _compare,
6060
compareOffset,
61-
copy: _copy,
6261
createFromString,
6362
fill: bindingFill,
6463
isAscii: bindingIsAscii,
@@ -71,9 +70,10 @@ const {
7170
swap64: _swap64,
7271
kMaxLength,
7372
kStringMaxLength,
74-
atob: _atob,
75-
btoa: _btoa,
7673
} = internalBinding('buffer');
74+
75+
const bufferBinding = internalBinding('buffer');
76+
7777
const {
7878
constants: {
7979
ALL_PROPERTIES,
@@ -88,7 +88,6 @@ const {
8888
normalizeEncoding,
8989
kIsEncodingSymbol,
9090
defineLazyProperties,
91-
encodingsMap,
9291
} = require('internal/util');
9392
const {
9493
isAnyArrayBuffer,
@@ -99,15 +98,16 @@ const {
9998
const {
10099
inspect: utilInspect,
101100
} = require('internal/util/inspect');
101+
const { encodings } = internalBinding('string_decoder');
102102

103103
const {
104104
codes: {
105105
ERR_BUFFER_OUT_OF_BOUNDS,
106106
ERR_INVALID_ARG_TYPE,
107107
ERR_INVALID_ARG_VALUE,
108108
ERR_INVALID_BUFFER_SIZE,
109-
ERR_MISSING_ARGS,
110109
ERR_OUT_OF_RANGE,
110+
ERR_MISSING_ARGS,
111111
ERR_UNKNOWN_ENCODING,
112112
},
113113
genericNodeError,
@@ -152,6 +152,10 @@ const constants = ObjectDefineProperties({}, {
152152
Buffer.poolSize = 8 * 1024;
153153
let poolSize, poolOffset, allocPool;
154154

155+
// Prototype-less lookup table from each entry of `encodings` to its
// position in that list.
const encodingsMap = { __proto__: null };
for (let index = 0; index < encodings.length; ++index) {
  const name = encodings[index];
  encodingsMap[name] = index;
}
158+
155159
function createPool() {
156160
poolSize = Buffer.poolSize;
157161
allocPool = createUnsafeBuffer(poolSize).buffer;
@@ -202,55 +206,55 @@ function toInteger(n, defaultVal) {
202206
return defaultVal;
203207
}
204208

205-
function copyImpl(source, target, targetStart, sourceStart, sourceEnd) {
206-
if (!ArrayBufferIsView(source))
209+
// Argument-checking front end used by Buffer.prototype.copy.
// Requires `source` and `target` to pass isUint8Array(), fills in defaults
// for omitted offsets (targetStart = 0, sourceStart = 0,
// sourceEnd = source.length), coerces supplied offsets with toInteger() and
// range-checks them, then hands the transfer itself to _copyActual().
// Returns 0 when there is nothing to copy, otherwise _copyActual()'s result.
// Throws ERR_INVALID_ARG_TYPE for a wrong source/target type and
// ERR_OUT_OF_RANGE for a negative offset or a sourceStart past the source.
// NOTE(review): this hunk interleaves removed (`-`) and added (`+`) diff
// lines; the description above follows the added side.
function _copy(source, target, targetStart, sourceStart, sourceEnd) {
210+
if (!isUint8Array(source))
207211
throw new ERR_INVALID_ARG_TYPE('source', ['Buffer', 'Uint8Array'], source);
208-
if (!ArrayBufferIsView(target))
212+
if (!isUint8Array(target))
209213
throw new ERR_INVALID_ARG_TYPE('target', ['Buffer', 'Uint8Array'], target);
210214

211215
if (targetStart === undefined) {
212216
targetStart = 0;
213217
} else {
214-
targetStart = NumberIsInteger(targetStart) ? targetStart : toInteger(targetStart, 0);
218+
targetStart = toInteger(targetStart, 0);
215219
if (targetStart < 0)
216220
throw new ERR_OUT_OF_RANGE('targetStart', '>= 0', targetStart);
217221
}
218222

219223
if (sourceStart === undefined) {
220224
sourceStart = 0;
221225
} else {
222-
sourceStart = NumberIsInteger(sourceStart) ? sourceStart : toInteger(sourceStart, 0);
223-
if (sourceStart < 0 || sourceStart > source.byteLength)
224-
throw new ERR_OUT_OF_RANGE('sourceStart', `>= 0 && <= ${source.byteLength}`, sourceStart);
226+
sourceStart = toInteger(sourceStart, 0);
227+
if (sourceStart < 0 || sourceStart > source.length)
228+
throw new ERR_OUT_OF_RANGE('sourceStart', `>= 0 && <= ${source.length}`, sourceStart);
225229
}
226230

227231
if (sourceEnd === undefined) {
228-
sourceEnd = source.byteLength;
232+
sourceEnd = source.length;
229233
} else {
230-
sourceEnd = NumberIsInteger(sourceEnd) ? sourceEnd : toInteger(sourceEnd, 0);
234+
sourceEnd = toInteger(sourceEnd, 0);
231235
if (sourceEnd < 0)
232236
throw new ERR_OUT_OF_RANGE('sourceEnd', '>= 0', sourceEnd);
233237
}
234238

235-
if (targetStart >= target.byteLength || sourceStart >= sourceEnd)
239+
// Nothing to copy: the write position is at or past the end of target, or
// the requested source range is empty.
if (targetStart >= target.length || sourceStart >= sourceEnd)
236240
return 0;
237241

238242
return _copyActual(source, target, targetStart, sourceStart, sourceEnd);
239243
}
240244

241245
// Copies the range [sourceStart, sourceEnd) of `source` into `target`
// starting at `targetStart`, after shrinking the range so that it fits in
// both arrays. Performs no argument validation of its own. Returns `nb`,
// the number of elements in the (clamped) range.
// NOTE(review): this hunk interleaves removed (`-`) and added (`+`) diff
// lines; the comments follow the added side.
function _copyActual(source, target, targetStart, sourceStart, sourceEnd) {
242-
if (sourceEnd - sourceStart > target.byteLength - targetStart)
243-
sourceEnd = sourceStart + target.byteLength - targetStart;
246+
// Shrink the range so it does not run past the end of target.
if (sourceEnd - sourceStart > target.length - targetStart)
247+
sourceEnd = sourceStart + target.length - targetStart;
244248

245249
let nb = sourceEnd - sourceStart;
246-
const sourceLen = source.byteLength - sourceStart;
250+
// Likewise limit the count to what source holds from sourceStart onwards.
const sourceLen = source.length - sourceStart;
247251
if (nb > sourceLen)
248252
nb = sourceLen;
249253

250-
if (nb <= 0)
251-
return 0;
254+
// When only part of source is wanted, wrap that part in a Uint8Array view
// over the same ArrayBuffer so that one set() call can transfer it.
// NOTE(review): the added side drops the `nb <= 0` early return; a negative
// nb would reach the Uint8Array constructor - confirm callers rule it out.
if (sourceStart !== 0 || sourceEnd < source.length)
255+
source = new Uint8Array(source.buffer, source.byteOffset + sourceStart, nb);
252256

253-
_copy(source, target, targetStart, sourceStart, nb);
257+
TypedArrayPrototypeSet(target, source, targetStart);
254258

255259
return nb;
256260
}
@@ -620,7 +624,7 @@ const encodingOps = {
620624
encoding: 'utf8',
621625
encodingVal: encodingsMap.utf8,
622626
byteLength: byteLengthUtf8,
623-
write: (buf, string, offset, len) => buf.utf8Write(string, offset, len),
627+
write: (buf, string, offset, len) => bufferBinding.utf8WriteStatic(buf, string, offset, len),
624628
slice: (buf, start, end) => buf.utf8Slice(start, end),
625629
indexOf: (buf, val, byteOffset, dir) =>
626630
indexOfString(buf, val, byteOffset, encodingsMap.utf8, dir),
@@ -647,7 +651,7 @@ const encodingOps = {
647651
encoding: 'latin1',
648652
encodingVal: encodingsMap.latin1,
649653
byteLength: (string) => string.length,
650-
write: (buf, string, offset, len) => buf.latin1Write(string, offset, len),
654+
write: (buf, string, offset, len) => bufferBinding.latin1WriteStatic(buf, string, offset, len),
651655
slice: (buf, start, end) => buf.latin1Slice(start, end),
652656
indexOf: (buf, val, byteOffset, dir) =>
653657
indexOfString(buf, val, byteOffset, encodingsMap.latin1, dir),
@@ -656,7 +660,7 @@ const encodingOps = {
656660
encoding: 'ascii',
657661
encodingVal: encodingsMap.ascii,
658662
byteLength: (string) => string.length,
659-
write: (buf, string, offset, len) => buf.asciiWrite(string, offset, len),
663+
write: (buf, string, offset, len) => bufferBinding.asciiWriteStatic(buf, string, offset, len),
660664
slice: (buf, start, end) => buf.asciiSlice(start, end),
661665
indexOf: (buf, val, byteOffset, dir) =>
662666
indexOfBuffer(buf,
@@ -804,7 +808,7 @@ ObjectDefineProperty(Buffer.prototype, 'offset', {
804808

805809
// Buffer#copy(target, targetStart, sourceStart, sourceEnd): forwards to the
// module-level copy helper with `this` as the source and returns that
// helper's result. The removed (`-`) line called copyImpl(); the added
// (`+`) line calls _copy().
Buffer.prototype.copy =
806810
function copy(target, targetStart, sourceStart, sourceEnd) {
807-
return copyImpl(this, target, targetStart, sourceStart, sourceEnd);
811+
return _copy(this, target, targetStart, sourceStart, sourceEnd);
808812
};
809813

810814
// No need to verify that "buf.length <= MAX_UINT32" since it's a read-only
@@ -1253,41 +1257,102 @@ function btoa(input) {
12531257
if (arguments.length === 0) {
12541258
throw new ERR_MISSING_ARGS('input');
12551259
}
1256-
const result = _btoa(`${input}`);
1257-
if (result === -1) {
1258-
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1260+
input = `${input}`;
1261+
for (let n = 0; n < input.length; n++) {
1262+
if (input[n].charCodeAt(0) > 0xff)
1263+
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
12591264
}
1260-
return result;
1265+
const buf = Buffer.from(input, 'latin1');
1266+
return buf.toString('base64');
12611267
}
12621268

1269+
// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode
1270+
// Char codes accepted by atob(). Order matters: the first five entries are
// the ASCII whitespace codes, and atob() relies on that via `index > 4`.
const kForgivingBase64AllowedChars = (() => {
  // Char codes of `count` consecutive characters starting at `firstChar`.
  const consecutiveCodes = (firstChar, count) => {
    const base = StringPrototypeCharCodeAt(firstChar);
    return ArrayFrom({ length: count }, (_, offset) => base + offset);
  };

  return [
    // ASCII whitespace
    // Refs: https://infra.spec.whatwg.org/#ascii-whitespace
    0x09, 0x0A, 0x0C, 0x0D, 0x20,

    // Uppercase letters, lowercase letters, decimal digits
    ...consecutiveCodes('A', 26),
    ...consecutiveCodes('a', 26),
    ...consecutiveCodes('0', 10),

    0x2B, // +
    0x2F, // /
    0x3D, // =
  ];
})();

// Position of `=` in the table above; atob() compares lookup results to it.
const kEqualSignIndex = ArrayPrototypeIndexOf(
  kForgivingBase64AllowedChars,
  0x3D,
);
1290+
12631291
// Decodes the base64 string `input` (coerced to a string first) and returns
// the decoded data as a latin1 string. Before decoding, every character is
// looked up in kForgivingBase64AllowedChars; a character outside that table,
// a `=` followed by a non-`=`, non-whitespace character, more than two `=`,
// or a significant-character count that cannot be valid base64 raises an
// 'InvalidCharacterError' created via lazyDOMException(). Calling it with no
// arguments throws ERR_MISSING_ARGS.
// NOTE(review): this hunk interleaves removed (`-`) and added (`+`) diff
// lines; the description above follows the added side.
function atob(input) {
1292+
// The implementation here has not been performance optimized in any way and
1293+
// should not be.
1294+
// Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
12641295
if (arguments.length === 0) {
12651296
throw new ERR_MISSING_ARGS('input');
12661297
}
12671298

1268-
const result = _atob(`${input}`);
1299+
input = `${input}`;
1300+
let nonAsciiWhitespaceCharCount = 0;
1301+
let equalCharCount = 0;
1302+
1303+
for (let n = 0; n < input.length; n++) {
1304+
const index = ArrayPrototypeIndexOf(
1305+
kForgivingBase64AllowedChars,
1306+
StringPrototypeCharCodeAt(input, n));
1307+
1308+
if (index > 4) {
1309+
// The first 5 elements of `kForgivingBase64AllowedChars` are
1310+
// ASCII whitespace char codes.
1311+
nonAsciiWhitespaceCharCount++;
1312+
1313+
if (index === kEqualSignIndex) {
1314+
equalCharCount++;
1315+
} else if (equalCharCount) {
1316+
// The `=` char is only allowed at the end.
1317+
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1318+
}
12691319

1270-
switch (result) {
1271-
case -2: // Invalid character
1320+
if (equalCharCount > 2) {
1321+
// Only one more `=` is permitted after the first equal sign.
1322+
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1323+
}
1324+
} else if (index === -1) {
12721325
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1273-
case -1: // Single character remained
1274-
throw lazyDOMException(
1275-
'The string to be decoded is not correctly encoded.',
1276-
'InvalidCharacterError');
1277-
case -3: // Possible overflow
1278-
// TODO(@anonrig): Throw correct error in here.
1279-
throw lazyDOMException('The input causes overflow.', 'InvalidCharacterError');
1280-
default:
1281-
return result;
1326+
}
12821327
}
1328+
1329+
// `reminder` holds the remainder of the significant (non-whitespace)
// character count modulo 4.
let reminder = nonAsciiWhitespaceCharCount % 4;
1330+
1331+
// See #2, #3, #4 - https://infra.spec.whatwg.org/#forgiving-base64
1332+
if (!reminder) {
1333+
// Ignore all trailing `=` characters and get the new remainder.
1334+
reminder = (nonAsciiWhitespaceCharCount - equalCharCount) % 4;
1335+
} else if (equalCharCount) {
1336+
// `=` must not appear in the input if there's a remainder.
1337+
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1338+
}
1339+
1340+
// See #3 - https://infra.spec.whatwg.org/#forgiving-base64
1341+
if (reminder === 1) {
1342+
throw lazyDOMException(
1343+
'The string to be decoded is not correctly encoded.',
1344+
'InvalidCharacterError');
1345+
}
1346+
1347+
return Buffer.from(input, 'base64').toString('latin1');
12831348
}
12841349

12851350
// Reports whether `input` holds valid UTF-8, as decided by bindingIsUtf8().
// `input` must pass isTypedArray() or isAnyArrayBuffer(); anything else
// throws ERR_INVALID_ARG_TYPE. The expected-type list in that error names
// ArrayBuffer as well, because array buffers are accepted by the check.
function isUtf8(input) {
  if (isTypedArray(input) || isAnyArrayBuffer(input)) {
    return bindingIsUtf8(input);
  }

  throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'Buffer', 'TypedArray'], input);
}
12921357

12931358
function isAscii(input) {

0 commit comments

Comments
 (0)