Skip to content

Commit 99d4fd5

Browse files
committed
Use TextDecoder for toString('utf8')
1 parent 795bbb5 commit 99d4fd5

File tree

2 files changed

+16
-100
lines changed

2 files changed

+16
-100
lines changed

index.js

+3 -96
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,8 @@ const customInspectSymbol =
1515
? Symbol['for']('nodejs.util.inspect.custom') // eslint-disable-line dot-notation
1616
: null
1717

18+
const decoderUTF8 = new TextDecoder('utf8')
19+
1820
exports.Buffer = Buffer
1921
exports.SlowBuffer = SlowBuffer
2022
exports.INSPECT_MAX_BYTES = 50
@@ -952,102 +954,7 @@ function base64Slice (buf, start, end) {
952954
}
953955

954956
function utf8Slice (buf, start, end) {
955-
end = Math.min(buf.length, end)
956-
const res = []
957-
958-
let i = start
959-
while (i < end) {
960-
const firstByte = buf[i]
961-
let codePoint = null
962-
let bytesPerSequence = (firstByte > 0xEF)
963-
? 4
964-
: (firstByte > 0xDF)
965-
? 3
966-
: (firstByte > 0xBF)
967-
? 2
968-
: 1
969-
970-
if (i + bytesPerSequence <= end) {
971-
let secondByte, thirdByte, fourthByte, tempCodePoint
972-
973-
switch (bytesPerSequence) {
974-
case 1:
975-
if (firstByte < 0x80) {
976-
codePoint = firstByte
977-
}
978-
break
979-
case 2:
980-
secondByte = buf[i + 1]
981-
if ((secondByte & 0xC0) === 0x80) {
982-
tempCodePoint = (firstByte & 0x1F) << 0x6 | (secondByte & 0x3F)
983-
if (tempCodePoint > 0x7F) {
984-
codePoint = tempCodePoint
985-
}
986-
}
987-
break
988-
case 3:
989-
secondByte = buf[i + 1]
990-
thirdByte = buf[i + 2]
991-
if ((secondByte & 0xC0) === 0x80 && (thirdByte & 0xC0) === 0x80) {
992-
tempCodePoint = (firstByte & 0xF) << 0xC | (secondByte & 0x3F) << 0x6 | (thirdByte & 0x3F)
993-
if (tempCodePoint > 0x7FF && (tempCodePoint < 0xD800 || tempCodePoint > 0xDFFF)) {
994-
codePoint = tempCodePoint
995-
}
996-
}
997-
break
998-
case 4:
999-
secondByte = buf[i + 1]
1000-
thirdByte = buf[i + 2]
1001-
fourthByte = buf[i + 3]
1002-
if ((secondByte & 0xC0) === 0x80 && (thirdByte & 0xC0) === 0x80 && (fourthByte & 0xC0) === 0x80) {
1003-
tempCodePoint = (firstByte & 0xF) << 0x12 | (secondByte & 0x3F) << 0xC | (thirdByte & 0x3F) << 0x6 | (fourthByte & 0x3F)
1004-
if (tempCodePoint > 0xFFFF && tempCodePoint < 0x110000) {
1005-
codePoint = tempCodePoint
1006-
}
1007-
}
1008-
}
1009-
}
1010-
1011-
if (codePoint === null) {
1012-
// we did not generate a valid codePoint so insert a
1013-
// replacement char (U+FFFD) and advance only 1 byte
1014-
codePoint = 0xFFFD
1015-
bytesPerSequence = 1
1016-
} else if (codePoint > 0xFFFF) {
1017-
// encode to utf16 (surrogate pair dance)
1018-
codePoint -= 0x10000
1019-
res.push(codePoint >>> 10 & 0x3FF | 0xD800)
1020-
codePoint = 0xDC00 | codePoint & 0x3FF
1021-
}
1022-
1023-
res.push(codePoint)
1024-
i += bytesPerSequence
1025-
}
1026-
1027-
return decodeCodePointsArray(res)
1028-
}
1029-
1030-
// Based on http://stackoverflow.com/a/22747272/680742, the browser with
1031-
// the lowest limit is Chrome, with 0x10000 args.
1032-
// We go 1 magnitude less, for safety
1033-
const MAX_ARGUMENTS_LENGTH = 0x1000
1034-
1035-
function decodeCodePointsArray (codePoints) {
1036-
const len = codePoints.length
1037-
if (len <= MAX_ARGUMENTS_LENGTH) {
1038-
return String.fromCharCode.apply(String, codePoints) // avoid extra slice()
1039-
}
1040-
1041-
// Decode in chunks to avoid "call stack size exceeded".
1042-
let res = ''
1043-
let i = 0
1044-
while (i < len) {
1045-
res += String.fromCharCode.apply(
1046-
String,
1047-
codePoints.slice(i, i += MAX_ARGUMENTS_LENGTH)
1048-
)
1049-
}
1050-
return res
957+
return decoderUTF8.decode(buf.slice(start, end))
1051958
}
1052959

1053960
function asciiSlice (buf, start, end) {

test/to-string.js

+13 -4
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,7 @@ test('utf8 replacement chars (3 byte sequences)', function (t) {
162162
)
163163
t.equal(
164164
new B([0xE0, 0xAC]).toString(),
165-
'\uFFFD\uFFFD'
165+
'\uFFFD'
166166
)
167167
t.equal(
168168
new B([0xE0, 0xAC, 0xB9]).toString(),
@@ -178,11 +178,11 @@ test('utf8 replacement chars (4 byte sequences)', function (t) {
178178
)
179179
t.equal(
180180
new B([0xF4, 0x8F]).toString(),
181-
'\uFFFD\uFFFD'
181+
'\uFFFD'
182182
)
183183
t.equal(
184184
new B([0xF4, 0x8F, 0x80]).toString(),
185-
'\uFFFD\uFFFD\uFFFD'
185+
'\uFFFD'
186186
)
187187
t.equal(
188188
new B([0xF4, 0x8F, 0x80, 0x84]).toString(),
@@ -201,8 +201,17 @@ test('utf8 replacement chars (4 byte sequences)', function (t) {
201201

202202
test('utf8 replacement chars on 256 random bytes', function (t) {
203203
t.equal(
204+
// generated via
205+
// let str = Buffer.from([...]).toString()
206+
// let encoded = "";
207+
// for (let i = 0; i < str.length; i++) {
208+
// encoded +=
209+
// "\\u" + str.charCodeAt(i).toString(16).toUpperCase().padStart(4, 0);
210+
// }
211+
// console.log(encoded);
212+
204213
new B([152, 130, 206, 23, 243, 238, 197, 44, 27, 86, 208, 36, 163, 184, 164, 21, 94, 242, 178, 46, 25, 26, 253, 178, 72, 147, 207, 112, 236, 68, 179, 190, 29, 83, 239, 147, 125, 55, 143, 19, 157, 68, 157, 58, 212, 224, 150, 39, 128, 24, 94, 225, 120, 121, 75, 192, 112, 19, 184, 142, 203, 36, 43, 85, 26, 147, 227, 139, 242, 186, 57, 78, 11, 102, 136, 117, 180, 210, 241, 92, 3, 215, 54, 167, 249, 1, 44, 225, 146, 86, 2, 42, 68, 21, 47, 238, 204, 153, 216, 252, 183, 66, 222, 255, 15, 202, 16, 51, 134, 1, 17, 19, 209, 76, 238, 38, 76, 19, 7, 103, 249, 5, 107, 137, 64, 62, 170, 57, 16, 85, 179, 193, 97, 86, 166, 196, 36, 148, 138, 193, 210, 69, 187, 38, 242, 97, 195, 219, 252, 244, 38, 1, 197, 18, 31, 246, 53, 47, 134, 52, 105, 72, 43, 239, 128, 203, 73, 93, 199, 75, 222, 220, 166, 34, 63, 236, 11, 212, 76, 243, 171, 110, 78, 39, 205, 204, 6, 177, 233, 212, 243, 0, 33, 41, 122, 118, 92, 252, 0, 157, 108, 120, 70, 137, 100, 223, 243, 171, 232, 66, 126, 111, 142, 33, 3, 39, 117, 27, 107, 54, 1, 217, 227, 132, 13, 166, 3, 73, 53, 127, 225, 236, 134, 219, 98, 214, 125, 148, 24, 64, 142, 111, 231, 194, 42, 150, 185, 10, 182, 163, 244, 19, 4, 59, 135, 16]).toString(),
205-
'\uFFFD\uFFFD\uFFFD\u0017\uFFFD\uFFFD\uFFFD\u002C\u001B\u0056\uFFFD\u0024\uFFFD\uFFFD\uFFFD\u0015\u005E\uFFFD\uFFFD\u002E\u0019\u001A\uFFFD\uFFFD\u0048\uFFFD\uFFFD\u0070\uFFFD\u0044\uFFFD\uFFFD\u001D\u0053\uFFFD\uFFFD\u007D\u0037\uFFFD\u0013\uFFFD\u0044\uFFFD\u003A\uFFFD\uFFFD\uFFFD\u0027\uFFFD\u0018\u005E\uFFFD\u0078\u0079\u004B\uFFFD\u0070\u0013\uFFFD\uFFFD\uFFFD\u0024\u002B\u0055\u001A\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0039\u004E\u000B\u0066\uFFFD\u0075\uFFFD\uFFFD\uFFFD\u005C\u0003\uFFFD\u0036\uFFFD\uFFFD\u0001\u002C\uFFFD\uFFFD\u0056\u0002\u002A\u0044\u0015\u002F\uFFFD\u0319\uFFFD\uFFFD\uFFFD\u0042\uFFFD\uFFFD\u000F\uFFFD\u0010\u0033\uFFFD\u0001\u0011\u0013\uFFFD\u004C\uFFFD\u0026\u004C\u0013\u0007\u0067\uFFFD\u0005\u006B\uFFFD\u0040\u003E\uFFFD\u0039\u0010\u0055\uFFFD\uFFFD\u0061\u0056\uFFFD\uFFFD\u0024\uFFFD\uFFFD\uFFFD\uFFFD\u0045\uFFFD\u0026\uFFFD\u0061\uFFFD\uFFFD\uFFFD\uFFFD\u0026\u0001\uFFFD\u0012\u001F\uFFFD\u0035\u002F\uFFFD\u0034\u0069\u0048\u002B\uFFFD\uFFFD\uFFFD\u0049\u005D\uFFFD\u004B\uFFFD\u0726\u0022\u003F\uFFFD\u000B\uFFFD\u004C\uFFFD\uFFFD\u006E\u004E\u0027\uFFFD\uFFFD\u0006\uFFFD\uFFFD\uFFFD\uFFFD\u0000\u0021\u0029\u007A\u0076\u005C\uFFFD\u0000\uFFFD\u006C\u0078\u0046\uFFFD\u0064\uFFFD\uFFFD\uFFFD\uFFFD\u0042\u007E\u006F\uFFFD\u0021\u0003\u0027\u0075\u001B\u006B\u0036\u0001\uFFFD\uFFFD\uFFFD\u000D\uFFFD\u0003\u0049\u0035\u007F\uFFFD\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u007D\uFFFD\u0018\u0040\uFFFD\u006F\uFFFD\uFFFD\u002A\uFFFD\uFFFD\u000A\uFFFD\uFFFD\uFFFD\u0013\u0004\u003B\uFFFD\u0010'
214+
'\uFFFD\uFFFD\uFFFD\u0017\uFFFD\uFFFD\uFFFD\u002C\u001B\u0056\uFFFD\u0024\uFFFD\uFFFD\uFFFD\u0015\u005E\uFFFD\u002E\u0019\u001A\uFFFD\uFFFD\u0048\uFFFD\uFFFD\u0070\uFFFD\u0044\uFFFD\uFFFD\u001D\u0053\uFFFD\u007D\u0037\uFFFD\u0013\uFFFD\u0044\uFFFD\u003A\uFFFD\uFFFD\uFFFD\u0027\uFFFD\u0018\u005E\uFFFD\u0078\u0079\u004B\uFFFD\u0070\u0013\uFFFD\uFFFD\uFFFD\u0024\u002B\u0055\u001A\uFFFD\uFFFD\uFFFD\u0039\u004E\u000B\u0066\uFFFD\u0075\uFFFD\uFFFD\uFFFD\u005C\u0003\uFFFD\u0036\uFFFD\uFFFD\u0001\u002C\uFFFD\u0056\u0002\u002A\u0044\u0015\u002F\uFFFD\u0319\uFFFD\uFFFD\uFFFD\u0042\uFFFD\uFFFD\u000F\uFFFD\u0010\u0033\uFFFD\u0001\u0011\u0013\uFFFD\u004C\uFFFD\u0026\u004C\u0013\u0007\u0067\uFFFD\u0005\u006B\uFFFD\u0040\u003E\uFFFD\u0039\u0010\u0055\uFFFD\uFFFD\u0061\u0056\uFFFD\uFFFD\u0024\uFFFD\uFFFD\uFFFD\uFFFD\u0045\uFFFD\u0026\uFFFD\u0061\uFFFD\uFFFD\uFFFD\uFFFD\u0026\u0001\uFFFD\u0012\u001F\uFFFD\u0035\u002F\uFFFD\u0034\u0069\u0048\u002B\uFFFD\uFFFD\u0049\u005D\uFFFD\u004B\uFFFD\u0726\u0022\u003F\uFFFD\u000B\uFFFD\u004C\uFFFD\u006E\u004E\u0027\uFFFD\uFFFD\u0006\uFFFD\uFFFD\uFFFD\uFFFD\u0000\u0021\u0029\u007A\u0076\u005C\uFFFD\u0000\uFFFD\u006C\u0078\u0046\uFFFD\u0064\uFFFD\uFFFD\uFFFD\u0042\u007E\u006F\uFFFD\u0021\u0003\u0027\u0075\u001B\u006B\u0036\u0001\uFFFD\uFFFD\u000D\uFFFD\u0003\u0049\u0035\u007F\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u007D\uFFFD\u0018\u0040\uFFFD\u006F\uFFFD\uFFFD\u002A\uFFFD\uFFFD\u000A\uFFFD\uFFFD\uFFFD\u0013\u0004\u003B\uFFFD\u0010'
206215
)
207216
t.end()
208217
})

0 commit comments

Comments
 (0)