Skip to content

Commit 4f7f681

Browse files
authored
Merge pull request #25 from Borewit/fix-issue-24-avoid-spread
Avoid String.fromCharCode spread in decodeASCII
2 parents d599d9a + e17b8d7 commit 4f7f681

File tree

1 file changed

+123
-45
lines changed

1 file changed

+123
-45
lines changed

lib/index.ts

Lines changed: 123 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
export type SupportedEncoding =
32
| "utf-8"
43
| "utf8"
@@ -18,9 +17,26 @@ const WINDOWS_1252_EXTRA: Record<number, string> = {
1817

1918
const WINDOWS_1252_REVERSE: Record<string, number> = {};
2019
for (const [code, char] of Object.entries(WINDOWS_1252_EXTRA)) {
21-
WINDOWS_1252_REVERSE[char] = Number.parseInt(code);
20+
WINDOWS_1252_REVERSE[char] = Number.parseInt(code, 10);
21+
}
22+
23+
// ---------- Cached decoders/encoders ----------
24+
let _utf8Decoder: TextDecoder | undefined;
25+
let _utf8Encoder: TextEncoder | undefined;
26+
27+
function utf8Decoder(): TextDecoder | undefined {
28+
if (typeof globalThis.TextDecoder === "undefined") return undefined;
29+
return (_utf8Decoder ??= new globalThis.TextDecoder("utf-8"));
30+
}
31+
function utf8Encoder(): TextEncoder | undefined {
32+
if (typeof globalThis.TextEncoder === "undefined") return undefined;
33+
return (_utf8Encoder ??= new globalThis.TextEncoder());
2234
}
2335

36+
// Chunk size for String.fromCharCode calls; kept well under the measured ~105k-argument cliff.
37+
// 32k keeps memory reasonable and is plenty fast.
38+
const CHUNK = 32 * 1024;
39+
2440
/**
2541
* Decode text from binary data
2642
* @param bytes Binary data
@@ -30,14 +46,12 @@ export function textDecode(
3046
bytes: Uint8Array,
3147
encoding: SupportedEncoding = "utf-8"
3248
): string {
33-
3449
switch (encoding.toLowerCase() as SupportedEncoding) {
3550
case "utf-8":
36-
case "utf8":
37-
if (typeof globalThis.TextDecoder !== "undefined") {
38-
return new globalThis.TextDecoder("utf-8").decode(bytes);
39-
}
40-
return decodeUTF8(bytes);
51+
case "utf8": {
52+
const dec = utf8Decoder();
53+
return dec ? dec.decode(bytes) : decodeUTF8(bytes);
54+
}
4155
case "utf-16le":
4256
return decodeUTF16LE(bytes);
4357
case "ascii":
@@ -58,11 +72,10 @@ export function textEncode(
5872
): Uint8Array {
5973
switch (encoding.toLowerCase() as SupportedEncoding) {
6074
case "utf-8":
61-
case "utf8":
62-
if (typeof globalThis.TextEncoder !== "undefined") {
63-
return new globalThis.TextEncoder().encode(input);
64-
}
65-
return encodeUTF8(input);
75+
case "utf8": {
76+
const enc = utf8Encoder();
77+
return enc ? enc.encode(input) : encodeUTF8(input);
78+
}
6679
case "utf-16le":
6780
return encodeUTF16LE(input);
6881
case "ascii":
@@ -80,6 +93,7 @@ export function textEncode(
8093
// --- Internal helpers ---
8194

8295
function decodeUTF8(bytes: Uint8Array): string {
96+
const parts: string[] = [];
8397
let out = "";
8498
let i = 0;
8599
while (i < bytes.length) {
@@ -97,53 +111,107 @@ function decodeUTF8(bytes: Uint8Array): string {
97111
const b2 = bytes[i++] & 0x3f;
98112
const b3 = bytes[i++] & 0x3f;
99113
const b4 = bytes[i++] & 0x3f;
100-
let cp =
101-
((b1 & 0x07) << 18) |
102-
(b2 << 12) |
103-
(b3 << 6) |
104-
b4;
114+
let cp = ((b1 & 0x07) << 18) | (b2 << 12) | (b3 << 6) | b4;
105115
cp -= 0x10000;
106116
out += String.fromCharCode(
107117
0xd800 + ((cp >> 10) & 0x3ff),
108118
0xdc00 + (cp & 0x3ff)
109119
);
110120
}
121+
122+
if (out.length >= CHUNK) {
123+
parts.push(out);
124+
out = "";
125+
}
111126
}
112-
return out;
127+
128+
if (out) parts.push(out);
129+
return parts.join("");
113130
}
114131

115132
function decodeUTF16LE(bytes: Uint8Array): string {
116-
let out = "";
117-
for (let i = 0; i < bytes.length; i += 2) {
118-
out += String.fromCharCode(bytes[i] | (bytes[i + 1] << 8));
133+
// Use chunked fromCharCode on 16-bit code units.
134+
// If the length is odd, the trailing byte is ignored (the previous loop emitted it as a low-byte-only code unit).
135+
const len = bytes.length & ~1;
136+
if (len === 0) return "";
137+
138+
const parts: string[] = [];
139+
// Build a temporary code-unit array per chunk.
140+
const maxUnits = CHUNK; // CHUNK code units per chunk
141+
142+
for (let i = 0; i < len; ) {
143+
const unitsThis = Math.min(maxUnits, (len - i) >> 1);
144+
const units = new Array<number>(unitsThis);
145+
for (let j = 0; j < unitsThis; j++, i += 2) {
146+
units[j] = bytes[i] | (bytes[i + 1] << 8);
147+
}
148+
parts.push(String.fromCharCode.apply(null, units as unknown as number[]));
119149
}
120-
return out;
150+
return parts.join("");
121151
}
122152

123153
function decodeASCII(bytes: Uint8Array): string {
124-
return String.fromCharCode(...bytes.map((b) => b & 0x7f));
154+
// 7-bit ASCII: mask the high bit of each byte (preserves the semantics of the previous implementation).
155+
const parts: string[] = [];
156+
for (let i = 0; i < bytes.length; i += CHUNK) {
157+
const end = Math.min(bytes.length, i + CHUNK);
158+
const codes = new Array<number>(end - i);
159+
for (let j = i, k = 0; j < end; j++, k++) {
160+
codes[k] = bytes[j] & 0x7f;
161+
}
162+
parts.push(String.fromCharCode.apply(null, codes as unknown as number[]));
163+
}
164+
return parts.join("");
125165
}
126166

127167
function decodeLatin1(bytes: Uint8Array): string {
128-
return String.fromCharCode(...bytes);
168+
// Latin-1 is 0x00..0xFF direct mapping; avoid spread.
169+
const parts: string[] = [];
170+
for (let i = 0; i < bytes.length; i += CHUNK) {
171+
const end = Math.min(bytes.length, i + CHUNK);
172+
const codes = new Array<number>(end - i);
173+
for (let j = i, k = 0; j < end; j++, k++) {
174+
codes[k] = bytes[j];
175+
}
176+
parts.push(String.fromCharCode.apply(null, codes as unknown as number[]));
177+
}
178+
return parts.join("");
129179
}
130180

131181
function decodeWindows1252(bytes: Uint8Array): string {
182+
// Only 0x80..0x9F need mapping; others are direct 1-byte codes.
183+
const parts: string[] = [];
132184
let out = "";
133-
for (const b of bytes) {
134-
if (b >= 0x80 && b <= 0x9f && WINDOWS_1252_EXTRA[b]) {
135-
out += WINDOWS_1252_EXTRA[b];
136-
} else {
137-
out += String.fromCharCode(b);
185+
186+
for (let i = 0; i < bytes.length; i++) {
187+
const b = bytes[i];
188+
const extra = b >= 0x80 && b <= 0x9f ? WINDOWS_1252_EXTRA[b] : undefined;
189+
out += extra ?? String.fromCharCode(b);
190+
191+
if (out.length >= CHUNK) {
192+
parts.push(out);
193+
out = "";
138194
}
139195
}
140-
return out;
196+
197+
if (out) parts.push(out);
198+
return parts.join("");
141199
}
142200

143201
function encodeUTF8(str: string): Uint8Array {
144202
const out: number[] = [];
145203
for (let i = 0; i < str.length; i++) {
146-
const cp = str.charCodeAt(i);
204+
let cp = str.charCodeAt(i);
205+
206+
// surrogate pair
207+
if (cp >= 0xd800 && cp <= 0xdbff && i + 1 < str.length) {
208+
const lo = str.charCodeAt(i + 1);
209+
if (lo >= 0xdc00 && lo <= 0xdfff) {
210+
cp = 0x10000 + ((cp - 0xd800) << 10) + (lo - 0xdc00);
211+
i++;
212+
}
213+
}
214+
147215
if (cp < 0x80) {
148216
out.push(cp);
149217
} else if (cp < 0x800) {
@@ -170,28 +238,38 @@ function encodeUTF16LE(str: string): Uint8Array {
170238
const out = new Uint8Array(str.length * 2);
171239
for (let i = 0; i < str.length; i++) {
172240
const code = str.charCodeAt(i);
173-
out[i * 2] = code & 0xff;
174-
out[i * 2 + 1] = code >> 8;
241+
const o = i * 2;
242+
out[o] = code & 0xff;
243+
out[o + 1] = code >>> 8;
175244
}
176245
return out;
177246
}
178247

179248
function encodeASCII(str: string): Uint8Array {
180-
return new Uint8Array([...str].map((ch) => ch.charCodeAt(0) & 0x7f));
249+
// 7-bit ASCII: mask high bit
250+
const out = new Uint8Array(str.length);
251+
for (let i = 0; i < str.length; i++) out[i] = str.charCodeAt(i) & 0x7f;
252+
return out;
181253
}
182254

183255
function encodeLatin1(str: string): Uint8Array {
184-
return new Uint8Array([...str].map((ch) => ch.charCodeAt(0) & 0xff));
256+
const out = new Uint8Array(str.length);
257+
for (let i = 0; i < str.length; i++) out[i] = str.charCodeAt(i) & 0xff;
258+
return out;
185259
}
186260

187261
function encodeWindows1252(str: string): Uint8Array {
188-
return new Uint8Array(
189-
[...str].map((ch) => {
190-
const code = ch.charCodeAt(0);
191-
if (code <= 0xff) return code;
192-
if (WINDOWS_1252_REVERSE[ch] !== undefined)
193-
return WINDOWS_1252_REVERSE[ch];
194-
return 0x3f; // '?'
195-
})
196-
);
262+
const out = new Uint8Array(str.length);
263+
for (let i = 0; i < str.length; i++) {
264+
const ch = str[i];
265+
const code = ch.charCodeAt(0);
266+
267+
if (code <= 0xff) {
268+
out[i] = code;
269+
continue;
270+
}
271+
const mapped = WINDOWS_1252_REVERSE[ch];
272+
out[i] = mapped !== undefined ? mapped : 0x3f; // '?'
273+
}
274+
return out;
197275
}

0 commit comments

Comments
 (0)