1-
21export type SupportedEncoding =
32 | "utf-8"
43 | "utf8"
@@ -18,9 +17,26 @@ const WINDOWS_1252_EXTRA: Record<number, string> = {
1817
// Inverse of WINDOWS_1252_EXTRA: maps each mapped character back to its numeric key.
const WINDOWS_1252_REVERSE: Record<string, number> = {};
Object.entries(WINDOWS_1252_EXTRA).forEach(([numericKey, mappedChar]) => {
  // Object.entries yields the keys as strings, so parse them back explicitly as base 10.
  WINDOWS_1252_REVERSE[mappedChar] = Number.parseInt(numericKey, 10);
});
22+
// ---------- Cached decoders/encoders ----------
// Each instance is created lazily on first request and then reused.
let _utf8Decoder: TextDecoder | undefined;
let _utf8Encoder: TextEncoder | undefined;

/**
 * Returns the shared UTF-8 `TextDecoder`, constructing it on first use.
 * @returns the cached decoder, or `undefined` when `globalThis.TextDecoder` does not exist
 */
function utf8Decoder(): TextDecoder | undefined {
  if (typeof globalThis.TextDecoder === "undefined") {
    return undefined;
  }
  if (_utf8Decoder === undefined) {
    _utf8Decoder = new globalThis.TextDecoder("utf-8");
  }
  return _utf8Decoder;
}

/**
 * Returns the shared `TextEncoder`, constructing it on first use.
 * @returns the cached encoder, or `undefined` when `globalThis.TextEncoder` does not exist
 */
function utf8Encoder(): TextEncoder | undefined {
  if (typeof globalThis.TextEncoder === "undefined") {
    return undefined;
  }
  if (_utf8Encoder === undefined) {
    _utf8Encoder = new globalThis.TextEncoder();
  }
  return _utf8Encoder;
}
2335
// Upper bound on how many code units are passed to a single
// String.fromCharCode call, or accumulated before a partial string is flushed.
// 32k sits well below the ~105k failure threshold reported by the original
// author, keeps memory use modest, and is still fast.
const CHUNK = 1 << 15; // 32 * 1024 = 32768
39+
2440/**
2541 * Decode text from binary data
2642 * @param bytes Binary data
@@ -30,14 +46,12 @@ export function textDecode(
3046 bytes : Uint8Array ,
3147 encoding : SupportedEncoding = "utf-8"
3248) : string {
33-
3449 switch ( encoding . toLowerCase ( ) as SupportedEncoding ) {
3550 case "utf-8" :
36- case "utf8" :
37- if ( typeof globalThis . TextDecoder !== "undefined" ) {
38- return new globalThis . TextDecoder ( "utf-8" ) . decode ( bytes ) ;
39- }
40- return decodeUTF8 ( bytes ) ;
51+ case "utf8" : {
52+ const dec = utf8Decoder ( ) ;
53+ return dec ? dec . decode ( bytes ) : decodeUTF8 ( bytes ) ;
54+ }
4155 case "utf-16le" :
4256 return decodeUTF16LE ( bytes ) ;
4357 case "ascii" :
@@ -58,11 +72,10 @@ export function textEncode(
5872) : Uint8Array {
5973 switch ( encoding . toLowerCase ( ) as SupportedEncoding ) {
6074 case "utf-8" :
61- case "utf8" :
62- if ( typeof globalThis . TextEncoder !== "undefined" ) {
63- return new globalThis . TextEncoder ( ) . encode ( input ) ;
64- }
65- return encodeUTF8 ( input ) ;
75+ case "utf8" : {
76+ const enc = utf8Encoder ( ) ;
77+ return enc ? enc . encode ( input ) : encodeUTF8 ( input ) ;
78+ }
6679 case "utf-16le" :
6780 return encodeUTF16LE ( input ) ;
6881 case "ascii" :
@@ -80,6 +93,7 @@ export function textEncode(
8093// --- Internal helpers ---
8194
8295function decodeUTF8 ( bytes : Uint8Array ) : string {
96+ const parts : string [ ] = [ ] ;
8397 let out = "" ;
8498 let i = 0 ;
8599 while ( i < bytes . length ) {
@@ -97,53 +111,107 @@ function decodeUTF8(bytes: Uint8Array): string {
97111 const b2 = bytes [ i ++ ] & 0x3f ;
98112 const b3 = bytes [ i ++ ] & 0x3f ;
99113 const b4 = bytes [ i ++ ] & 0x3f ;
100- let cp =
101- ( ( b1 & 0x07 ) << 18 ) |
102- ( b2 << 12 ) |
103- ( b3 << 6 ) |
104- b4 ;
114+ let cp = ( ( b1 & 0x07 ) << 18 ) | ( b2 << 12 ) | ( b3 << 6 ) | b4 ;
105115 cp -= 0x10000 ;
106116 out += String . fromCharCode (
107117 0xd800 + ( ( cp >> 10 ) & 0x3ff ) ,
108118 0xdc00 + ( cp & 0x3ff )
109119 ) ;
110120 }
121+
122+ if ( out . length >= CHUNK ) {
123+ parts . push ( out ) ;
124+ out = "" ;
125+ }
111126 }
112- return out ;
127+
128+ if ( out ) parts . push ( out ) ;
129+ return parts . join ( "" ) ;
113130}
114131
/**
 * Decode little-endian UTF-16 bytes into a string.
 *
 * Every two bytes form one 16-bit code unit, low byte first. A trailing
 * unpaired byte is ignored. Code units are converted in batches of at most
 * `CHUNK` so that no single `String.fromCharCode` call receives more than
 * `CHUNK` arguments.
 *
 * @param bytes Binary data
 * @returns Decoded string ("" when fewer than two bytes are supplied)
 */
function decodeUTF16LE(bytes: Uint8Array): string {
  // Number of complete 16-bit units; the shift discards an odd trailing byte.
  const unitCount = bytes.length >> 1;
  if (unitCount === 0) return "";

  const pieces: string[] = [];
  for (let start = 0; start < unitCount; start += CHUNK) {
    const stop = Math.min(unitCount, start + CHUNK);
    const units: number[] = [];
    for (let u = start; u < stop; u++) {
      const lo = bytes[2 * u];
      const hi = bytes[2 * u + 1];
      units.push(lo | (hi << 8));
    }
    pieces.push(String.fromCharCode(...units));
  }
  return pieces.join("");
}
122152
/**
 * Decode bytes as 7-bit ASCII.
 *
 * The high bit of every byte is cleared before conversion, so bytes
 * 0x80..0xFF map onto 0x00..0x7F. Input is processed in slices of at most
 * `CHUNK` bytes to bound the argument count of each `String.fromCharCode` call.
 *
 * @param bytes Binary data
 * @returns Decoded string
 */
function decodeASCII(bytes: Uint8Array): string {
  const pieces: string[] = [];
  let offset = 0;
  while (offset < bytes.length) {
    // subarray clamps the end index, so the last slice may be shorter than CHUNK.
    const slice = bytes.subarray(offset, offset + CHUNK);
    const codes = Array.from(slice, (b) => b & 0x7f);
    pieces.push(String.fromCharCode(...codes));
    offset += CHUNK;
  }
  return pieces.join("");
}
126166
/**
 * Decode bytes as Latin-1: each byte 0x00..0xFF becomes the character with
 * the same code.
 *
 * Input is processed in slices of at most `CHUNK` bytes to bound the argument
 * count of each `String.fromCharCode` call.
 *
 * @param bytes Binary data
 * @returns Decoded string
 */
function decodeLatin1(bytes: Uint8Array): string {
  const total = bytes.length;
  const pieces: string[] = [];
  for (let start = 0; start < total; start += CHUNK) {
    // subarray clamps the end index; Array.from copies the slice into a plain number[].
    const codes = Array.from(bytes.subarray(start, start + CHUNK));
    pieces.push(String.fromCharCode(...codes));
  }
  return pieces.join("");
}
130180
/**
 * Decode bytes as Windows-1252.
 *
 * Bytes 0x80..0x9F are looked up in `WINDOWS_1252_EXTRA`; when the table has
 * no entry, and for every byte outside that range, the byte value is used
 * directly as the character code. Output is accumulated in pieces that are
 * flushed once they reach `CHUNK` characters and joined at the end.
 *
 * @param bytes Binary data
 * @returns Decoded string
 */
function decodeWindows1252(bytes: Uint8Array): string {
  const pieces: string[] = [];
  let pending = "";

  for (const byte of bytes) {
    let mapped: string | undefined;
    if (byte >= 0x80 && byte <= 0x9f) {
      mapped = WINDOWS_1252_EXTRA[byte];
    }
    pending += mapped ?? String.fromCharCode(byte);

    if (pending.length >= CHUNK) {
      pieces.push(pending);
      pending = "";
    }
  }

  if (pending !== "") {
    pieces.push(pending);
  }
  return pieces.join("");
}
142200
143201function encodeUTF8 ( str : string ) : Uint8Array {
144202 const out : number [ ] = [ ] ;
145203 for ( let i = 0 ; i < str . length ; i ++ ) {
146- const cp = str . charCodeAt ( i ) ;
204+ let cp = str . charCodeAt ( i ) ;
205+
206+ // surrogate pair
207+ if ( cp >= 0xd800 && cp <= 0xdbff && i + 1 < str . length ) {
208+ const lo = str . charCodeAt ( i + 1 ) ;
209+ if ( lo >= 0xdc00 && lo <= 0xdfff ) {
210+ cp = 0x10000 + ( ( cp - 0xd800 ) << 10 ) + ( lo - 0xdc00 ) ;
211+ i ++ ;
212+ }
213+ }
214+
147215 if ( cp < 0x80 ) {
148216 out . push ( cp ) ;
149217 } else if ( cp < 0x800 ) {
@@ -170,28 +238,38 @@ function encodeUTF16LE(str: string): Uint8Array {
170238 const out = new Uint8Array ( str . length * 2 ) ;
171239 for ( let i = 0 ; i < str . length ; i ++ ) {
172240 const code = str . charCodeAt ( i ) ;
173- out [ i * 2 ] = code & 0xff ;
174- out [ i * 2 + 1 ] = code >> 8 ;
241+ const o = i * 2 ;
242+ out [ o ] = code & 0xff ;
243+ out [ o + 1 ] = code >>> 8 ;
175244 }
176245 return out ;
177246}
178247
/**
 * Encode a string as 7-bit ASCII, one byte per UTF-16 code unit.
 *
 * Only the low seven bits of each code unit are kept, so anything outside
 * 0x00..0x7F is folded into that range rather than rejected.
 *
 * @param str Text to encode
 * @returns Byte array with the same length as `str`
 */
function encodeASCII(str: string): Uint8Array {
  return Uint8Array.from(
    { length: str.length },
    (_unused, index) => str.charCodeAt(index) & 0x7f
  );
}
182254
/**
 * Encode a string as Latin-1, one byte per UTF-16 code unit.
 *
 * Only the low eight bits of each code unit are kept, so code units above
 * 0xFF are truncated rather than rejected.
 *
 * @param str Text to encode
 * @returns Byte array with the same length as `str`
 */
function encodeLatin1(str: string): Uint8Array {
  return Uint8Array.from(
    { length: str.length },
    (_unused, index) => str.charCodeAt(index) & 0xff
  );
}
186260
/**
 * Encode a string as Windows-1252, one byte per UTF-16 code unit.
 *
 * Code units up to 0xFF are written unchanged. Anything larger is looked up
 * in `WINDOWS_1252_REVERSE`; when no mapping exists the byte 0x3F ('?') is
 * written instead.
 *
 * NOTE(review): code units U+0080..U+009F are written as their own value,
 * while decodeWindows1252 may translate those bytes through
 * WINDOWS_1252_EXTRA, so such characters may not round-trip. Confirm whether
 * that is intended.
 *
 * @param str Text to encode
 * @returns Byte array with the same length as `str`
 */
function encodeWindows1252(str: string): Uint8Array {
  const encoded = new Uint8Array(str.length);
  for (let pos = 0; pos < str.length; pos++) {
    const unit = str.charCodeAt(pos);
    if (unit > 0xff) {
      // Not directly representable: use the reverse table, else fall back to '?'.
      const byte = WINDOWS_1252_REVERSE[str.charAt(pos)];
      encoded[pos] = byte === undefined ? 0x3f : byte;
    } else {
      encoded[pos] = unit;
    }
  }
  return encoded;
}
0 commit comments