11"use strict" ;
22
3- // Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
// == UTF16-LE codec. ==========================================================
// Note: We're not using the Node.js native codec because its StringDecoder implementation is buggy
// (it adds \0 in some chunks and doesn't flag a non-even number of bytes). We do use raw
// encoding/decoding routines for performance where possible, though.
7+
8+ exports . utf16le = class Utf16LECodec {
9+ createEncoder ( options , iconv ) {
10+ return new Utf16LEEncoder ( iconv . backend ) ;
11+ }
12+ createDecoder ( options , iconv ) {
13+ return new Utf16LEDecoder ( iconv . backend , iconv . defaultCharUnicode ) ;
14+ }
15+ get bomAware ( ) { return true ; }
16+ }
17+
/**
 * Streaming encoder: JavaScript string -> UTF-16LE bytes.
 * Every UTF-16 code unit of the input (lone surrogates included) is emitted as two bytes.
 */
class Utf16LEEncoder {
    /**
     * @param {object} backend - Must provide `allocBytes(n)` returning a Uint8Array-like byte
     *     array of length `n`, and `bytesToResult(bytes, len)` producing the final result.
     */
    constructor(backend) {
        this.backend = backend;
    }

    /**
     * Encodes one chunk of text.
     * @param {string} str - Text to encode.
     * @returns {*} Whatever `backend.bytesToResult()` returns for the `2 * str.length` bytes.
     */
    write(str) {
        const bytes = this.backend.allocBytes(str.length * 2);
        if ((bytes.byteOffset & 1) === 0) {
            // Fast path: store whole code units through a 16-bit view.
            // Uint16Array uses the platform byte order, so this assumes a little-endian host.
            const chars = new Uint16Array(bytes.buffer, bytes.byteOffset, str.length);
            for (let i = 0; i < str.length; i++) {
                chars[i] = str.charCodeAt(i);
            }
        } else {
            // A Uint16Array cannot start at an odd byte offset (the constructor throws RangeError),
            // so write the low and high bytes individually.
            for (let i = 0, pos = 0; i < str.length; i++) {
                const code = str.charCodeAt(i);
                bytes[pos++] = code & 0xFF;
                bytes[pos++] = code >> 8;
            }
        }
        return this.backend.bytesToResult(bytes, bytes.length);
    }

    /** Nothing is buffered between writes, so there is nothing to flush. */
    end() {}
}
34+
/**
 * Streaming decoder: UTF-16LE bytes -> JavaScript string.
 * Tolerates chunk boundaries that split a 2-byte code unit or a surrogate pair.
 */
class Utf16LEDecoder {
    /**
     * @param {object} backend - Must provide `allocRawChars(n)` returning a Uint16Array-like
     *     array of length `n`, and `rawCharsToResult(chars, len)` returning a string.
     * @param {string} defaultChar - Emitted by `end()` in place of a dangling trailing byte.
     */
    constructor(backend, defaultChar) {
        this.backend = backend;
        this.defaultChar = defaultChar;
        this.leadByte = -1;             // First byte of a code unit split across chunks, or -1.
        this.leadSurrogate = undefined; // High surrogate (1-char string) awaiting its pair.
    }

    /**
     * Decodes one chunk of bytes.
     * @param {Uint8Array} buf - Input bytes; `buffer`, `byteOffset`, `length` and indexing are used.
     * @returns {string} Text decoded so far. A trailing odd byte or trailing high surrogate is
     *     held back until the next `write()` or `end()`.
     */
    write(buf) {
        // NOTE: This function is mostly the same as Utf16BEDecoder.write() with bytes swapped.
        //       Please keep them in sync.
        // NOTE: The logic here is more complicated than barely necessary due to several limitations:
        //  1. Input data chunks can split 2-byte code units, making 'leadByte' necessary.
        //  2. Input data chunks can split valid surrogate pairs, making 'leadSurrogate' necessary.
        //  3. rawCharsToResult() of Web backend converts all lone surrogates to U+FFFD, so we need
        //     to make sure we don't feed it parts of valid surrogate pairs.
        //  4. For performance reasons we want to use initial buffer as much as we can. This is not
        //     possible if after our calculations the 2-byte memory alignment of a Uint16Array is
        //     lost, in which case we have to do a copy.

        if (buf.length === 0) {
            return '';
        }
        let offset = 0;
        let byteLen = buf.length;

        // Process previous leadByte
        let prefix = '';
        if (this.leadByte !== -1) {
            offset++; byteLen--;
            prefix = String.fromCharCode(this.leadByte | (buf[0] << 8));
        }

        // Set new leadByte if needed
        if (byteLen & 1) {
            this.leadByte = buf[buf.length - 1];
            byteLen--;
        } else {
            this.leadByte = -1;
        }

        // Process leadSurrogate
        if (prefix.length || byteLen) {
            // Add high surrogate from previous chunk.
            if (this.leadSurrogate) {
                if (prefix.length) {
                    prefix = this.leadSurrogate + prefix;
                } else {
                    // Make sure 'chars' don't start with a lone low surrogate; it will mess with rawCharsToResult.
                    prefix = this.leadSurrogate + String.fromCharCode(buf[offset] | (buf[offset + 1] << 8));
                    offset += 2; byteLen -= 2;
                }
                this.leadSurrogate = undefined;
            }

            // Slice off a new high surrogate at the end of the current chunk.
            if (byteLen) {
                const lastIdx = offset + byteLen - 2;
                const lastChar = buf[lastIdx] | (buf[lastIdx + 1] << 8);
                if (0xD800 <= lastChar && lastChar < 0xDC00) {
                    this.leadSurrogate = String.fromCharCode(lastChar);
                    byteLen -= 2;
                }
            } else { // slice from prefix
                const lastChar = prefix.charCodeAt(prefix.length - 1);
                if (0xD800 <= lastChar && lastChar < 0xDC00) {
                    this.leadSurrogate = prefix[prefix.length - 1];
                    prefix = prefix.slice(0, -1);
                }
            }
        }

        let chars;
        // The parentheses matter: `x & 1 === 0` parses as `x & (1 === 0)`, which is always 0
        // and would silently disable the zero-copy branch.
        if (((buf.byteOffset + offset) & 1) === 0) {
            // If byteOffset is aligned, just use the ArrayBuffer from input buf.
            chars = new Uint16Array(buf.buffer, buf.byteOffset + offset, byteLen >> 1);
        } else {
            // If byteOffset is NOT aligned, create a new aligned buffer and copy the data.
            chars = this.backend.allocRawChars(byteLen >> 1);
            const srcByteView = new Uint8Array(buf.buffer, buf.byteOffset + offset, byteLen);
            const destByteView = new Uint8Array(chars.buffer, chars.byteOffset, byteLen);
            destByteView.set(srcByteView);
        }

        return prefix + this.backend.rawCharsToResult(chars, chars.length);
    }

    /**
     * Flushes state held back by `write()`.
     * @returns {string|undefined} The pending high surrogate (if any) followed by `defaultChar`
     *     for a dangling byte (if any); `undefined` when nothing was pending.
     */
    end() {
        if (this.leadSurrogate || this.leadByte !== -1) {
            const res = (this.leadSurrogate ? this.leadSurrogate : '') + (this.leadByte !== -1 ? this.defaultChar : '');
            this.leadSurrogate = undefined;
            this.leadByte = -1;
            return res;
        }
    }
}
// Alias: 'ucs2' is served by the UTF-16LE codec defined in this file.
exports.ucs2 = "utf16le";
132+
4133
// == UTF16-BE codec. ==========================================================
6135
7136exports . utf16be = class Utf16BECodec {
8- get encoder ( ) { return Utf16BEEncoder ; }
9- get decoder ( ) { return Utf16BEDecoder ; }
137+ createEncoder ( options , iconv ) {
138+ return new Utf16BEEncoder ( iconv . backend ) ;
139+ }
140+ createDecoder ( options , iconv ) {
141+ return new Utf16BEDecoder ( iconv . backend , iconv . defaultCharUnicode ) ;
142+ }
10143 get bomAware ( ) { return true ; }
11144}
12145
13146class Utf16BEEncoder {
14- constructor ( opts , codec , backend ) {
147+ constructor ( backend ) {
15148 this . backend = backend ;
16149 }
17150
@@ -30,30 +163,86 @@ class Utf16BEEncoder {
30163}
31164
/**
 * Streaming decoder: UTF-16BE bytes -> JavaScript string.
 * Tolerates chunk boundaries that split a 2-byte code unit or a surrogate pair.
 */
class Utf16BEDecoder {
    /**
     * @param {object} backend - Must provide `allocRawChars(n)` returning a Uint16Array-like
     *     array of length `n`, and `rawCharsToResult(chars, len)` returning a string.
     * @param {string} defaultChar - Emitted by `end()` in place of a dangling trailing byte.
     */
    constructor(backend, defaultChar) {
        this.backend = backend;
        this.defaultChar = defaultChar;
        this.leadByte = -1;             // First byte of a code unit split across chunks, or -1.
        this.leadSurrogate = undefined; // High surrogate (1-char string) awaiting its pair.
    }

    /**
     * Decodes one chunk of bytes.
     * @param {Uint8Array} buf - Input bytes; `buffer`, `byteOffset`, `length` and indexing are used.
     * @returns {string} Text decoded so far. A trailing odd byte or trailing high surrogate is
     *     held back until the next `write()` or `end()`.
     */
    write(buf) {
        // NOTE: This function is mostly copy/paste from Utf16LEDecoder.write() with bytes swapped.
        //       Please keep them in sync.
        if (buf.length === 0) {
            return '';
        }

        let offset = 0;
        let byteLen = buf.length;

        // Process previous leadByte: it is the high byte of the first code unit.
        let prefix = '';
        if (this.leadByte !== -1) {
            offset++; byteLen--;
            prefix = String.fromCharCode((this.leadByte << 8) | buf[0]);
        }

        // Set new leadByte if an odd number of bytes remains.
        if (byteLen & 1) {
            this.leadByte = buf[buf.length - 1];
            byteLen--;
        } else {
            this.leadByte = -1;
        }

        // Process leadSurrogate
        if (prefix.length || byteLen) {
            // Add high surrogate from previous chunk.
            if (this.leadSurrogate) {
                if (prefix.length) {
                    prefix = this.leadSurrogate + prefix;
                } else {
                    // Make sure 'chars' don't start with a lone low surrogate; it will mess with rawCharsToResult.
                    prefix = this.leadSurrogate + String.fromCharCode((buf[offset] << 8) | buf[offset + 1]);
                    offset += 2; byteLen -= 2;
                }
                this.leadSurrogate = undefined;
            }

            // Slice off a new high surrogate at the end of the current chunk.
            if (byteLen) {
                const lastIdx = offset + byteLen - 2;
                const lastChar = (buf[lastIdx] << 8) | buf[lastIdx + 1];
                if (0xD800 <= lastChar && lastChar < 0xDC00) {
                    this.leadSurrogate = String.fromCharCode(lastChar);
                    byteLen -= 2;
                }
            } else { // slice from prefix
                const lastChar = prefix.charCodeAt(prefix.length - 1);
                if (0xD800 <= lastChar && lastChar < 0xDC00) {
                    this.leadSurrogate = prefix[prefix.length - 1];
                    prefix = prefix.slice(0, -1);
                }
            }
        }

        // Convert the main chunk of bytes. DataView.getUint16() reads big-endian by default
        // and has no alignment requirement, so the bytes are always swapped into a fresh array.
        const chars = this.backend.allocRawChars(byteLen >> 1);
        const srcBytes = new DataView(buf.buffer, buf.byteOffset + offset, byteLen);
        for (let i = 0; i < chars.length; i++) {
            chars[i] = srcBytes.getUint16(i * 2);
        }

        return prefix + this.backend.rawCharsToResult(chars, chars.length);
    }

    /**
     * Flushes state held back by `write()`.
     * @returns {string|undefined} The pending high surrogate (if any) followed by `defaultChar`
     *     for a dangling byte (if any); `undefined` when nothing was pending.
     */
    end() {
        if (this.leadSurrogate || this.leadByte !== -1) {
            const res = (this.leadSurrogate ? this.leadSurrogate : '') + (this.leadByte !== -1 ? this.defaultChar : '');
            this.leadSurrogate = undefined;
            this.leadByte = -1;
            return res;
        }
    }
}
59248
@@ -67,39 +256,25 @@ class Utf16BEDecoder {
// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
68257
69258exports . utf16 = class Utf16Codec {
70- constructor ( opts , iconv ) {
71- this . iconv = iconv ;
72- }
73- get encoder ( ) { return Utf16Encoder ; }
74- get decoder ( ) { return Utf16Decoder ; }
75- }
76-
77- class Utf16Encoder {
78- constructor ( options , codec ) {
259+ createEncoder ( options , iconv ) {
79260 options = options || { } ;
80261 if ( options . addBOM === undefined )
81262 options . addBOM = true ;
82- this . encoder = codec . iconv . getEncoder ( options . use || 'utf-16le' , options ) ;
263+ return iconv . getEncoder ( 'utf-16le' , options ) ;
83264 }
84-
85- // Pass-through to this.encoder
86- write ( str ) {
87- return this . encoder . write ( str ) ;
88- }
89-
90- end ( ) {
91- return this . encoder . end ( ) ;
265+ createDecoder ( options , iconv ) {
266+ return new Utf16Decoder ( options , iconv ) ;
92267 }
93268}
94269
95270class Utf16Decoder {
96- constructor ( options , codec ) {
271+ constructor ( options , iconv ) {
97272 this . decoder = null ;
98273 this . initialBufs = [ ] ;
99274 this . initialBufsLen = 0 ;
100275
101276 this . options = options || { } ;
102- this . iconv = codec . iconv ;
277+ this . iconv = iconv ;
103278 }
104279
105280 write ( buf ) {
0 commit comments