11import { expect } from "chai" ;
2+ import { toHex } from "@exodus/bytes/hex.js" ;
23import { textEncode , textDecode , type SupportedEncoding } from '../lib/index.js' ;
34
/**
 * Malformed UTF-8 byte sequences paired with the UTF-16 code units that the
 * UTF-8 "textDecode replacement" test expects `textDecode` to produce.
 *
 * - `bytes`: raw input bytes (not valid UTF-8).
 * - `charcodes`: expected decoded code units; 0xFFFD is U+FFFD REPLACEMENT CHARACTER.
 */
const nonUtf8 = [
  // 0xFE and 0xFF are never valid UTF-8 bytes: one replacement each.
  { bytes: [0, 254, 255], charcodes: [0, 0xff_fd, 0xff_fd] },
  // Lone continuation byte.
  { bytes: [0x80], charcodes: [0xff_fd] },
  // Valid prefix of a four-byte sequence cut short: a single replacement.
  { bytes: [0xf0, 0x90, 0x80], charcodes: [0xff_fd] },
  // 0x80 is not an allowed second byte after 0xF0, so every byte is replaced on its own.
  { bytes: [0xf0, 0x80, 0x80], charcodes: [0xff_fd, 0xff_fd, 0xff_fd] },
];
11+
/**
 * UTF-16 code-unit sequences containing unpaired ("orphan") surrogates, used by
 * the replacement tests for every Unicode encoding in this file.
 *
 * - `charcodes`: input code units, including the lone surrogate(s).
 * - `replaced`: the same sequence with each lone surrogate swapped for U+FFFD;
 *   always the same length as `charcodes`.
 * - `utf8`: lowercase hex of the UTF-8 encoding of `replaced`
 *   (`efbfbd` is U+FFFD).
 */
const orphans = [
  // Lone high surrogate in the middle of ASCII text.
  { charcodes: [0x61, 0x62, 0xd8_00, 0x77, 0x78], replaced: [0x61, 0x62, 0xff_fd, 0x77, 0x78], utf8: '6162efbfbd7778' },
  // Lone high surrogate on its own.
  { charcodes: [0xd8_00], replaced: [0xff_fd], utf8: 'efbfbd' },
  // Two high surrogates in a row: neither forms a pair.
  { charcodes: [0xd8_00, 0xd8_00], replaced: [0xff_fd, 0xff_fd], utf8: 'efbfbdefbfbd' },
  // Lone low surrogate in the middle of ASCII text.
  { charcodes: [0x61, 0x62, 0xdf_ff, 0x77, 0x78], replaced: [0x61, 0x62, 0xff_fd, 0x77, 0x78], utf8: '6162efbfbd7778' },
  // Low surrogate followed by high surrogate: wrong order, so both are orphans.
  { charcodes: [0xdf_ff, 0xd8_00], replaced: [0xff_fd, 0xff_fd], utf8: 'efbfbdefbfbd' },
];
19+
420describe ( "Text polyfill encode/decode" , ( ) => {
521 const encodings : [ SupportedEncoding , string ] [ ] = [
622 [ "utf-8" , "Hello 🌍" ] ,
723 [ "utf-16le" , "Hello 🌍" ] ,
24+ [ "utf-16be" , "Hello 🌍" ] ,
825 [ "ascii" , "Hello!" ] ,
926 [ "latin1" , "Héllo ¢" ] ,
1027 [ "windows-1252" , "Hello €—World" ] ,
@@ -30,6 +47,24 @@ describe("Text polyfill encode/decode", () => {
3047 const str = "𝄞" ; // U+1D11E
3148 expect ( textDecode ( textEncode ( str , "utf-8" ) , "utf-8" ) ) . to . equal ( str ) ;
3249 } ) ;
it("should ignore (not remove) BOM", () => {
  // EF BB BF is the UTF-8 form of U+FEFF; the decoded string is expected to
  // still contain it. The second argument to `expect` is chai's failure message.
  const cases = [
    { input: [0xef, 0xbb, 0xbf], text: "\uFEFF" },
    { input: [0xef, 0xbb, 0xbf, 0x42], text: "\uFEFFB" },
  ];
  for (const { input, text } of cases) {
    const decoded = textDecode(Uint8Array.of(...input), "utf-8");
    expect(decoded, "utf-8").to.equal(text);
  }
});
it("textDecode replacement", () => {
  // Each malformed byte sequence must decode to its expected U+FFFD pattern,
  // and that already-replaced string must survive an encode/decode round trip.
  nonUtf8.forEach(({ bytes, charcodes }) => {
    const expected = String.fromCharCode(...charcodes);
    const malformed = Uint8Array.from(bytes);
    expect(textDecode(malformed, "utf-8")).to.equal(expected);
    const reEncoded = textEncode(expected, "utf-8");
    expect(textDecode(reEncoded, "utf-8")).to.equal(expected);
  });
});
it("textEncode replacement", () => {
  // Strings with lone surrogates are expected to encode as if each orphan
  // were U+FFFD, and to decode back to the replaced form.
  orphans.forEach(({ charcodes, replaced, utf8 }) => {
    const input = String.fromCharCode(...charcodes);
    const encoded = textEncode(input, "utf-8");
    expect(toHex(encoded)).to.equal(utf8);
    const expected = String.fromCharCode(...replaced);
    expect(textDecode(encoded, "utf-8")).to.equal(expected);
  });
});
3368 } ) ;
3469
3570 describe ( "UTF-16LE" , ( ) => {
@@ -41,6 +76,67 @@ describe("Text polyfill encode/decode", () => {
4176 const str = "😀" ;
4277 expect ( textDecode ( textEncode ( str , "utf-16le" ) , "utf-16le" ) ) . to . equal ( str ) ;
4378 } ) ;
it("should ignore (not remove) BOM", () => {
  // FF FE is U+FEFF in little-endian byte order; the decoded string is
  // expected to still contain it. The second argument to `expect` is chai's
  // failure message.
  const cases = [
    { input: [0xff, 0xfe], text: "\uFEFF" },
    { input: [0xff, 0xfe, 0x42, 0], text: "\uFEFFB" },
  ];
  for (const { input, text } of cases) {
    const decoded = textDecode(Uint8Array.of(...input), "utf-16le");
    expect(decoded, "utf-16le").to.equal(text);
  }
});
it("textDecode replacement", () => {
  // Serialise each orphan-surrogate sequence as raw little-endian code units
  // and check that decoding substitutes U+FFFD for every lone surrogate.
  for (const { charcodes, replaced } of orphans) {
    // Size the buffer from the code units actually written. Sizing it from
    // `replaced` would make `setUint16` throw a RangeError (or leave stray
    // zero bytes) if a fixture's two arrays ever differed in length.
    const bytes = new Uint8Array(charcodes.length * 2);
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    for (let i = 0; i < charcodes.length; i++) view.setUint16(i * 2, charcodes[i], true);
    const expected = String.fromCharCode(...replaced);
    expect(textDecode(bytes, "utf-16le")).to.equal(expected);
    // The already-replaced string must survive an encode/decode round trip.
    expect(textDecode(textEncode(expected, "utf-16le"), "utf-16le")).to.equal(expected);
  }
});
it("textEncode replacement", () => {
  // Encoding a string with lone surrogates is expected to emit the replaced
  // code units in little-endian order, and to decode back to the replaced form.
  orphans.forEach(({ charcodes, replaced }) => {
    const input = String.fromCharCode(...charcodes);
    const encoded = textEncode(input, "utf-16le");
    const units = new DataView(encoded.buffer, encoded.byteOffset, encoded.byteLength);
    expect(units.byteLength).to.equal(replaced.length * 2);
    replaced.forEach((unit, index) => {
      expect(units.getUint16(index * 2, true)).to.equal(unit);
    });
    const expected = String.fromCharCode(...replaced);
    expect(textDecode(encoded, "utf-16le")).to.equal(expected);
  });
});
104+ } ) ;
105+
// Big-endian UTF-16 suite: round trips, BOM pass-through, and U+FFFD
// substitution for lone surrogates on both the decode and encode paths.
describe("UTF-16BE", () => {
  it("should handle BMP chars", () => {
    const str = "ABC";
    expect(textDecode(textEncode(str, "utf-16be"), "utf-16be")).to.equal(str);
  });
  it("should handle emoji", () => {
    const str = "😀";
    expect(textDecode(textEncode(str, "utf-16be"), "utf-16be")).to.equal(str);
  });
  it("should ignore (not remove) BOM", () => {
    // FE FF is U+FEFF in big-endian byte order; the decoded string is expected
    // to still contain it. The second argument to `expect` is chai's failure message.
    expect(textDecode(Uint8Array.of(0xfe, 0xff), "utf-16be"), "utf-16be").to.equal("\uFEFF");
    expect(textDecode(Uint8Array.of(0xfe, 0xff, 0, 0x42), "utf-16be"), "utf-16be").to.equal("\uFEFFB");
  });
  it("textDecode replacement", () => {
    for (const { charcodes, replaced } of orphans) {
      // Size the buffer from the code units actually written. Sizing it from
      // `replaced` would make `setUint16` throw a RangeError (or leave stray
      // zero bytes) if a fixture's two arrays ever differed in length.
      const bytes = new Uint8Array(charcodes.length * 2);
      const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
      // `false` selects big-endian byte order.
      for (let i = 0; i < charcodes.length; i++) view.setUint16(i * 2, charcodes[i], false);
      const string = String.fromCharCode(...replaced);
      expect(textDecode(bytes, "utf-16be")).to.equal(string);
      // The already-replaced string must survive an encode/decode round trip.
      expect(textDecode(textEncode(string, "utf-16be"), "utf-16be")).to.equal(string);
    }
  });
  it("textEncode replacement", () => {
    for (const { charcodes, replaced } of orphans) {
      const bytes = textEncode(String.fromCharCode(...charcodes), "utf-16be");
      const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
      // Two bytes per code unit; every lone surrogate is expected as U+FFFD.
      expect(view.byteLength).to.equal(replaced.length * 2);
      for (let i = 0; i < replaced.length; i++) {
        expect(view.getUint16(i * 2, false)).to.equal(replaced[i]);
      }
      expect(textDecode(bytes, "utf-16be")).to.equal(String.fromCharCode(...replaced));
    }
  });
});
45141
46142 describe ( "ASCII" , ( ) => {
0 commit comments