@@ -54,10 +54,10 @@ constinit const char *const CHSETS_KSC[] = {"CP949", "EUC-KR", "JOHAB", 0};
5454constinit const char *const CHSETS_BIG5[] = {" CP950" , " EUC-TW" , " BIG5-HKSCS" , " BIG5HKSCS" , " BIG-5" , " BIG5" , 0 };
5555constinit const char *const CHSETS_UTF8[] = {" UTF-8" , " UTF8" , 0 };
5656constinit const char *const CHSETS_UTF7[] = {" UTF-7" , " UTF7" , 0 };
57- constinit const char *const CHSETS_UTF16LE[] = {" UCS-2LE " , " UTF-16LE " , " USC2LE " , " UTF16LE " , 0 };
58- constinit const char *const CHSETS_UTF16BE[] = {" UCS-2BE " , " UTF-16BE " , " USC2BE " , " UTF16BE " , 0 };
59- constinit const char *const CHSETS_UTF32LE[] = {" UCS-4LE " , " UTF-32LE " , " USC4LE " , " UTF32LE " , 0 };
60- constinit const char *const CHSETS_UTF32BE[] = {" UCS-4BE " , " UTF-32BE " , " USC4BE " , " UTF32BE " , 0 };
57+ constinit const char *const CHSETS_UTF16LE[] = {" UTF-16LE " , " UTF16LE " , " UCS-2LE " , " USC2LE " , 0 };
58+ constinit const char *const CHSETS_UTF16BE[] = {" UTF-16BE " , " UTF16BE " , " UCS-2BE " , " USC2BE " , 0 };
59+ constinit const char *const CHSETS_UTF32LE[] = {" UTF-32LE " , " UTF32LE " , " UCS-4LE " , " USC4LE " , 0 };
60+ constinit const char *const CHSETS_UTF32BE[] = {" UTF-32BE " , " UTF32BE " , " UCS-4BE " , " USC4BE " , 0 };
6161#endif
6262
6363//
@@ -341,7 +341,7 @@ void C_UnicodeIn::init()
341341 m_ReadMethod = &C_UnicodeIn::readReverseUTF16;
342342 return ;
343343 default :
344- if (m_Src.size () >= 3 && 0 == memcmp (m_Src.buffer (), " \xef\xbb\xbf " , 3 ))
344+ if (m_Src.size () >= 3 && 0 == memcmp (m_Src.buffer (), u8" \uFEFF " , 3 ))
345345 // UTF-8 with BOM
346346 {
347347 m_Src.pop (3 );
@@ -357,13 +357,13 @@ void C_UnicodeIn::init()
357357#ifdef _WIN32
358358 const auto size = m_Src.size ();
359359 int mask = IS_TEXT_UNICODE_UNICODE_MASK;
360- if (IsTextUnicode (m_Src.buffer (), int (size), &mask))
360+ if (IsTextUnicode (m_Src.buffer (), int (size), &mask) || mask )
361361 {
362362 m_ReadMethod = &C_UnicodeIn::readUTF16;
363363 return ;
364364 }
365365 mask = IS_TEXT_UNICODE_REVERSE_MASK;
366- if (IsTextUnicode (m_Src.buffer (), int (size), &mask))
366+ if (IsTextUnicode (m_Src.buffer (), int (size), &mask) || mask )
367367 {
368368 m_ReadMethod = &C_UnicodeIn::readReverseUTF16;
369369 return ;
@@ -429,17 +429,17 @@ bool C_UnicodeIn::guessCodePage()
429429{
430430 static constinit const T_Encoding MBCS_CODEPAGES[] ={
431431#ifdef _WIN32
432- CP_ACP, CP_UTF8 ,
432+ CP_UTF8, CP_ACP ,
433433 932 , 936 , 949 , 950 , 951 , // from https://en.wikipedia.org/wiki/Windows_code_page#East_Asian_multi-byte_code_pages
434434 CP_UTF7
435435#elif defined(__unix__)
436436 CHSETS_UTF32LE, CHSETS_UTF32BE, CHSETS_UTF8, CHSETS_SJIS, CHSETS_GB, CHSETS_KSC, CHSETS_BIG5, CHSETS_UTF7, CHSETS_UTF16LE, CHSETS_UTF16BE
437437#endif
438438 };
439- for (size_t i = 0 ; i < std::size (MBCS_CODEPAGES); ++i )
439+ for (auto i: MBCS_CODEPAGES )
440440 {
441441 m_ErrCode = UIE_EOF; // reset error code
442- setCodePage (MBCS_CODEPAGES[i] );
442+ setCodePage (i );
443443 ingestMBCS ();
444444 if (m_ErrCode != UIE_NO_UNICODE_TRANSLATION)
445445 {
0 commit comments