Skip to content

Commit 6d6576d

Browse files
committed
改进UTF8编码的兼容性。
1 parent da77679 commit 6d6576d

3 files changed

Lines changed: 50 additions & 16 deletions

File tree

src/common/QUtf8.cpp

Lines changed: 47 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,64 @@
11
#include "QUtf8.h"
22
#include "utf8.h"
33
#include <QVector>
4+
45
/**
 * Decode a UTF-8 byte buffer into a QString.
 *
 * @param _pBuff  Pointer to the UTF-8 bytes. It does not need to be
 *                NUL-terminated; at most _len bytes are read.
 * @param _len    Number of bytes available at _pBuff.
 * @return        The decoded text. Code points above U+FFFF are emitted as
 *                UTF-16 surrogate pairs.
 *
 * NOTE(review): u8decodelen()/u8decode() take no length argument and
 * presumably stop at the first NUL byte, so input with embedded NULs would be
 * truncated there - confirm against utf8.cpp.
 */
QString utf8_to_QString(const char* _pBuff, quint32 _len)
{
    // Decode from a bounded copy rather than from _pBuff directly: QByteArray
    // always keeps a '\0' after its data, so the NUL-terminated decoder below
    // can never run past _len bytes of the caller's buffer.
    QByteArray t_byteArr(_pBuff, _len);
    const char* t_src = t_byteArr.constData();

    int t_needlen = (int)u8decodelen(t_src) + 1;
    QVector<ucs4_t> t_ucsBuff(t_needlen, 0);
    int t_decodelen = (int)u8decode(t_src, t_ucsBuff.data(), t_needlen, NULL);

    QVector<ushort> t_utf16;
    // At least one UTF-16 unit per code point; supplementary ones add a second.
    t_utf16.reserve(t_decodelen > 0 ? t_decodelen : 0);
    for (int i = 0; i < t_decodelen; ++i)
    {
        uint32_t t_ucs4 = t_ucsBuff[i];
        if (t_ucs4 > 0x10FFFF || (0xD800 <= t_ucs4 && t_ucs4 <= 0xDFFF))
        {
            // Not a Unicode scalar value: flagged in debug builds, and
            // dropped from the output in release builds.
            Q_ASSERT(false);
        }
        else if (t_ucs4 > 0xFFFF)
        {
            // Supplementary plane: split into a high/low surrogate pair.
            uint32_t t_ucs = t_ucs4 - 0x10000;
            t_utf16.push_back((ushort)((t_ucs >> 10) + 0xD800));
            t_utf16.push_back((ushort)((t_ucs & 0x3FF) + 0xDC00));
        }
        else
        {
            t_utf16.push_back((ushort)t_ucs4);
        }
    }
    return QString::fromUtf16(t_utf16.data(), t_utf16.length());
}
30+
1831
/**
 * Encode a QString as UTF-8.
 *
 * The string's UTF-16 units are first widened to UCS-4 code points (valid
 * surrogate pairs are combined into one code point) and then handed to
 * u8encode().
 *
 * @param _str  Text to encode.
 * @return      The UTF-8 bytes, without a trailing NUL.
 *
 * NOTE(review): the UCS-4 buffer is 0-terminated for u8encodelen()/u8encode(),
 * so a U+0000 inside _str would end the encoded output early - confirm whether
 * callers can pass such strings.
 */
QByteArray QString_to_utf8(const QString& _str)
{
    const int t_len = _str.length();
    const ushort* t_pstr = _str.utf16();

    QVector<ucs4_t> t_ucsBuff;
    t_ucsBuff.reserve(t_len + 1);
    for (int i = 0; i < t_len; ++i)
    {
        const ushort t_ch = t_pstr[i];
        uint32_t t_ucs4 = t_ch;
        if (t_ch >= 0xD800 && t_ch <= 0xDBFF && (i + 1) < t_len)
        {
            const ushort t_ch2 = t_pstr[i + 1];
            if (t_ch2 >= 0xDC00 && t_ch2 <= 0xDFFF)
            {
                // Valid pair: combine into one supplementary code point and
                // consume the low surrogate as well.
                t_ucs4 = ((uint32_t)(t_ch - 0xD800) << 10) + (uint32_t)(t_ch2 - 0xDC00) + 0x10000;
                ++i;
            }
            else
            {
                // High surrogate without a matching low surrogate: flagged in
                // debug builds, passed through unchanged otherwise.
                Q_ASSERT(false);
            }
        }
        t_ucsBuff.push_back(t_ucs4);
    }
    // u8encodelen() walks the buffer until it reads a 0, so the buffer must be
    // explicitly terminated; without this the encoder reads past the end.
    t_ucsBuff.push_back(ucs4_t(0));

    int t_needlen2 = (int)u8encodelen(t_ucsBuff.data()) + 1;
    QByteArray t_arr(t_needlen2, char('\0'));
    u8encode(t_ucsBuff.data(), t_arr.data(), t_needlen2, NULL);
    // Drop the extra byte that was reserved for the encoder's trailing NUL.
    t_arr.resize(t_arr.size() - 1);
    return t_arr;
}

src/common/QUtf8.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,5 +10,4 @@
1010

1111
extern QString utf8_to_QString(const char* _pBuff, quint32 _len);
1212
extern QByteArray QString_to_utf8(const QString& _str);
13-
1413
#endif // QUtf8_h__

src/common/utf8.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88
* or commercial. It's free.
99
*/
1010

11-
# include <stdint.h>
1211
# include <stddef.h>
12+
# include <stdint.h>
1313

1414
# include "utf8.h"
1515

@@ -245,11 +245,11 @@ size_t u8encode(ucs4_t* us, char* des, size_t n, size_t* illegal)
245245
size_t u8encodelen(ucs4_t* us)
246246
{
247247
size_t len = 0;
248-
char tmp[4];
248+
char tmp[5];
249249
while (*us)
250250
{
251251
char* ptmp = tmp;
252-
size_t left = 4;
252+
size_t left = 5;
253253
int ret = putu8c(*us, &ptmp, &left);
254254
if (ret > 0)
255255
{

0 commit comments

Comments
 (0)