19
19
BOOST_JSON_NS_BEGIN
20
20
namespace detail {
21
21
22
// Byte-order-aware constant builders used by the UTF-8 helpers.
//
// All arguments are bare hexadecimal digit sequences WITHOUT the `0x`
// prefix (e.g. `C0`, `02`); the macros add the prefix via token pasting.
// The macro name must be followed immediately by `(` in the definition,
// otherwise the preprocessor treats it as an object-like macro.
//
// BOOST_JSON_MK_NUM(b1, b2)
//     Packs a classification value from a kind (`b1`) and a length (`b2`).
//     Little endian: kind in the high byte, length in the low byte
//     (0x102 for `(1, 02)`). Big endian: the two bytes are swapped.
//     The fields are combined with shifts so the result does not depend
//     on how many digits each argument is written with.
//
// BOOST_JSON_MK_NUM2 / BOOST_JSON_MK_NUM3 / BOOST_JSON_MK_NUM4
//     Build a 32-bit mask/pattern to compare against a 32-bit integer
//     whose first 2/3/4 bytes were filled with std::memcpy from the
//     input. Arguments are listed from the LAST copied byte down to the
//     first, and each must be exactly two hex digits. On big endian the
//     copied bytes occupy the most significant positions, so the
//     constant is byte-reversed and padded with zeros on the right.
//
// BOOST_JSON_UTF8_KIND(b) / BOOST_JSON_UTF8_LENGTH(b)
//     Extract the kind / length field from a value produced by
//     BOOST_JSON_MK_NUM. Fully parenthesized so they are safe inside
//     larger expressions.
#ifdef BOOST_JSON_BIG_ENDIAN
# define BOOST_JSON_MK_NUM(b1, b2) ((0x ## b2 << 8) | 0x ## b1)
# define BOOST_JSON_MK_NUM2(b1, b2) (0x ## b2 ## b1 ## 0000)
# define BOOST_JSON_MK_NUM3(b1, b2, b3) (0x ## b3 ## b2 ## b1 ## 00)
# define BOOST_JSON_MK_NUM4(b1, b2, b3, b4) (0x ## b4 ## b3 ## b2 ## b1)
# define BOOST_JSON_UTF8_KIND(b) ((b) & 0xFF)
# define BOOST_JSON_UTF8_LENGTH(b) ((b) >> 8)
#else
# define BOOST_JSON_MK_NUM(b1, b2) ((0x ## b1 << 8) | 0x ## b2)
# define BOOST_JSON_MK_NUM2(b1, b2) (0x ## b1 ## b2)
# define BOOST_JSON_MK_NUM3(b1, b2, b3) (0x ## b1 ## b2 ## b3)
# define BOOST_JSON_MK_NUM4(b1, b2, b3, b4) (0x ## b1 ## b2 ## b3 ## b4)
# define BOOST_JSON_UTF8_KIND(b) ((b) >> 8)
# define BOOST_JSON_UTF8_LENGTH(b) ((b) & 0xFF)
#endif
37
+
22
38
template <int N>
23
39
std::uint32_t
24
40
load_little_endian (void const * p)
38
54
uint16_t
39
55
classify_utf8 (char c)
40
56
{
57
+ // for little endian
41
58
// 0x000 = invalid
42
59
// 0x102 = 2 bytes, second byte [80, BF]
43
60
// 0x203 = 3 bytes, second byte [A0, BF]
@@ -46,6 +63,7 @@ classify_utf8(char c)
46
63
// 0x504 = 4 bytes, second byte [90, BF]
47
64
// 0x604 = 4 bytes, second byte [80, BF]
48
65
// 0x704 = 4 bytes, second byte [80, 8F]
66
+ // for big endian the bytes are reversed
49
67
static constexpr uint16_t first[128 ]
50
68
{
51
69
0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 ,
@@ -57,13 +75,41 @@ classify_utf8(char c)
57
75
0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 ,
58
76
0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 ,
59
77
60
- 0x000 , 0x000 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 ,
61
- 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 ,
62
- 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 ,
63
- 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 , 0x102 ,
64
- 0x203 , 0x303 , 0x303 , 0x303 , 0x303 , 0x303 , 0x303 , 0x303 ,
65
- 0x303 , 0x303 , 0x303 , 0x303 , 0x303 , 0x403 , 0x303 , 0x303 ,
66
- 0x504 , 0x604 , 0x604 , 0x604 , 0x704 , 0x000 , 0x000 , 0x000 ,
78
+ BOOST_JSON_MK_NUM (0 , 00 ), BOOST_JSON_MK_NUM (0 , 00 ),
79
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
80
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
81
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
82
+
83
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
84
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
85
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
86
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
87
+
88
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
89
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
90
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
91
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
92
+
93
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
94
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
95
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
96
+ BOOST_JSON_MK_NUM (1 , 02 ), BOOST_JSON_MK_NUM (1 , 02 ),
97
+
98
+ BOOST_JSON_MK_NUM (2 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
99
+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
100
+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
101
+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
102
+
103
+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
104
+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
105
+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (4 , 03 ),
106
+ BOOST_JSON_MK_NUM (3 , 03 ), BOOST_JSON_MK_NUM (3 , 03 ),
107
+
108
+ BOOST_JSON_MK_NUM (5 , 04 ), BOOST_JSON_MK_NUM (6 , 04 ),
109
+ BOOST_JSON_MK_NUM (6 , 04 ), BOOST_JSON_MK_NUM (6 , 04 ),
110
+ BOOST_JSON_MK_NUM (7 , 04 ), BOOST_JSON_MK_NUM (0 , 00 ),
111
+ BOOST_JSON_MK_NUM (0 , 00 ), BOOST_JSON_MK_NUM (0 , 00 ),
112
+
67
113
0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 , 0x000 ,
68
114
};
69
115
return first[static_cast <unsigned char >(c & 0x7F )];
@@ -74,30 +120,33 @@ bool
74
120
is_valid_utf8 (const char * p, uint16_t first)
75
121
{
76
122
uint32_t v;
77
- switch (first >> 8 )
123
+ switch (BOOST_JSON_UTF8_KIND ( first) )
78
124
{
79
125
default :
80
126
return false ;
81
127
82
128
// 2 bytes, second byte [80, BF]
83
129
case 1 :
84
- v = load_little_endian< 2 >(p );
85
- return (v & 0xC000 ) == 0x8000 ;
130
+ std::memcpy (&v, p, 2 );
131
+ return (v & BOOST_JSON_MK_NUM2 (C0, 00 )) == BOOST_JSON_MK_NUM2 ( 80 , 00 ) ;
86
132
87
133
// 3 bytes, second byte [A0, BF]
88
134
case 2 :
89
- v = load_little_endian<3 >(p);
90
- return (v & 0xC0E000 ) == 0x80A000 ;
135
+ std::memcpy (&v, p, 3 );
136
+ return (v & BOOST_JSON_MK_NUM3 (C0,E0 ,00 ))
137
+ == BOOST_JSON_MK_NUM3 (80 ,A0,00 );
91
138
92
139
// 3 bytes, second byte [80, BF]
93
140
case 3 :
94
- v = load_little_endian<3 >(p);
95
- return (v & 0xC0C000 ) == 0x808000 ;
141
+ std::memcpy (&v, p, 3 );
142
+ return (v & BOOST_JSON_MK_NUM3 (C0,C0,00 ))
143
+ == BOOST_JSON_MK_NUM3 (80 ,80 ,00 );
96
144
97
145
// 3 bytes, second byte [80, 9F]
98
146
case 4 :
99
- v = load_little_endian<3 >(p);
100
- return (v & 0xC0E000 ) == 0x808000 ;
147
+ std::memcpy (&v, p, 3 );
148
+ return (v & BOOST_JSON_MK_NUM3 (C0,E0 ,00 ))
149
+ == BOOST_JSON_MK_NUM3 (80 ,80 ,00 );
101
150
102
151
// 4 bytes, second byte [90, BF]
103
152
case 5 :
@@ -106,13 +155,15 @@ is_valid_utf8(const char* p, uint16_t first)
106
155
107
156
// 4 bytes, second byte [80, BF]
108
157
case 6 :
109
- v = load_little_endian<4 >(p);
110
- return (v & 0xC0C0C000 ) == 0x80808000 ;
158
+ std::memcpy (&v, p, 4 );
159
+ return (v & BOOST_JSON_MK_NUM4 (C0,C0,C0,00 ))
160
+ == BOOST_JSON_MK_NUM4 (80 ,80 ,80 ,00 );
111
161
112
162
// 4 bytes, second byte [80, 8F]
113
163
case 7 :
114
- v = load_little_endian<4 >(p);
115
- return (v & 0xC0C0F000 ) == 0x80808000 ;
164
+ std::memcpy (&v, p, 4 );
165
+ return (v & BOOST_JSON_MK_NUM4 (C0,C0,F0,00 ))
166
+ == BOOST_JSON_MK_NUM4 (80 ,80 ,80 ,00 );
116
167
}
117
168
}
118
169
0 commit comments