|
4 | 4 |
|
5 | 5 | module builtin
|
6 | 6 |
|
7 |
| -pub fn (s string) is_utf8() int { |
8 |
| - faulty_bytes := 0 |
9 |
| - len := s.len |
10 |
| - i := 0 |
11 |
| - // # size_t i = 0; |
12 |
| - # byte * str = s.str; |
13 |
| - # |
14 |
| - # while (i < len) { |
15 |
| - # if (str[i] <= 0x7F) /* 00..7F */ { |
16 |
| - # i += 1; |
17 |
| - # } |
18 |
| -#else if (str[i] >= 0xC2 && str[i] <= 0xDF) /* C2..DF 80..BF */ { |
19 |
| - # if (i + 1 < len) /* Expect a 2nd byte */ { |
20 |
| - # if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) { |
21 |
| - # printf( "After a first byte between C2 and DF, expecting a 2nd byte between 80 and BF"); |
22 |
| - # faulty_bytes = 2; |
23 |
| - # goto end; |
24 |
| - # } |
25 |
| - # } |
26 |
| -#else { |
27 |
| - # printf( "After a first byte between C2 and DF, expecting a 2nd byte."); |
28 |
| - # faulty_bytes = 1; |
29 |
| - # goto end; |
30 |
| - # } |
31 |
| - # i += 2; |
32 |
| - # } |
33 |
| -#else if (str[i] == 0xE0) /* E0 A0..BF 80..BF */ { |
34 |
| - # if (i + 2 < len) /* Expect a 2nd and 3rd byte */ { |
35 |
| - # if (str[i + 1] < 0xA0 || str[i + 1] > 0xBF) { |
36 |
| - # printf( "After a first byte of E0, expecting a 2nd byte between A0 and BF."); |
37 |
| - # faulty_bytes = 2; |
38 |
| - # goto end; |
39 |
| - # } |
40 |
| - # if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) { |
41 |
| - # printf( "After a first byte of E0, expecting a 3nd byte between 80 and BF."); |
42 |
| - # faulty_bytes = 3; |
43 |
| - # goto end; |
44 |
| - # } |
45 |
| - # } |
46 |
| -#else { |
47 |
| - # printf( "After a first byte of E0, expecting two following bytes."); |
48 |
| - # faulty_bytes = 1; |
49 |
| - # goto end; |
50 |
| - # } |
51 |
| - # i += 3; |
52 |
| - # } |
53 |
| -#else if (str[i] >= 0xE1 && str[i] <= 0xEC) /* E1..EC 80..BF 80..BF */ { |
54 |
| - # if (i + 2 < len) /* Expect a 2nd and 3rd byte */ { |
55 |
| - # if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) { |
56 |
| - # printf( "After a first byte between E1 and EC, expecting the 2nd byte between 80 and BF."); |
57 |
| - # faulty_bytes = 2; |
58 |
| - # goto end; |
59 |
| - # } |
60 |
| - # if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) { |
61 |
| - # printf( "After a first byte between E1 and EC, expecting the 3rd byte between 80 and BF."); |
62 |
| - # faulty_bytes = 3; |
63 |
| - # goto end; |
64 |
| - # } |
65 |
| - # } |
66 |
| -#else { |
67 |
| - # printf( "After a first byte between E1 and EC, expecting two following bytes."); |
68 |
| - # faulty_bytes = 1; |
69 |
| - # goto end; |
70 |
| - # } |
71 |
| - # i += 3; |
72 |
| - # } |
73 |
| -#else if (str[i] == 0xED) /* ED 80..9F 80..BF */ { |
74 |
| - # if (i + 2 < len) /* Expect a 2nd and 3rd byte */ { |
75 |
| - # if (str[i + 1] < 0x80 || str[i + 1] > 0x9F) { |
76 |
| - # printf( "After a first byte of ED, expecting 2nd byte between 80 and 9F."); |
77 |
| - # faulty_bytes = 2; |
78 |
| - # goto end; |
79 |
| - # } |
80 |
| - # if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) { |
81 |
| - # printf( "After a first byte of ED, expecting 3rd byte between 80 and BF."); |
82 |
| - # faulty_bytes = 3; |
83 |
| - # goto end; |
84 |
| - # } |
85 |
| - # } |
86 |
| -#else { |
87 |
| - # printf( "After a first byte of ED, expecting two following bytes."); |
88 |
| - # faulty_bytes = 1; |
89 |
| - # goto end; |
90 |
| - # } |
91 |
| - # i += 3; |
92 |
| - # } |
93 |
| -#else if (str[i] >= 0xEE && str[i] <= 0xEF) /* EE..EF 80..BF 80..BF */ { |
94 |
| - # if (i + 2 < len) /* Expect a 2nd and 3rd byte */ { |
95 |
| - # if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) { |
96 |
| - # printf( "After a first byte between EE and EF, expecting 2nd byte between 80 and BF."); |
97 |
| - # faulty_bytes = 2; |
98 |
| - # goto end; |
99 |
| - # } |
100 |
| - # if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) { |
101 |
| - # printf( "After a first byte between EE and EF, expecting 3rd byte between 80 and BF."); |
102 |
| - # faulty_bytes = 3; |
103 |
| - # goto end; |
104 |
| - # } |
105 |
| - # } |
106 |
| -#else { |
107 |
| - # printf( "After a first byte between EE and EF, two following bytes."); |
108 |
| - # faulty_bytes = 1; |
109 |
| - # goto end; |
110 |
| - # } |
111 |
| - # i += 3; |
112 |
| - # } |
113 |
| -#else if (str[i] == 0xF0) /* F0 90..BF 80..BF 80..BF */ { |
114 |
| - # if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ { |
115 |
| - # if (str[i + 1] < 0x90 || str[i + 1] > 0xBF) { |
116 |
| - # printf( "After a first byte of F0, expecting 2nd byte between 90 and BF."); |
117 |
| - # faulty_bytes = 2; |
118 |
| - # goto end; |
119 |
| - # } |
120 |
| - # if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) { |
121 |
| - # printf( "After a first byte of F0, expecting 3rd byte between 80 and BF."); |
122 |
| - # faulty_bytes = 3; |
123 |
| - # goto end; |
124 |
| - # } |
125 |
| - # if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) { |
126 |
| - # printf( "After a first byte of F0, expecting 4th byte between 80 and BF."); |
127 |
| - # faulty_bytes = 4; |
128 |
| - # goto end; |
129 |
| - # } |
130 |
| - # } |
131 |
| -#else { |
132 |
| - # printf( "After a first byte of F0, expecting three following bytes."); |
133 |
| - # faulty_bytes = 1; |
134 |
| - # goto end; |
135 |
| - # } |
136 |
| - # i += 4; |
137 |
| - # } |
138 |
| -#else if (str[i] >= 0xF1 && str[i] <= 0xF3) /* F1..F3 80..BF 80..BF 80..BF */ { |
139 |
| - # if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ { |
140 |
| - # if (str[i + 1] < 0x80 || str[i + 1] > 0xBF) { |
141 |
| - # printf( "After a first byte of F1, F2, or F3, expecting a 2nd byte between 80 and BF."); |
142 |
| - # faulty_bytes = 2; |
143 |
| - # goto end; |
144 |
| - # } |
145 |
| - # if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) { |
146 |
| - # printf( "After a first byte of F1, F2, or F3, expecting a 3rd byte between 80 and BF."); |
147 |
| - # faulty_bytes = 3; |
148 |
| - # goto end; |
149 |
| - # } |
150 |
| - # if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) { |
151 |
| - # printf( "After a first byte of F1, F2, or F3, expecting a 4th byte between 80 and BF."); |
152 |
| - # faulty_bytes = 4; |
153 |
| - # goto end; |
154 |
| - # } |
155 |
| - # } |
156 |
| -#else { |
157 |
| - # printf( "After a first byte of F1, F2, or F3, expecting three following bytes."); |
158 |
| - # faulty_bytes = 1; |
159 |
| - # goto end; |
160 |
| - # } |
161 |
| - # i += 4; |
162 |
| - # } |
163 |
| -#else if (str[i] == 0xF4) /* F4 80..8F 80..BF 80..BF */ { |
164 |
| - # if (i + 3 < len) /* Expect a 2nd, 3rd 3th byte */ { |
165 |
| - # if (str[i + 1] < 0x80 || str[i + 1] > 0x8F) { |
166 |
| - # printf( "After a first byte of F4, expecting 2nd byte between 80 and 8F."); |
167 |
| - # faulty_bytes = 2; |
168 |
| - # goto end; |
169 |
| - # } |
170 |
| - # if (str[i + 2] < 0x80 || str[i + 2] > 0xBF) { |
171 |
| - # printf( "After a first byte of F4, expecting 3rd byte between 80 and BF."); |
172 |
| - # faulty_bytes = 3; |
173 |
| - # goto end; |
174 |
| - # } |
175 |
| - # if (str[i + 3] < 0x80 || str[i + 3] > 0xBF) { |
176 |
| - # printf( "After a first byte of F4, expecting 4th byte between 80 and BF."); |
177 |
| - # faulty_bytes = 4; |
178 |
| - # goto end; |
179 |
| - # } |
180 |
| - # } |
181 |
| -#else { |
182 |
| - # printf( "After a first byte of F4, expecting three following bytes."); |
183 |
| - # faulty_bytes = 1; |
184 |
| - # goto end; |
185 |
| - # } |
186 |
| - # i += 4; |
187 |
| - # } |
188 |
| -#else { |
189 |
| - # printf( "i=%d Expecting bytes in the following ranges: 00..7F C2..F4.", |
190 |
| - # i); |
191 |
| - # faulty_bytes = 1; |
192 |
| - # goto end; |
193 |
| - # } |
194 |
| - # } |
195 |
| - # |
196 |
| - # end: ; |
197 |
| - // println('faulty bytes=$faulty_bytes i=$i') |
198 |
| - // # printf("c='%c'\n", str[i]); |
199 |
| - ok := faulty_bytes == 0 |
200 |
| - if ok { |
201 |
| - return -1 |
202 |
| - } |
203 |
| - if !ok { |
204 |
| - println('utf is bad dalen=$len KEK $s sdf') |
205 |
| - // s = s.left(i) |
206 |
| - } |
207 |
| - return i |
208 |
| - // return ok |
| 7 | +pub fn utf8_char_len(b byte) int { |
| 8 | + return (( 0xe5000000 >> (( b >> 3 ) & 0x1e )) & 3 ) + 1 |
209 | 9 | }
|
210 | 10 |
|
211 |
| -/* |
212 |
| -fn (s string) runes() []string { |
213 |
| - res2 := []string{} |
214 |
| - // res := new_empty_array_with_cap_string(s.len) |
215 |
| - res := []string{} |
216 |
| - if !s.is_utf8() { |
217 |
| - mys := s |
218 |
| - println('string.me runes bad utf $mys HAHA') |
219 |
| - return res |
220 |
| - } |
221 |
| - for i := 0; i < s.len; i++ { |
222 |
| - char_len := 0 |
223 |
| - # char_len =UTF8_CHAR_LEN(s.str[i]); |
224 |
| - switch char_len { |
225 |
| - case 1: |
226 |
| - // println('ONE') |
227 |
| - res <<(char2string(s[i])) |
228 |
| - case 2: |
229 |
| - // println('TWO') |
230 |
| - rune2 := s.substr(i, i + 2) |
231 |
| - res <<(rune2) |
232 |
| - i++ |
233 |
| - case 3: |
234 |
| - // println('TWO') |
235 |
| - rune3 := s.substr(i, i + 3) |
236 |
| - res <<(rune3) |
237 |
| - i++ |
238 |
| - i++ |
239 |
| - case 4: |
240 |
| - // println('TWO') |
241 |
| - rune4 := s.substr(i, i + 4) |
242 |
| - res <<(rune4) |
243 |
| - i++ |
244 |
| - i++ |
245 |
| - i++ |
246 |
| - } |
247 |
| - } |
248 |
| - return res |
249 |
| -} |
250 |
| -*/ |
251 | 11 | // Convert utf32 to utf8
|
252 | 12 | // utf32 == Codepoint
|
253 | 13 | pub fn utf32_to_str(code u32) string {
|
|
0 commit comments