|
/**
 * Returns the range of the composed character sequence which contains the
 * UTF-16 code unit at anIndex, where buf holds length code units.<br />
 * An ASCII code unit (value below 128) at anIndex is always reported as a
 * sequence of length one, without looking at what follows it.<br />
 * Otherwise, when built with ICU the range is the grapheme cluster found
 * by a UBRK_CHARACTER break iterator; without ICU the range is the code
 * unit at or before anIndex for which uni_isnonsp() is false (or index
 * zero), plus the run of following code units for which it is true.<br />
 * Returns NSMakeRange(0, NSNotFound) if buf is NULL, if anIndex is not
 * less than length, or if the ICU iterator cannot be created or does not
 * report a boundary.
 */
NSRange GSPrivateRangeOfComposed(const unichar *buf, NSUInteger length,
  NSUInteger anIndex)
{
#if GS_USE_ICU
  UErrorCode		status = U_ZERO_ERROR;
  UBreakIterator	*bi;
  int32_t		index;
  int32_t		start;
  int32_t		end;
#else
  /* Full width indices: the buffer length is an NSUInteger and must not
   * be truncated to 32 bits on LP64 platforms.
   */
  NSUInteger		start;
  NSUInteger		end;
#endif

  /* Never read outside the supplied buffer.
   */
  if (NULL == buf || anIndex >= length)
    {
      return NSMakeRange(0, NSNotFound);
    }

  /* ASCII characters are never counted as parts of a longer
   * composed sequence.  This seems to match OSX behavior and is a
   * reasonable optimisation.
   */
  if (buf[anIndex] < 128)
    {
      return NSMakeRange(anIndex, 1);
    }

#if GS_USE_ICU
  /* The ICU API uses 32bit signed offsets, so a longer buffer can't be
   * passed to it without the length being corrupted by the cast.
   */
  if (length > (NSUInteger)INT32_MAX)
    {
      return NSMakeRange(0, NSNotFound);
    }
  index = (int32_t)anIndex;

  /* Create a grapheme-cluster (UBRK_CHARACTER) break iterator
   * over the UTF16 buffer.
   */
  bi = ubrk_open(UBRK_CHARACTER, NULL /* default locale */,
    (const UChar*)buf, (int32_t)length, &status);
  if (U_FAILURE(status) || NULL == bi)
    {
      if (bi != NULL)
        {
          ubrk_close(bi);	// Don't leak an iterator on a failure status.
        }
      return NSMakeRange(0, NSNotFound);
    }

  /* Find start, end of the grapheme cluster containing index.
   *
   * Case A: index is itself a cluster boundary.
   *   ubrk_isBoundary() returns true and leaves the iterator at index,
   *   so the cluster runs from index to ubrk_next().
   *
   * Case B: index falls inside a cluster (e.g. trail surrogate or
   *   combining mark).
   *   ubrk_isBoundary() returns false and (per the ICU documentation)
   *   leaves the iterator on the first boundary following index, which
   *   is the end of the cluster.  ubrk_preceding() from there steps back
   *   to the cluster start and ubrk_next() returns to the end again.
   */
  if (ubrk_isBoundary(bi, index))
    {
      start = index;
      end = ubrk_next(bi);
    }
  else
    {
      int32_t	next;

      end = ubrk_current(bi);
      start = ubrk_preceding(bi, end);
      next = ubrk_next(bi);
      if (next != UBRK_DONE)
        {
          end = next;
        }
    }

  ubrk_close(bi);

  if (UBRK_DONE == start || UBRK_DONE == end)
    {
      return NSMakeRange(0, NSNotFound);
    }
  return NSMakeRange((NSUInteger)start, (NSUInteger)(end - start));
#else
  /* Step back over code units reported by uni_isnonsp() until we reach
   * one which is not (or the start of the buffer).
   */
  for (start = anIndex; start > 0; start--)
    {
      if (!uni_isnonsp(buf[start]))
        {
          break;
        }
    }
  /* Extend forwards over the run of code units which uni_isnonsp()
   * reports after the start position.
   */
  for (end = start + 1; end < length; end++)
    {
      if (!uni_isnonsp(buf[end]))
        {
          break;
        }
    }

  return NSMakeRange(start, end - start);
#endif
}
234 | 240 |
|
235 | 241 |
|
|
0 commit comments