|
120 | 120 | # include <icu.h> |
121 | 121 | #endif |
122 | 122 |
|
123 | | - |
124 | 123 | /* Create local inline versions of key functions for case-insensitive operations |
125 | 124 | */ |
126 | 125 | #import "Additions/unicode/caseconv.h" |
|
139 | 138 |
|
140 | 139 | #import "GNUstepBase/Unicode.h" |
141 | 140 |
|
/**
 * Returns the range of the composed character sequence which contains
 * the UTF-16 code unit at anIndex within buf (holding length code units).
 *
 * When built with ICU the sequence is the grapheme cluster reported by a
 * UBRK_CHARACTER break iterator; otherwise it is a base character plus
 * any following characters for which uni_isnonsp() returns YES.
 *
 * Returns NSMakeRange(0, NSNotFound) if the range can not be determined:
 * buf is NULL, anIndex is not inside the buffer, the buffer is too long
 * to be addressed with ICU's 32-bit offsets, or ICU reports an error.
 */
NSRange GSPrivateRangeOfComposed(const unichar *buf, NSUInteger length,
  NSUInteger anIndex)
{
  /* Never read outside the supplied buffer.
   */
  if (NULL == buf || anIndex >= length)
    {
      return NSMakeRange(0, NSNotFound);
    }

  /* ASCII characters are never counted as parts of a longer
   * composed sequence.  This seems to match OSX behavior and is a
   * reasonable optimisation.
   */
  if (buf[anIndex] < 128)
    {
      return NSMakeRange(anIndex, 1);
    }
  else
    {
#if	GS_USE_ICU
      UErrorCode	status = U_ZERO_ERROR;
      int32_t		index;
      int32_t		start;
      int32_t		end;
      UBreakIterator	*bi;

      /* ICU addresses text with int32_t offsets; refuse a buffer whose
       * length would be truncated by the casts below.
       */
      if (length > (NSUInteger)INT32_MAX)
	{
	  return NSMakeRange(0, NSNotFound);
	}
      index = (int32_t)anIndex;

      /* Create a grapheme-cluster (UBRK_CHARACTER) break iterator
       * over the UTF16 buffer using the default locale.
       */
      bi = ubrk_open(UBRK_CHARACTER, NULL, (const UChar*)buf,
	(int32_t)length, &status);
      if (U_FAILURE(status) || NULL == bi)
	{
	  if (NULL != bi)
	    {
	      ubrk_close(bi);
	    }
	  return NSMakeRange(0, NSNotFound);
	}

      /* ubrk_isBoundary() reports whether index is a cluster boundary and
       * leaves the iterator at the first boundary at or after index.
       */
      if (ubrk_isBoundary(bi, index))
	{
	  /* index starts a cluster; the next boundary ends it.
	   */
	  start = index;
	  end = ubrk_next(bi);
	}
      else
	{
	  int32_t	next;

	  /* index lies inside a cluster (e.g. trail surrogate or combining
	   * mark).  The iterator is now at the boundary following index:
	   * step back from there to the cluster start, then forward again
	   * to confirm the cluster end.
	   */
	  end = ubrk_current(bi);
	  start = ubrk_preceding(bi, end);
	  next = ubrk_next(bi);
	  if (next != UBRK_DONE)
	    {
	      end = next;
	    }
	}

      ubrk_close(bi);

      if (UBRK_DONE == start || UBRK_DONE == end)
	{
	  return NSMakeRange(0, NSNotFound);
	}
      return NSMakeRange((NSUInteger)start, (NSUInteger)(end - start));
#else
      /* Indices are NSUInteger so that they are not truncated where
       * NSUInteger is wider than unsigned int.
       */
      NSUInteger	start;
      NSUInteger	end;

      /* Walk back over non-spacing characters to the base character.
       */
      for (start = anIndex; start > 0; start--)
	{
	  if (uni_isnonsp(buf[start]) == NO)
	    {
	      break;
	    }
	}
      /* Walk forward over the non-spacing characters which follow it.
       */
      for (end = start + 1; end < length; end++)
	{
	  if (uni_isnonsp(buf[end]) == NO)
	    {
	      break;
	    }
	}

      return NSMakeRange(start, end - start);
#endif
    }
}
240 | | - |
241 | | - |
242 | | - |
/* Declares the class method +_scanDouble:from: in a category of NSScanner
 * so that it can be used in this file; its implementation is not shown here.
 * NOTE(review): presumably parses a double from str into *value and
 * returns whether that succeeded - confirm against the implementation.
 */
243 | 141 | @interface NSScanner (Double)
244 | 142 | + (BOOL) _scanDouble: (double*)value from: (NSString*)str;
245 | 143 | @end
@@ -3149,27 +3047,31 @@ - (NSUInteger) indexOfString: (NSString*)substring |
3149 | 3047 | */ |
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
{
  /* Indices and length are NSUInteger (not unsigned) so that they are
   * not truncated where NSUInteger is wider than unsigned int.
   */
  NSUInteger	start;
  NSUInteger	end;
  NSUInteger	length = [self length];
  unichar	(*caiImp)(NSString*, SEL, NSUInteger);

  /* Raises NSRangeException if anIndex is not within the receiver.
   */
  if (anIndex >= length)
    {
      [NSException raise: NSRangeException format:@"Invalid location."];
    }

  /* Look up the character access method once, to avoid repeated
   * message dispatch inside the loops.
   */
  caiImp = (unichar (*)(NSString*,SEL,NSUInteger))
    [self methodForSelector: caiSel];

  /* Walk back from anIndex over non-spacing characters to find the
   * start of the sequence.
   * NOTE(review): only uni_isnonsp() is consulted, so surrogate pairs
   * are presumably not joined into one sequence - confirm.
   */
  for (start = anIndex; start > 0; start--)
    {
      if (uni_isnonsp((*caiImp)(self, caiSel, start)) == NO)
	{
	  break;
	}
    }

  /* Walk forward over the non-spacing characters following the start.
   */
  for (end = start + 1; end < length; end++)
    {
      if (uni_isnonsp((*caiImp)(self, caiSel, end)) == NO)
	{
	  break;
	}
    }

  return NSMakeRange(start, end - start);
}
3174 | 3076 |
|
3175 | 3077 | - (NSRange) rangeOfComposedCharacterSequencesForRange: (NSRange)range |
|
0 commit comments