|
120 | 120 | # include <icu.h> |
121 | 121 | #endif |
122 | 122 |
|
| 123 | + |
123 | 124 | /* Create local inline versions of key functions for case-insensitive operations |
124 | 125 | */ |
125 | 126 | #import "Additions/unicode/caseconv.h" |
|
138 | 139 |
|
139 | 140 | #import "GNUstepBase/Unicode.h" |
140 | 141 |
|
/**
 * Locates the composed character sequence which contains the UTF-16
 * code unit at anIndex, within the length code units starting at buf.
 *
 * When built with ICU (GS_USE_ICU == 1) the sequence is the grapheme
 * cluster reported by a UBRK_CHARACTER break iterator using the default
 * locale.  Otherwise the sequence is a code unit followed by the run of
 * code units for which uni_isnonsp() returns YES.
 *
 * Returns the range of the sequence.  On failure (NULL buffer, anIndex
 * not less than length, a buffer too long for ICU's 32-bit offsets, or
 * an ICU error) the result is the range {0, NSNotFound}; callers should
 * test the length field for NSNotFound.
 */
NSRange GSPrivateRangeOfComposed(const unichar *buf, NSUInteger length,
  NSUInteger anIndex)
{
  /* Reject anything we can't index safely.  Without this check the
   * non-ICU scan would read outside the buffer, and ICU would report
   * the final cluster for an index beyond the end of the text.
   */
  if (NULL == buf || anIndex >= length)
    {
      return NSMakeRange(0, NSNotFound);
    }
  else
    {
#if GS_USE_ICU == 1
      UErrorCode	status = U_ZERO_ERROR;
      UBreakIterator	*bi;
      int32_t		index;
      int32_t		start;
      int32_t		end;

      /* ICU offsets are signed 32-bit values; a longer buffer can't be
       * represented, so fail rather than silently truncate the length.
       */
      if (length > (NSUInteger)0x7fffffff)
        {
          return NSMakeRange(0, NSNotFound);
        }
      index = (int32_t)anIndex;

      /* Create a grapheme-cluster (UBRK_CHARACTER) break iterator
       * over the UTF-16 buffer, using the default locale.
       */
      bi = ubrk_open(UBRK_CHARACTER, NULL, (const UChar*)buf,
        (int32_t)length, &status);
      if (U_FAILURE(status) || NULL == bi)
        {
          if (NULL != bi)
            {
              ubrk_close(bi);
            }
          return NSMakeRange(0, NSNotFound);
        }

      /* ubrk_isBoundary() reports whether index is a cluster boundary
       * and, as a side effect, leaves the iterator on the first boundary
       * at or after index.
       */
      if (ubrk_isBoundary(bi, index))
        {
          /* index starts a cluster; the next boundary ends it.
           */
          start = index;
          end = ubrk_next(bi);
        }
      else
        {
          /* index lies inside a cluster (e.g. a trail surrogate or a
           * combining mark).  The iterator is already on the boundary
           * which ends that cluster, and the boundary before that one
           * is where the cluster starts.
           */
          end = ubrk_current(bi);
          start = ubrk_preceding(bi, end);
        }
      ubrk_close(bi);

      if (UBRK_DONE == start || UBRK_DONE == end)
        {
          return NSMakeRange(0, NSNotFound);
        }
      return NSMakeRange((NSUInteger)start, (NSUInteger)(end - start));
#else
      NSUInteger	start;
      NSUInteger	end;

      /* Step back over non-spacing code units to reach the base
       * character (or the start of the buffer).
       */
      for (start = anIndex; start > 0; start--)
        {
          if (uni_isnonsp(buf[start]) == NO)
            {
              break;
            }
        }
      /* Step forward over the non-spacing code units which follow
       * the base character.
       */
      for (end = start + 1; end < length; end++)
        {
          if (uni_isnonsp(buf[end]) == NO)
            {
              break;
            }
        }
      return NSMakeRange(start, end - start);
#endif
    }
}
| 234 | + |
| 235 | + |
| 236 | + |
/* Declares the class method +_scanDouble:from: in a (Double) category of
 * NSScanner so that code in this file can call it.  The method takes a
 * pointer to a double and a string and returns a BOOL; presumably it
 * parses str into *value and reports success, but the implementation is
 * not visible here - confirm against its definition.
 */
141 | 237 | @interface NSScanner (Double)
142 | 238 | + (BOOL) _scanDouble: (double*)value from: (NSString*)str;
143 | 239 | @end
@@ -3047,31 +3143,27 @@ - (NSUInteger) indexOfString: (NSString*)substring |
3047 | 3143 | */ |
/* Returns the range of the composed character sequence containing the
 * character at anIndex, as computed by GSPrivateRangeOfComposed() over
 * a copy of the receiver's characters.
 * Raises NSRangeException if anIndex is not less than the length of the
 * receiver (which includes every index into an empty string).
 */
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
{
  NSUInteger	length = [self length];
  NSRange	result;

  if (anIndex >= length)
    {
      [NSException raise: NSRangeException format: @"Invalid location."];
    }

  /* Getting past the check above means that length is greater than
   * anIndex and therefore non-zero, so no separate test for an empty
   * string is needed.
   * NOTE(review): GS_BEGINITEMBUF/GS_ENDITEMBUF are defined elsewhere;
   * they are used here to obtain and dispose of a temporary buffer of
   * length unichars named buf - confirm against the macro definitions.
   */
  GS_BEGINITEMBUF(buf, (length * sizeof(unichar)), unichar)

  [self getCharacters: buf];
  result = GSPrivateRangeOfComposed(buf, length, anIndex);
  GS_ENDITEMBUF()
  return result;
}
3076 | 3168 |
|
3077 | 3169 | - (NSRange) rangeOfComposedCharacterSequencesForRange: (NSRange)range |
|
0 commit comments