Skip to content

Commit c6606ed

Browse files
committed
Fixup for no ICU
1 parent e3402e8 commit c6606ed

1 file changed

Lines changed: 80 additions & 74 deletions

File tree

Source/NSString.m

Lines changed: 80 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -142,94 +142,100 @@
142142
/**
 * Returns the range of the composed character sequence which contains
 * the UTF-16 code unit at anIndex within buf.
 *
 * buf     - the UTF-16 code units to examine.
 * length  - the number of code units in buf.
 * anIndex - the index of the code unit whose enclosing sequence is wanted.
 *
 * On failure (no buffer, index outside the buffer, or the ICU break
 * iterator could not be created/used) the function returns
 * NSMakeRange(0, NSNotFound).
 *
 * When built with ICU the sequence is the grapheme cluster reported by a
 * UBRK_CHARACTER break iterator; otherwise it is a base character plus the
 * run of code units which uni_isnonsp() classifies as non-spacing.
 */
NSRange GSPrivateRangeOfComposed(const unichar *buf, NSUInteger length,
  NSUInteger anIndex)
{
  /* Never read outside the supplied buffer: the ASCII test below
   * dereferences buf[anIndex] directly.
   */
  if (NULL == buf || anIndex >= length)
    {
      return NSMakeRange(0, NSNotFound);
    }

  /* ASCII characters are never counted as parts of a longer
   * composed sequence.  This seems to match OSX behavior and is a
   * reasonable optimisation.
   */
  if (buf[anIndex] < 128)
    {
      return NSMakeRange(anIndex, 1);
    }
  else
    {
#if GS_USE_ICU
      UErrorCode	status = U_ZERO_ERROR;
      int32_t		index;
      int32_t		start;
      int32_t		end;
      UBreakIterator	*bi;

      /* The ICU API takes 32bit signed lengths/offsets; refuse buffers
       * which cannot be represented rather than silently truncating.
       */
      if (length > (NSUInteger)INT32_MAX)
        {
          return NSMakeRange(0, NSNotFound);
        }
      index = (int32_t)anIndex;

      /* Create a grapheme-cluster (UBRK_CHARACTER) break iterator
       * over the UTF16 buffer.
       */
      bi = ubrk_open(UBRK_CHARACTER, NULL /* default locale */,
        (const UChar*)buf, (int32_t)length, &status);

      if (U_FAILURE(status) || NULL == bi)
        {
          /* Do not leak an iterator returned alongside a failure status.
           */
          if (NULL != bi)
            {
              ubrk_close(bi);
            }
          return NSMakeRange(0, NSNotFound);
        }

      /* Find start, end of the grapheme cluster containing index.
       *
       * ubrk_isBoundary(bi, pos) returns true when pos is a cluster
       * boundary AND leaves the iterator positioned at pos, ready for
       * ubrk_next().
       *
       * Case A - index is itself a cluster-start boundary:
       *   isBoundary returns true and positions the iterator there.
       *   start = index, end = ubrk_next().
       *
       * Case B - index falls inside a cluster (e.g. trail surrogate,
       *   combining mark, or non-first code unit of an emoji modifier
       *   sequence):
       *   isBoundary returns false.  ICU positions the iterator at the
       *   next boundary strictly after index.  We then call
       *   ubrk_preceding() to step back to the cluster start, and
       *   ubrk_next() to return to the end.
       */
      if (ubrk_isBoundary(bi, index))
        {
          start = index;
          end = ubrk_next(bi);
        }
      else
        {
          int32_t	next;

          end = ubrk_current(bi);
          start = ubrk_preceding(bi, end);
          next = ubrk_next(bi);
          if (next != UBRK_DONE)
            {
              end = next;
            }
        }

      ubrk_close(bi);

      if (UBRK_DONE == start || UBRK_DONE == end)
        {
          return NSMakeRange(0, NSNotFound);
        }
      return NSMakeRange((NSUInteger)start, (NSUInteger)(end - start));
#else
      /* Use NSUInteger so that indexes are not truncated on platforms
       * where it is wider than unsigned int.
       */
      NSUInteger	start;
      NSUInteger	end;

      /* Step back over non-spacing code units to find the start of the
       * sequence; index zero is always accepted as a start.
       */
      for (start = anIndex; start > 0; start--)
        {
          if (uni_isnonsp(buf[start]) == NO)
            {
              break;
            }
        }

      /* Step forward over the non-spacing code units which follow the
       * start, stopping at the end of the buffer.
       */
      for (end = start + 1; end < length; end++)
        {
          if (uni_isnonsp(buf[end]) == NO)
            {
              break;
            }
        }

      return NSMakeRange(start, end - start);
#endif
    }
}
234240

235241

0 commit comments

Comments
 (0)