Skip to content

Commit f5d031c

Browse files
committed
Revert composed character sequence changes; they should have been made in a branch
This reverts commit 4ff3474.
1 parent c6606ed commit f5d031c

6 files changed

Lines changed: 31 additions & 384 deletions

File tree

ChangeLog

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,3 @@
1-
2026-05-11 Richard Frith-Macdonald <rfm@gnu.org>
2-
3-
* Source/GSPrivate.h: New function to get composed sequence range.
4-
* Source/GSString.m: Use new ICU based code if possible.
5-
* Source/NSString.m: Use new ICU based code if possible.
6-
* Tests/base/NSString/rangeOfComposedCharacter.m:
7-
Add range of composed character sequence testcases to cover a fairly
8-
comprehensive range of cases rather than just the simplistic set
9-
handled by the non-ICU code.
10-
111
2026-05-09 Richard Frith-Macdonald <rfm@gnu.org>
122

133
* Source/GSSocketStream.m:

Source/GSPrivate.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -464,11 +464,6 @@ void GSPrivateNotifyIdle(NSString *mode) GS_ATTRIB_PRIVATE;
464464
*/
465465
BOOL GSPrivateNotifyMore(NSString *mode) GS_ATTRIB_PRIVATE;
466466

467-
/* Function to return the range of the composed character sequence at index.
468-
*/
469-
NSRange GSPrivateRangeOfComposed(const unichar *buf, NSUInteger length,
470-
NSUInteger index) GS_ATTRIB_PRIVATE;
471-
472467
/* Function to return the function for searching in a string for a range.
473468
*/
474469
typedef NSRange (*GSRSFunc)(id, id, unsigned, NSRange);

Source/GSString.m

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3217,22 +3217,28 @@ static void GSStrWiden(GSStr s)
32173217
rangeOfSequence_c(GSStr self, unsigned anIndex)
32183218
{
32193219
if (anIndex >= self->_count)
3220-
[NSException raise: NSRangeException format: @"Invalid location."];
3220+
[NSException raise: NSRangeException format:@"Invalid location."];
32213221

32223222
return (NSRange){anIndex, 1};
32233223
}
32243224

32253225
static inline NSRange
32263226
rangeOfSequence_u(GSStr self, unsigned anIndex)
32273227
{
3228-
NSRange result;
3228+
unsigned start;
3229+
unsigned end;
32293230

32303231
if (anIndex >= self->_count)
3231-
{
3232-
[NSException raise: NSRangeException format: @"Invalid location."];
3233-
}
3234-
result = GSPrivateRangeOfComposed(self->_contents.u, self->_count, anIndex);
3235-
return result;
3232+
[NSException raise: NSRangeException format:@"Invalid location."];
3233+
3234+
start = anIndex;
3235+
while (uni_isnonsp(self->_contents.u[start]) && start > 0)
3236+
start--;
3237+
end = start + 1;
3238+
if (end < self->_count)
3239+
while ((end < self->_count) && (uni_isnonsp(self->_contents.u[end])))
3240+
end++;
3241+
return (NSRange){start, end-start};
32363242
}
32373243

32383244
static inline NSRange
@@ -6083,12 +6089,6 @@ - (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet
60836089

60846090
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
60856091
{
6086-
#if GS_USE_ICU
6087-
/* NB. A comprehensive implementation needs to work with UTF-16
6088-
* as done by the NSString code.
6089-
*/
6090-
return [super rangeOfComposedCharacterSequenceAtIndex: anIndex];
6091-
#else
60926092
NSUInteger start = 0;
60936093
NSUInteger pos = 0;
60946094
unichar n = 0;
@@ -6129,7 +6129,6 @@ - (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
61296129
[NSException raise: NSInvalidArgumentException
61306130
format: @"-rangeOfComposedCharacterSequenceAtIndex: index out of range"];
61316131
return NSMakeRange(NSNotFound, 0);
6132-
#endif
61336132
}
61346133
#endif // GNUSTEP_NEW_STRING_ABI
61356134

Source/NSString.m

Lines changed: 18 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@
120120
# include <icu.h>
121121
#endif
122122

123-
124123
/* Create local inline versions of key functions for case-insensitive operations
125124
*/
126125
#import "Additions/unicode/caseconv.h"
@@ -139,107 +138,6 @@
139138

140139
#import "GNUstepBase/Unicode.h"
141140

142-
NSRange GSPrivateRangeOfComposed(const unichar *buf, NSUInteger length,
143-
NSUInteger anIndex)
144-
{
145-
/* ASCII characters are never counted as parts of a longer
146-
* composed sequence. This seems to match OSX behavior and is a
147-
* reasonable optimisation.
148-
*/
149-
if (buf[anIndex] < 128)
150-
{
151-
return NSMakeRange(anIndex, 1);
152-
}
153-
else
154-
{
155-
#if GS_USE_ICU
156-
UErrorCode status = U_ZERO_ERROR;
157-
int32_t len = (int32_t)length;
158-
int32_t index = (int32_t)anIndex;
159-
int32_t start;
160-
int32_t end;
161-
UBreakIterator *bi;
162-
163-
/* Create a grapheme-cluster (UBRK_CHARACTER) break iterator
164-
* over the UTF16 buffer.
165-
*/
166-
bi = ubrk_open(UBRK_CHARACTER, NULL /* default locale */,
167-
(const UChar*)buf, len, &status);
168-
169-
if (U_FAILURE(status) || NULL == bi)
170-
{
171-
return NSMakeRange(0, NSNotFound);
172-
}
173-
174-
/* Find start, end of the grapheme cluster containing index.
175-
*
176-
* ubrk_isBoundary(bi, pos) returns true when pos is a cluster boundary
177-
* AND leaves the iterator positioned at pos, ready for ubrk_next().
178-
*
179-
* Case A — index is itself a cluster-start boundary:
180-
* isBoundary returns true and positions the iterator there.
181-
* start = index, end = ubrk_next().
182-
*
183-
* Case B — index falls inside a cluster (e.g. trail surrogate, combining
184-
* mark, or non-first code unit of an emoji modifier sequence):
185-
* isBoundary returns false. ICU positions the iterator at the next
186-
* boundary strictly after index. We then call ubrk_preceding() to
187-
* step back to the cluster start, and ubrk_next() to return to the end.
188-
*
189-
* This avoids the pitfall of calling preceding(index+1) when index+1 is
190-
* itself in the middle of a surrogate pair, which returns UBRK_DONE.
191-
*/
192-
index = (int32_t)anIndex;
193-
if (ubrk_isBoundary(bi, index))
194-
{
195-
start = index;
196-
end = ubrk_next(bi);
197-
}
198-
else
199-
{
200-
int32_t next;
201-
202-
end = ubrk_current(bi);
203-
start = ubrk_preceding(bi, end);
204-
next = ubrk_next(bi);
205-
if (next != UBRK_DONE)
206-
{
207-
end = next;
208-
}
209-
}
210-
211-
ubrk_close(bi);
212-
213-
if (UBRK_DONE == start || UBRK_DONE == end)
214-
{
215-
return NSMakeRange(0, NSNotFound);
216-
}
217-
return NSMakeRange((NSUInteger)start, (NSUInteger)(end - start));
218-
#else
219-
unsigned start;
220-
unsigned end;
221-
unichar ch;
222-
223-
for (start = anIndex; start > 0; start--)
224-
{
225-
ch = buf[start];
226-
if (uni_isnonsp(ch) == NO)
227-
break;
228-
}
229-
for (end = start+1; end < length; end++)
230-
{
231-
ch = buf[end];
232-
if (uni_isnonsp(ch) == NO)
233-
break;
234-
}
235-
236-
return NSMakeRange(start, end-start);
237-
#endif
238-
}
239-
}
240-
241-
242-
243141
@interface NSScanner (Double)
244142
+ (BOOL) _scanDouble: (double*)value from: (NSString*)str;
245143
@end
@@ -3149,27 +3047,31 @@ - (NSUInteger) indexOfString: (NSString*)substring
31493047
*/
31503048
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
31513049
{
3152-
NSUInteger length = [self length];
3050+
unsigned start;
3051+
unsigned end;
3052+
unsigned length = [self length];
3053+
unichar ch;
3054+
unichar (*caiImp)(NSString*, SEL, NSUInteger);
31533055

31543056
if (anIndex >= length)
3155-
{
3156-
[NSException raise: NSRangeException format: @"Invalid location."];
3157-
}
3057+
[NSException raise: NSRangeException format:@"Invalid location."];
3058+
caiImp = (unichar (*)(NSString*,SEL,NSUInteger))
3059+
[self methodForSelector: caiSel];
31583060

3159-
if (0 == length)
3061+
for (start = anIndex; start > 0; start--)
31603062
{
3161-
return NSMakeRange(0, NSNotFound);
3063+
ch = (*caiImp)(self, caiSel, start);
3064+
if (uni_isnonsp(ch) == NO)
3065+
break;
31623066
}
3163-
else
3067+
for (end = start+1; end < length; end++)
31643068
{
3165-
NSRange result;
3166-
GS_BEGINITEMBUF(buf, (length * sizeof(unichar)), unichar)
3167-
3168-
[self getCharacters: buf];
3169-
result = GSPrivateRangeOfComposed(buf, length, anIndex);
3170-
GS_ENDITEMBUF()
3171-
return result;
3069+
ch = (*caiImp)(self, caiSel, end);
3070+
if (uni_isnonsp(ch) == NO)
3071+
break;
31723072
}
3073+
3074+
return NSMakeRange(start, end-start);
31733075
}
31743076

31753077
- (NSRange) rangeOfComposedCharacterSequencesForRange: (NSRange)range

0 commit comments

Comments
 (0)