Skip to content

Commit a59e2b3

Browse files
authored
Count surrogate pair as single character (#779)
* Count surrogate pair as single character

  String expression operators now count UTF-16 surrogate pairs as single characters instead of splitting them up into individual surrogates.

* Removed extraneous empty string case
1 parent 6fb546e commit a59e2b3

File tree

6 files changed

+270
-16
lines changed

6 files changed

+270
-16
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
- _...Add new stuff here..._
66

77
### 🐞 Bug fixes
8+
- The `index-of`, `length`, and `slice` expression operators count a UTF-16 surrogate pair as a single character. ([#779](https://github.com/maplibre/maplibre-style-spec/pull/779))
89
- _...Add new stuff here..._
910

1011
## 20.3.0

src/expression/definitions/index_of.ts

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,24 @@ class IndexOf implements Expression {
6060
throw new RuntimeError(`Expected first argument to be of type boolean, string, number or null, but found ${toString(typeOf(needle))} instead.`);
6161
}
6262

63-
if (!isValidNativeType(haystack, ['string', 'array'])) {
64-
throw new RuntimeError(`Expected second argument to be of type array or string, but found ${toString(typeOf(haystack))} instead.`);
63+
let fromIndex;
64+
if (this.fromIndex) {
65+
fromIndex = (this.fromIndex.evaluate(ctx) as number);
6566
}
6667

67-
if (this.fromIndex) {
68-
const fromIndex = (this.fromIndex.evaluate(ctx) as number);
68+
if (isValidNativeType(haystack, ['string'])) {
69+
const rawIndex = haystack.indexOf(needle, fromIndex);
70+
if (rawIndex === -1) {
71+
return -1;
72+
} else {
73+
// The index may be affected by surrogate pairs, so get the length of the preceding substring.
74+
return [...haystack.slice(0, rawIndex)].length;
75+
}
76+
} else if (isValidNativeType(haystack, ['array'])) {
6977
return haystack.indexOf(needle, fromIndex);
78+
} else {
79+
throw new RuntimeError(`Expected second argument to be of type array or string, but found ${toString(typeOf(haystack))} instead.`);
7080
}
71-
72-
return haystack.indexOf(needle);
7381
}
7482

7583
eachChild(fn: (_: Expression) => void) {

src/expression/definitions/length.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ class Length implements Expression {
3333
evaluate(ctx: EvaluationContext) {
3434
const input = this.input.evaluate(ctx);
3535
if (typeof input === 'string') {
36-
return input.length;
36+
// The length may be affected by surrogate pairs.
37+
return [...input].length;
3738
} else if (Array.isArray(input)) {
3839
return input.length;
3940
} else {

src/expression/definitions/slice.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,16 +56,19 @@ class Slice implements Expression {
5656
const input = (this.input.evaluate(ctx) as any);
5757
const beginIndex = (this.beginIndex.evaluate(ctx) as number);
5858

59-
if (!isValidNativeType(input, ['string', 'array'])) {
60-
throw new RuntimeError(`Expected first argument to be of type array or string, but found ${toString(typeOf(input))} instead.`);
59+
let endIndex;
60+
if (this.endIndex) {
61+
endIndex = (this.endIndex.evaluate(ctx) as number);
6162
}
6263

63-
if (this.endIndex) {
64-
const endIndex = (this.endIndex.evaluate(ctx) as number);
64+
if (isValidNativeType(input, ['string'])) {
65+
// Indices may be affected by surrogate pairs.
66+
return [...input].slice(beginIndex, endIndex).join('');
67+
} else if (isValidNativeType(input, ['array'])) {
6568
return input.slice(beginIndex, endIndex);
69+
} else {
70+
throw new RuntimeError(`Expected first argument to be of type array or string, but found ${toString(typeOf(input))} instead.`);
6671
}
67-
68-
return input.slice(beginIndex);
6972
}
7073

7174
eachChild(fn: (_: Expression) => void) {

src/expression/expression.test.ts

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,3 +388,244 @@ describe('Distance expression', () => {
388388
});
389389
});
390390
});
391+
392+
describe('index-of expression', () => {
393+
test('requires a needle', () => {
394+
const response = createExpression(['index-of']);
395+
expect(response.result).toBe('error');
396+
});
397+
test('requires a haystack', () => {
398+
const response = createExpression(['index-of', 'a']);
399+
expect(response.result).toBe('error');
400+
});
401+
test('rejects a fourth argument', () => {
402+
const response = createExpression(['index-of', 'a', 'abc', 1, 8]);
403+
expect(response.result).toBe('error');
404+
});
405+
test('requires a primitive as the needle', () => {
406+
const response = createExpression(['index-of', ['literal', ['a']], ['a', 'b', 'c']]);
407+
expect(response.result).toBe('error');
408+
});
409+
test('requires a string or array as the haystack', () => {
410+
const response = createExpression(['index-of', 't', true]);
411+
expect(response.result).toBe('error');
412+
});
413+
test('finds an empty substring in an empty string', () => {
414+
const response = createExpression(['index-of', '', '']);
415+
expect(response.result).toBe('success');
416+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(0);
417+
});
418+
test('finds an empty substring in a non-empty string', () => {
419+
const response = createExpression(['index-of', '', 'abc']);
420+
expect(response.result).toBe('success');
421+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(0);
422+
});
423+
test('cannot find a non-empty substring in an empty string', () => {
424+
const response = createExpression(['index-of', 'abc', '']);
425+
expect(response.result).toBe('success');
426+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(-1);
427+
});
428+
test('finds a non-empty substring in a non-empty string', () => {
429+
const response = createExpression(['index-of', 'b', 'abc']);
430+
expect(response.result).toBe('success');
431+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1);
432+
});
433+
test('only finds the first occurrence in a string', () => {
434+
const response = createExpression(['index-of', 'b', 'abbc']);
435+
expect(response.result).toBe('success');
436+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1);
437+
});
438+
test('starts looking for the substring at a positive start index', () => {
439+
const response = createExpression(['index-of', 'a', 'abc', 1]);
440+
expect(response.result).toBe('success');
441+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(-1);
442+
});
443+
test('starts looking for the substring at a negative start index', () => {
444+
const response = createExpression(['index-of', 'c', 'abc', -1]);
445+
expect(response.result).toBe('success');
446+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(2);
447+
});
448+
test('counts a non-ASCII character as a single character', () => {
449+
const response = createExpression(['index-of', '镇', '市镇']);
450+
expect(response.result).toBe('success');
451+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1);
452+
});
453+
test('counts a surrogate pair as a single character', () => {
454+
const response = createExpression(['index-of', '市镇', '丐𦨭市镇']);
455+
expect(response.result).toBe('success');
456+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(2);
457+
});
458+
test('cannot find an element in an empty array', () => {
459+
const response = createExpression(['index-of', 1, ['literal', []]]);
460+
expect(response.result).toBe('success');
461+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(-1);
462+
});
463+
test('finds an element in a non-empty array', () => {
464+
const response = createExpression(['index-of', 2, ['literal', [1, 2, 3]]]);
465+
expect(response.result).toBe('success');
466+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1);
467+
});
468+
test('only finds the first occurrence in an array', () => {
469+
const response = createExpression(['index-of', 2, ['literal', [1, 2, 2, 3]]]);
470+
expect(response.result).toBe('success');
471+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1);
472+
});
473+
test('starts looking for the element at a positive start index', () => {
474+
const response = createExpression(['index-of', 1, ['literal', [1, 2, 3]], 1]);
475+
expect(response.result).toBe('success');
476+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(-1);
477+
});
478+
test('starts looking for the element at a negative start index', () => {
479+
const response = createExpression(['index-of', 3, ['literal', [1, 2, 3]], -1]);
480+
expect(response.result).toBe('success');
481+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(2);
482+
});
483+
});
484+
485+
describe('length expression', () => {
486+
test('requires an argument', () => {
487+
const response = createExpression(['length']);
488+
expect(response.result).toBe('error');
489+
});
490+
test('requires a string or array as the argument', () => {
491+
const response = createExpression(['length', true]);
492+
expect(response.result).toBe('error');
493+
});
494+
test('rejects a second argument', () => {
495+
const response = createExpression(['length', 'abc', 'def']);
496+
expect(response.result).toBe('error');
497+
});
498+
test('measures an empty string', () => {
499+
const response = createExpression(['length', '']);
500+
expect(response.result).toBe('success');
501+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(0);
502+
});
503+
test('measures a non-empty string', () => {
504+
const response = createExpression(['length', 'abc']);
505+
expect(response.result).toBe('success');
506+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(3);
507+
});
508+
test('counts a non-ASCII character as a single character', () => {
509+
const response = createExpression(['length', '市镇']);
510+
expect(response.result).toBe('success');
511+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(2);
512+
});
513+
test('counts a surrogate pair as a single character', () => {
514+
const response = createExpression(['length', '丐𦨭市镇']);
515+
expect(response.result).toBe('success');
516+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(4);
517+
});
518+
test('measures an empty array', () => {
519+
const response = createExpression(['length', ['literal', []]]);
520+
expect(response.result).toBe('success');
521+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(0);
522+
});
523+
test('measures a non-empty array', () => {
524+
const response = createExpression(['length', ['literal', [1, 2, 3]]]);
525+
expect(response.result).toBe('success');
526+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(3);
527+
});
528+
});
529+
530+
describe('slice expression', () => {
531+
test('requires an input argument', () => {
532+
const response = createExpression(['slice']);
533+
expect(response.result).toBe('error');
534+
});
535+
test('requires a start index argument', () => {
536+
const response = createExpression(['slice', 'abc']);
537+
expect(response.result).toBe('error');
538+
});
539+
test('rejects a fourth argument', () => {
540+
const response = createExpression(['slice', 'abc', 0, 1, 8]);
541+
expect(response.result).toBe('error');
542+
});
543+
test('requires a string or array as the input argument', () => {
544+
const response = createExpression(['slice', true, 0]);
545+
expect(response.result).toBe('error');
546+
});
547+
test('requires a number as the start index argument', () => {
548+
const response = createExpression(['slice', 'abc', true]);
549+
expect(response.result).toBe('error');
550+
});
551+
test('slices an empty string', () => {
552+
const response = createExpression(['slice', '', 0]);
553+
expect(response.result).toBe('success');
554+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('');
555+
});
556+
test('slices a string starting at the beginning', () => {
557+
const response = createExpression(['slice', 'abc', 0]);
558+
expect(response.result).toBe('success');
559+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('abc');
560+
});
561+
test('slices a string starting at the middle', () => {
562+
const response = createExpression(['slice', 'abc', 1]);
563+
expect(response.result).toBe('success');
564+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('bc');
565+
});
566+
test('slices a string starting at the end', () => {
567+
const response = createExpression(['slice', 'abc', 3]);
568+
expect(response.result).toBe('success');
569+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('');
570+
});
571+
test('slices a string backwards from the end', () => {
572+
const response = createExpression(['slice', 'abc', -2]);
573+
expect(response.result).toBe('success');
574+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('bc');
575+
});
576+
test('slices a string by a zero-length range', () => {
577+
const response = createExpression(['slice', 'abc', 1, 1]);
578+
expect(response.result).toBe('success');
579+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('');
580+
});
581+
test('slices a string by a negative-length range', () => {
582+
const response = createExpression(['slice', 'abc', 2, 1]);
583+
expect(response.result).toBe('success');
584+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('');
585+
});
586+
test('avoids splitting a non-ASCII character', () => {
587+
const response = createExpression(['slice', '市镇', 1]);
588+
expect(response.result).toBe('success');
589+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('镇');
590+
});
591+
test('avoids splitting a surrogate pair', () => {
592+
const response = createExpression(['slice', '丐𦨭市镇', 2]);
593+
expect(response.result).toBe('success');
594+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('市镇');
595+
});
596+
test('slices an empty array', () => {
597+
const response = createExpression(['slice', ['literal', []], 0]);
598+
expect(response.result).toBe('success');
599+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([]);
600+
});
601+
test('slices an array starting at the beginning', () => {
602+
const response = createExpression(['slice', ['literal', [1, 2, 3]], 0]);
603+
expect(response.result).toBe('success');
604+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([1, 2, 3]);
605+
});
606+
test('slices an array starting at the middle', () => {
607+
const response = createExpression(['slice', ['literal', [1, 2, 3]], 1]);
608+
expect(response.result).toBe('success');
609+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([2, 3]);
610+
});
611+
test('slices an array starting at the end', () => {
612+
const response = createExpression(['slice', ['literal', [1, 2, 3]], 3]);
613+
expect(response.result).toBe('success');
614+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([]);
615+
});
616+
test('slices an array backwards from the end', () => {
617+
const response = createExpression(['slice', ['literal', [1, 2, 3]], -2]);
618+
expect(response.result).toBe('success');
619+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([2, 3]);
620+
});
621+
test('slices an array by a zero-length range', () => {
622+
const response = createExpression(['slice', ['literal', [1, 2, 3]], 1, 1]);
623+
expect(response.result).toBe('success');
624+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([]);
625+
});
626+
test('slices an array by a negative-length range', () => {
627+
const response = createExpression(['slice', ['literal', [1, 2, 3]], 2, 1]);
628+
expect(response.result).toBe('success');
629+
expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([]);
630+
});
631+
});

src/reference/v8.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2826,7 +2826,7 @@
28262826
}
28272827
},
28282828
"index-of": {
2829-
"doc": "Returns the first position at which an item can be found in an array or a substring can be found in a string, or `-1` if the input cannot be found. Accepts an optional index from where to begin the search.",
2829+
"doc": "Returns the first position at which an item can be found in an array or a substring can be found in a string, or `-1` if the input cannot be found. Accepts an optional index from where to begin the search. In a string, a UTF-16 surrogate pair counts as a single position.",
28302830
"example": {
28312831
"syntax": {
28322832
"method": ["value", "value", "number?"],
@@ -2844,7 +2844,7 @@
28442844
}
28452845
},
28462846
"slice": {
2847-
"doc": "Returns an item from an array or a substring from a string from a specified start index, or between a start index and an end index if set. The return value is inclusive of the start index but not of the end index.",
2847+
"doc": "Returns a subarray from an array or a substring from a string from a specified start index, or between a start index and an end index if set. The return value is inclusive of the start index but not of the end index. In a string, a UTF-16 surrogate pair counts as a single position.",
28482848
"example": {
28492849
"syntax": {
28502850
"method": ["value", "number", "number?"],
@@ -3380,7 +3380,7 @@
33803380
}
33813381
},
33823382
"length": {
3383-
"doc": "Gets the length of an array or string.",
3383+
"doc": "Gets the length of an array or string. In a string, a UTF-16 surrogate pair counts as a single character.",
33843384
"example": {
33853385
"syntax": {
33863386
"method": ["array"],

0 commit comments

Comments
 (0)