Skip to content

Commit c70317d

Browse files
authored
feat: Add generic length function (#3317)
1 parent 7189b13 commit c70317d

File tree

5 files changed

+88
-6
lines changed

5 files changed

+88
-6
lines changed

docs/4_secondary_admin_controls/expressions/functions.md

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,16 @@ There are various supported functions, and we are willing to add more. Let us kn
22

33
The currently supported functions are:
44

5+
##### General operations
6+
7+
**length(val)**
8+
9+
Find the length of the item passed in.
10+
* For strings, it will return the number of Unicode graphemes
11+
* For arrays, the number of elements
12+
* For JSON or other objects, it will return the number of properties
13+
* For numbers it will return the length of the string representation
14+
515
##### Numeric operations
616

717
**round(val)**
@@ -71,11 +81,11 @@ Trims any whitespace at the beginning and end of the string.
7181

7282
**strlen(val)**
7383

74-
Find the length of the given string.
84+
Find the length of the given string. For Unicode strings, this counts UTF-16 code units, not graphemes (use length() to count graphemes).
7585

7686
**substr(val, indexStart, indexEnd)**
7787

78-
substr() extracts characters from indexStart up to but not including indexEnd.
88+
substr() extracts characters from indexStart up to but not including indexEnd. For Unicode strings, the indexes count UTF-16 code units, not graphemes.
7989

8090
- If indexStart >= str.length, an empty string is returned.
8191
- If indexStart < 0, the index is counted from the end of the string. More formally, in this case, the substring starts at max(indexStart + str.length, 0).
@@ -106,15 +116,15 @@ eg `includes("Companion is great!", "great")` gives `true`
106116

107117
**indexOf(val, find, offset)**
108118

109-
Find the index of the first occurrence of a value within the provided string.
119+
Find the index of the first occurrence of a value within the provided string. For Unicode strings, the index counts UTF-16 code units, not graphemes.
110120

111121
Optionally provide an offset to begin the search from, otherwise it starts from position 0 (the beginning).
112122

113123
If the value isn't found, it will return -1, otherwise the index of the first occurrence.
114124

115125
**lastIndexOf(val, find, offset)**
116126

117-
Find the index of the last occurrence of a value within the provided string, searching from the end.
127+
Find the index of the last occurrence of a value within the provided string, searching from the end. For Unicode strings, the index counts UTF-16 code units, not graphemes.
118128

119129
Optionally provide an offset to begin the search from, searching from the end.
120130

shared-lib/lib/Expression/ExpressionFunctions.ts

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,34 @@
11
import { pad } from '../Util.js'
22
import { JSONPath } from 'jsonpath-plus'
3+
import { countGraphemes } from 'unicode-segmenter/grapheme'
34

45
// Note: when adding new functions, make sure to update the docs!
56
export const ExpressionFunctions: Record<string, (...args: any[]) => any> = {
7+
// General operations
8+
length: (v) => {
9+
let len = 0
10+
if (v === undefined || v === null) {
11+
len = 0
12+
} else if (Array.isArray(v)) {
13+
len = v.length
14+
} else if (typeof v === 'number') {
15+
len = (v + '').length
16+
} else if (typeof v === 'bigint') {
17+
len = v.toString().length
18+
} else if (typeof v === 'string') {
19+
// So we handle UTF graphemes correctly
20+
len = countGraphemes(v)
21+
} else if (v instanceof RegExp) {
22+
len = v.toString().length
23+
} else if (typeof v === 'object') {
24+
len = Object.keys(v).length
25+
} else {
26+
// If it's got to here, we don't know how to handle it
27+
len = NaN
28+
}
29+
return len
30+
},
31+
632
// Number operations
733
// TODO: round to fractionals, without fp issues
834
round: (v) => Math.round(v),
@@ -74,7 +100,7 @@ export const ExpressionFunctions: Record<string, (...args: any[]) => any> = {
74100
// Bool operations
75101
bool: (v) => !!v && v !== 'false' && v !== '0',
76102

77-
// Object operations
103+
// Object/array operations
78104
jsonpath: (obj, path) => {
79105
const shouldParseInput = typeof obj === 'string'
80106
if (shouldParseInput) {

shared-lib/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
"jsep": "^1.4.0",
2121
"jsonpath-plus": "^10.3.0",
2222
"ps-tree": "^1.2.0",
23-
"semver": "^7.7.2"
23+
"semver": "^7.7.2",
24+
"unicode-segmenter": "^0.11.3"
2425
},
2526
"scripts": {
2627
"build": "run build:ts",

shared-lib/test/expressions-functions.test.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,39 @@ import { describe, it, expect } from 'vitest'
22
import { ExpressionFunctions } from '../lib/Expression/ExpressionFunctions.js'
33

44
describe('functions', () => {
5+
describe('general', () => {
6+
it('length', () => {
7+
expect(ExpressionFunctions.length()).toBe(0)
8+
expect(ExpressionFunctions.length('')).toBe(0)
9+
expect(ExpressionFunctions.length('a')).toBe(1)
10+
expect(ExpressionFunctions.length('abc')).toBe(3)
11+
expect(ExpressionFunctions.length('ä')).toBe(1) // codepoint U+00E4, one grapheme
12+
expect(ExpressionFunctions.length('̈a')).toBe(2) // codepoints U+0308 U+0061, one grapheme, wrong order
13+
expect(ExpressionFunctions.length('ä')).toBe(1) // codepoints U+0061 U+0308, one grapheme
14+
expect(ExpressionFunctions.length('á̈')).toBe(1) // codepoints U+0061 U+0301 U+0308, one grapheme
15+
expect(ExpressionFunctions.length(9)).toBe(1)
16+
expect(ExpressionFunctions.length(99)).toBe(2)
17+
expect(ExpressionFunctions.length(-123)).toBe(4)
18+
expect(ExpressionFunctions.length(3.14)).toBe(4)
19+
expect(ExpressionFunctions.length(BigInt(1024))).toBe(4)
20+
expect(ExpressionFunctions.length(BigInt(9007199254740991))).toBe(16)
21+
expect(ExpressionFunctions.length(new RegExp('ab+c', 'i'))).toBe(7)
22+
expect(ExpressionFunctions.length([])).toBe(0)
23+
expect(ExpressionFunctions.length([9])).toBe(1)
24+
expect(ExpressionFunctions.length([99])).toBe(1)
25+
expect(ExpressionFunctions.length(['abc'])).toBe(1)
26+
expect(ExpressionFunctions.length([9, 'a'])).toBe(2)
27+
expect(ExpressionFunctions.length(['a', 'c'])).toBe(2)
28+
expect(ExpressionFunctions.length(['ab', ''])).toBe(2)
29+
expect(ExpressionFunctions.length([1, , 3])).toBe(3)
30+
expect(ExpressionFunctions.length(['a', 'b', 'c'])).toBe(3)
31+
expect(ExpressionFunctions.length(['a', ['b', 'b'], 'c'])).toBe(3)
32+
expect(ExpressionFunctions.length({ a: 1 })).toBe(1)
33+
expect(ExpressionFunctions.length({ a: 1, b: { c: 5 } })).toBe(2)
34+
expect(ExpressionFunctions.length({ a: ['a', 'c'], b: { c: 5 } })).toBe(2)
35+
})
36+
})
37+
538
describe('number', () => {
639
it('round', () => {
740
expect(ExpressionFunctions.round(9.99)).toBe(10)
@@ -127,6 +160,7 @@ describe('functions', () => {
127160
expect(ExpressionFunctions.strlen(' 99 ')).toBe(6)
128161
expect(ExpressionFunctions.strlen('\t aa \n')).toBe(6)
129162
expect(ExpressionFunctions.strlen('')).toBe(0)
163+
expect(ExpressionFunctions.strlen('ä')).toBe(2) // codepoints U+0061 U+0308, one grapheme, two bytes
130164
expect(ExpressionFunctions.strlen(undefined)).toBe(9)
131165
expect(ExpressionFunctions.strlen(false)).toBe(5)
132166
expect(ExpressionFunctions.strlen(true)).toBe(4)
@@ -139,6 +173,7 @@ describe('functions', () => {
139173
expect(ExpressionFunctions.substr('abcdef', 2, -2)).toBe('cd')
140174
expect(ExpressionFunctions.substr('abcdef', -4, -2)).toBe('cd')
141175
expect(ExpressionFunctions.substr('abcdef', 0, 0)).toBe('')
176+
expect(ExpressionFunctions.substr('ä', 0, 1)).toBe('a') // codepoints U+0061 U+0308, one grapheme, substr works on bytes
142177

143178
expect(ExpressionFunctions.substr(11)).toBe('11')
144179
expect(ExpressionFunctions.substr('', 0, 1)).toBe('')
@@ -201,6 +236,7 @@ describe('functions', () => {
201236
expect(ExpressionFunctions.indexOf('1234512345', '34')).toBe(2)
202237
expect(ExpressionFunctions.indexOf('1234512345', '34', 2)).toBe(2)
203238
expect(ExpressionFunctions.indexOf('1234512345', '34', 3)).toBe(7)
239+
expect(ExpressionFunctions.indexOf('ä', 'a')).toBe(0) // codepoints U+0061 U+0308, one grapheme, indexOf works on bytes
204240
})
205241

206242
it('lastIndexOf', () => {
@@ -214,6 +250,7 @@ describe('functions', () => {
214250
expect(ExpressionFunctions.lastIndexOf('1234512345', '34')).toBe(7)
215251
expect(ExpressionFunctions.lastIndexOf('1234512345', '34', 7)).toBe(7)
216252
expect(ExpressionFunctions.lastIndexOf('1234512345', '34', 6)).toBe(2)
253+
expect(ExpressionFunctions.lastIndexOf('äbbä', 'a')).toBe(4) // codepoints U+0061 U+0308, one grapheme, lastIndexOf works on bytes
217254
})
218255

219256
it('toUpperCase', () => {

yarn.lock

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,7 @@ __metadata:
12441244
ps-tree: "npm:^1.2.0"
12451245
semver: "npm:^7.7.2"
12461246
typescript: "npm:~5.8.3"
1247+
unicode-segmenter: "npm:^0.11.3"
12471248
languageName: unknown
12481249
linkType: soft
12491250

@@ -14600,6 +14601,13 @@ asn1@evs-broadcast/node-asn1:
1460014601
languageName: node
1460114602
linkType: hard
1460214603

14604+
"unicode-segmenter@npm:^0.11.3":
14605+
version: 0.11.3
14606+
resolution: "unicode-segmenter@npm:0.11.3"
14607+
checksum: 10c0/e5f6c16ebd2112eb6e447436ebaa525deb69705a609e6e19161c779ae738f1c2325b96cd986d5099c0f9b8c92538940d41d77237dd5a169230bb8405ec5f4835
14608+
languageName: node
14609+
linkType: hard
14610+
1460314611
"unified@npm:^11.0.0":
1460414612
version: 11.0.5
1460514613
resolution: "unified@npm:11.0.5"

0 commit comments

Comments
 (0)