feat: Add generic length function (#3317)

peternewman · web-flow · commit c70317d4e0e8 · 2025-06-02T20:02:34.000+01:00
diff --git a/docs/4_secondary_admin_controls/expressions/functions.md b/docs/4_secondary_admin_controls/expressions/functions.md
@@ -2,6 +2,16 @@ There are various supported functions, and we are willing to add more. Let us kn
 
 The currently supported functions are:
 
+##### General operations
+
+**length(val)**
+
+Find the length of the item passed in.
+* For strings, it will return the number of Unicode graphemes
+* For arrays, it will return the number of elements
+* For JSON or other objects, it will return the number of properties
+* For numbers, it will return the length of the string representation
+
 ##### Numeric operations
 
 **round(val)**
@@ -71,11 +81,11 @@ Trims any whitespace at the beginning and end of the string.
 
 **strlen(val)**
 
-Find the length of the given string.
+Find the length of the given string. For Unicode strings, this counts UTF-16 code units, not graphemes, so a letter followed by a combining mark counts as 2.
 
 **substr(val, indexStart, indexEnd)**
 
-substr() extracts characters from indexStart up to but not including indexEnd.
+substr() extracts characters from indexStart up to but not including indexEnd. For Unicode strings, the indexes count UTF-16 code units, not graphemes.
 
 - If indexStart >= str.length, an empty string is returned.
 - If indexStart < 0, the index is counted from the end of the string. More formally, in this case, the substring starts at max(indexStart + str.length, 0).
@@ -106,15 +116,15 @@ eg `includes("Companion is great!", "great")` gives `true`
 
 **indexOf(val, find, offset)**
 
-Find the index of the first occurrence of a value within the provided string.
+Find the index of the first occurrence of a value within the provided string. For Unicode strings, the index counts UTF-16 code units, not graphemes.
 
 Optionally provide an offset to begin the search from, otherwise it starts from position 0 (the beginning).
 
 If the value isn't found, it will return -1, otherwise the index of the first occurence.
 
 **lastIndexOf(val, find, offset)**
 
-Find the index of the last occurrence of a value within the provided string, searching from the end.
+Find the index of the last occurrence of a value within the provided string, searching from the end. For Unicode strings, the index counts UTF-16 code units, not graphemes.
 
 Optionally provide an offset to begin the search from, searching from the end.
 
diff --git a/shared-lib/lib/Expression/ExpressionFunctions.ts b/shared-lib/lib/Expression/ExpressionFunctions.ts
@@ -1,8 +1,34 @@
 import { pad } from '../Util.js'
 import { JSONPath } from 'jsonpath-plus'
+import { countGraphemes } from 'unicode-segmenter/grapheme'
 
 // Note: when adding new functions, make sure to update the docs!
 export const ExpressionFunctions: Record<string, (...args: any[]) => any> = {
+	// General operations
+	length: (v) => {
+		let len = 0
+		if (v === undefined || v === null) {
+			len = 0
+		} else if (Array.isArray(v)) {
+			len = v.length
+		} else if (typeof v === 'number') {
+			len = (v + '').length
+		} else if (typeof v === 'bigint') {
+			len = v.toString().length
+		} else if (typeof v === 'string') {
+			// Count Unicode grapheme clusters, so a base letter plus combining marks counts as one
+			len = countGraphemes(v)
+		} else if (v instanceof RegExp) {
+			len = v.toString().length
+		} else if (typeof v === 'object') {
+			len = Object.keys(v).length
+		} else {
+			// If it's got to here, we don't know how to handle it
+			len = NaN
+		}
+		return len
+	},
+
 	// Number operations
 	// TODO: round to fractionals, without fp issues
 	round: (v) => Math.round(v),
@@ -74,7 +100,7 @@ export const ExpressionFunctions: Record<string, (...args: any[]) => any> = {
 	// Bool operations
 	bool: (v) => !!v && v !== 'false' && v !== '0',
 
-	// Object operations
+	// Object/array operations
 	jsonpath: (obj, path) => {
 		const shouldParseInput = typeof obj === 'string'
 		if (shouldParseInput) {
diff --git a/shared-lib/package.json b/shared-lib/package.json
@@ -20,7 +20,8 @@
     "jsep": "^1.4.0",
     "jsonpath-plus": "^10.3.0",
     "ps-tree": "^1.2.0",
-    "semver": "^7.7.2"
+    "semver": "^7.7.2",
+    "unicode-segmenter": "^0.11.3"
   },
   "scripts": {
     "build": "run build:ts",
diff --git a/shared-lib/test/expressions-functions.test.ts b/shared-lib/test/expressions-functions.test.ts
@@ -2,6 +2,39 @@ import { describe, it, expect } from 'vitest'
 import { ExpressionFunctions } from '../lib/Expression/ExpressionFunctions.js'
 
 describe('functions', () => {
+	describe('general', () => {
+		it('length', () => {
+			expect(ExpressionFunctions.length()).toBe(0)
+			expect(ExpressionFunctions.length('')).toBe(0)
+			expect(ExpressionFunctions.length('a')).toBe(1)
+			expect(ExpressionFunctions.length('abc')).toBe(3)
+			expect(ExpressionFunctions.length('ä')).toBe(1) // codepoint U+00E4, one grapheme
+			expect(ExpressionFunctions.length('̈a')).toBe(2) // codepoints U+0308 U+0061, one grapheme, wrong order
+			expect(ExpressionFunctions.length('ä')).toBe(1) // codepoints U+0061 U+0308, one grapheme
+			expect(ExpressionFunctions.length('á̈')).toBe(1) // codepoints U+0061 U+0301 U+0308, one grapheme
+			expect(ExpressionFunctions.length(9)).toBe(1)
+			expect(ExpressionFunctions.length(99)).toBe(2)
+			expect(ExpressionFunctions.length(-123)).toBe(4)
+			expect(ExpressionFunctions.length(3.14)).toBe(4)
+			expect(ExpressionFunctions.length(BigInt(1024))).toBe(4)
+			expect(ExpressionFunctions.length(BigInt(9007199254740991))).toBe(16)
+			expect(ExpressionFunctions.length(new RegExp('ab+c', 'i'))).toBe(7)
+			expect(ExpressionFunctions.length([])).toBe(0)
+			expect(ExpressionFunctions.length([9])).toBe(1)
+			expect(ExpressionFunctions.length([99])).toBe(1)
+			expect(ExpressionFunctions.length(['abc'])).toBe(1)
+			expect(ExpressionFunctions.length([9, 'a'])).toBe(2)
+			expect(ExpressionFunctions.length(['a', 'c'])).toBe(2)
+			expect(ExpressionFunctions.length(['ab', ''])).toBe(2)
+			expect(ExpressionFunctions.length([1, , 3])).toBe(3)
+			expect(ExpressionFunctions.length(['a', 'b', 'c'])).toBe(3)
+			expect(ExpressionFunctions.length(['a', ['b', 'b'], 'c'])).toBe(3)
+			expect(ExpressionFunctions.length({ a: 1 })).toBe(1)
+			expect(ExpressionFunctions.length({ a: 1, b: { c: 5 } })).toBe(2)
+			expect(ExpressionFunctions.length({ a: ['a', 'c'], b: { c: 5 } })).toBe(2)
+		})
+	})
+
 	describe('number', () => {
 		it('round', () => {
 			expect(ExpressionFunctions.round(9.99)).toBe(10)
@@ -127,6 +160,7 @@ describe('functions', () => {
 			expect(ExpressionFunctions.strlen('  99  ')).toBe(6)
 			expect(ExpressionFunctions.strlen('\t aa \n')).toBe(6)
 			expect(ExpressionFunctions.strlen('')).toBe(0)
+			expect(ExpressionFunctions.strlen('ä')).toBe(2) // codepoints U+0061 U+0308, one grapheme, two bytes
 			expect(ExpressionFunctions.strlen(undefined)).toBe(9)
 			expect(ExpressionFunctions.strlen(false)).toBe(5)
 			expect(ExpressionFunctions.strlen(true)).toBe(4)
@@ -139,6 +173,7 @@ describe('functions', () => {
 			expect(ExpressionFunctions.substr('abcdef', 2, -2)).toBe('cd')
 			expect(ExpressionFunctions.substr('abcdef', -4, -2)).toBe('cd')
 			expect(ExpressionFunctions.substr('abcdef', 0, 0)).toBe('')
+			expect(ExpressionFunctions.substr('ä', 0, 1)).toBe('a') // codepoints U+0061 U+0308, one grapheme, substr works on bytes
 
 			expect(ExpressionFunctions.substr(11)).toBe('11')
 			expect(ExpressionFunctions.substr('', 0, 1)).toBe('')
@@ -201,6 +236,7 @@ describe('functions', () => {
 			expect(ExpressionFunctions.indexOf('1234512345', '34')).toBe(2)
 			expect(ExpressionFunctions.indexOf('1234512345', '34', 2)).toBe(2)
 			expect(ExpressionFunctions.indexOf('1234512345', '34', 3)).toBe(7)
+			expect(ExpressionFunctions.indexOf('ä', 'a')).toBe(0) // codepoints U+0061 U+0308, one grapheme, indexOf works on bytes
 		})
 
 		it('lastIndexOf', () => {
@@ -214,6 +250,7 @@ describe('functions', () => {
 			expect(ExpressionFunctions.lastIndexOf('1234512345', '34')).toBe(7)
 			expect(ExpressionFunctions.lastIndexOf('1234512345', '34', 7)).toBe(7)
 			expect(ExpressionFunctions.lastIndexOf('1234512345', '34', 6)).toBe(2)
+			expect(ExpressionFunctions.lastIndexOf('äbbä', 'a')).toBe(4) // codepoints U+0061 U+0308, one grapheme, lastIndexOf works on bytes
 		})
 
 		it('toUpperCase', () => {
diff --git a/yarn.lock b/yarn.lock
@@ -1244,6 +1244,7 @@ __metadata:
     ps-tree: "npm:^1.2.0"
     semver: "npm:^7.7.2"
     typescript: "npm:~5.8.3"
+    unicode-segmenter: "npm:^0.11.3"
   languageName: unknown
   linkType: soft
 
@@ -14600,6 +14601,13 @@ asn1@evs-broadcast/node-asn1:
   languageName: node
   linkType: hard
 
+"unicode-segmenter@npm:^0.11.3":
+  version: 0.11.3
+  resolution: "unicode-segmenter@npm:0.11.3"
+  checksum: 10c0/e5f6c16ebd2112eb6e447436ebaa525deb69705a609e6e19161c779ae738f1c2325b96cd986d5099c0f9b8c92538940d41d77237dd5a169230bb8405ec5f4835
+  languageName: node
+  linkType: hard
+
 "unified@npm:^11.0.0":
   version: 11.0.5
   resolution: "unified@npm:11.0.5"