Skip to content

Commit 307b37a

Browse files
authored
Merge pull request #10 from GoogleChromeLabs/tools-evals-constraints
Implement argument constraints for tool evals
2 parents 134b1d6 + 8d19d53 commit 307b37a

File tree

8 files changed

+376
-41
lines changed

8 files changed

+376
-41
lines changed

evals-cli/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
node_modules/
22
dist/
33
.DS_Store
4+
.env
5+
report.html

evals-cli/README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,38 @@ node dist/bin/runevals.js --model=gemini-2.5-flash --tools=examples/travel/tools
6262
```bash
6363
node dist/bin/runevals.js --model=qwen3:8b --backend=ollama --tools=examples/travel/tools_schema.json --evals=examples/travel/evals.json
6464
```
65+
66+
## Argument Constraints
67+
68+
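You can use constraint operators to match argument values flexibly instead of requiring exact equality. An object is treated as a constraint object when it is non-empty and **all** of its keys start with `$`; every operator in it must be satisfied for the argument to match.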
69+
70+
### Supported Operators
71+
72+
| Operator | Description | Example |
73+
|---|---|---|
74+
| **`$pattern`** | Regex match | `{"$pattern": "^2026-\\d{2}$"}` |
75+
| **`$contains`** | Substring match | `{"$contains": "York"}` |
76+
| **`$gt`**, **`$gte`** | Greater than (or equal) | `{"$gte": 1}` |
77+
| **`$lt`**, **`$lte`** | Less than (or equal) | `{"$lt": 100}` |
78+
| **`$type`** | Type check (`"string"`, `"number"`, `"boolean"`, `"array"`, `"object"`, `"null"`) | `{"$type": "string"}` |
79+
| **`$any`** | Presence check | `{"$any": true}` |
80+
81+
### Example
82+
83+
```json
84+
{
85+
"expectedCall": {
86+
"functionName": "searchFlights",
87+
"arguments": {
88+
"destination": "NYC",
89+
"outboundDate": { "$pattern": "^2026-01-\\d{2}$" },
90+
"passengers": { "$gte": 1 },
91+
"preferences": { "$any": true }
92+
}
93+
}
94+
}
95+
```
96+
6597
## License
6698
6799
Apache-2.0

evals-cli/examples/events/evals.json

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,12 @@
5858
"arguments": {
5959
"date": "2026-01-20",
6060
"time": "19:00",
61-
"location": "Ginno's Pizza",
62-
"description": "Restaurant reservation for 3 at Ginno's pizza"
61+
"location": {
62+
"$contains": "Ginno's"
63+
},
64+
"description": {
65+
"$contains": "Ginno's"
66+
}
6367
}
6468
}
6569
}

evals-cli/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
"license": "Apache-2.0",
66
"type": "module",
77
"scripts": {
8-
"build": "tsc"
8+
"build": "tsc",
9+
"test": "tsc && node --test dist/test/matcher.test.js"
910
},
1011
"devDependencies": {
1112
"@types/node": "^25.0.10",

evals-cli/src/matcher.ts

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/**
2+
* Copyright 2026 Google LLC
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
/**
 * Decides whether an actual argument value satisfies an expected one.
 *
 * Two modes are supported:
 * - When `expected` is a constraint object (non-empty, every key prefixed
 *   with `$`), its operators are evaluated against `actual`.
 * - Otherwise the two values are compared structurally; children are matched
 *   through this same entry point, so constraints may appear at any depth.
 *
 * @param expected The literal value or constraint object to match against.
 * @param actual The value to check.
 * @returns `true` when `actual` equals `expected` or satisfies its constraints.
 */
export function matchesArgument(expected: any, actual: any): boolean {
  return isConstraintObject(expected)
    ? matchesConstraint(expected, actual)
    : matchesRecursive(expected, actual);
}
23+
24+
/**
 * Evaluates every operator of a constraint object against an actual value.
 *
 * Supported operators:
 * - `$pattern`: `actual` must be a string matched by the given regular expression.
 * - `$contains`: `actual` must be a string containing the given substring.
 * - `$gt`, `$gte`, `$lt`, `$lte`: `actual` must be a number that compares as stated.
 * - `$type`: `actual` must be of the named type. `"array"`, `"object"`
 *   (non-null, non-array) and `"null"` are handled specially; any other name
 *   is compared against `typeof actual`.
 * - `$any`: accepts every value; its operand is ignored.
 *
 * Malformed constraints fail closed instead of passing silently or throwing:
 * an unknown operator, an invalid or non-string/non-RegExp `$pattern`, a
 * non-string `$contains` operand, or a non-numeric comparison operand all
 * yield `false`.
 *
 * @param constraint The constraint object (e.g., `{ "$gt": 10 }`).
 * @param actual The value to test.
 * @returns `true` only if every operator in `constraint` is satisfied.
 */
function matchesConstraint(
  constraint: Record<string, unknown>,
  actual: unknown,
): boolean {
  for (const [operator, operand] of Object.entries(constraint)) {
    switch (operator) {
      case "$pattern": {
        if (typeof actual !== "string") {
          return false;
        }
        if (typeof operand !== "string" && !(operand instanceof RegExp)) {
          return false;
        }
        try {
          if (!new RegExp(operand).test(actual)) {
            return false;
          }
        } catch {
          // An uncompilable pattern can never be satisfied.
          return false;
        }
        break;
      }

      case "$contains": {
        // Require a string operand: `includes` would otherwise coerce it
        // (e.g. 5 -> "5", undefined -> "undefined") and match by accident.
        if (typeof actual !== "string" || typeof operand !== "string") {
          return false;
        }
        if (!actual.includes(operand)) {
          return false;
        }
        break;
      }

      case "$gt":
      case "$gte":
      case "$lt":
      case "$lte": {
        // Require a numeric operand: relational operators coerce, so
        // `5 > null` would otherwise evaluate to true.
        if (typeof actual !== "number" || typeof operand !== "number") {
          return false;
        }
        const satisfied =
          operator === "$gt"
            ? actual > operand
            : operator === "$gte"
              ? actual >= operand
              : operator === "$lt"
                ? actual < operand
                : actual <= operand;
        if (!satisfied) {
          return false;
        }
        break;
      }

      case "$type": {
        if (operand === "array") {
          if (!Array.isArray(actual)) return false;
        } else if (operand === "null") {
          if (actual !== null) return false;
        } else if (operand === "object") {
          // `typeof` reports "object" for null and arrays too; exclude both.
          if (
            typeof actual !== "object" ||
            actual === null ||
            Array.isArray(actual)
          ) {
            return false;
          }
        } else if (typeof actual !== operand) {
          return false;
        }
        break;
      }

      case "$any":
        // Always matches; the operand is not inspected.
        break;

      default:
        // Unknown operator (most likely a typo): refuse to match rather than
        // silently accepting every value.
        return false;
    }
  }
  return true;
}
86+
87+
/**
 * Type guard that tells constraint objects apart from literal expected values.
 *
 * A value qualifies only when it is a non-null object with at least one own
 * enumerable key and ALL of those keys start with `$`. Empty objects do not
 * qualify, and neither do arrays, because array keys are index strings.
 *
 * @param obj The value to inspect.
 * @returns `true` if `obj` is strictly a constraint object.
 */
function isConstraintObject(obj: unknown): obj is Record<string, unknown> {
  if (typeof obj !== "object" || obj === null) {
    return false;
  }
  const keys = Object.keys(obj);
  return keys.length > 0 && keys.every((key) => key.startsWith("$"));
}
105+
106+
/**
 * Recursively checks structural equality between two values.
 *
 * Primitives (and identical references) are compared with `===`. Objects and
 * arrays must have the same set of own enumerable keys, and each child is
 * compared through `matchesArgument`, which is what allows constraint objects
 * to appear anywhere inside the expected structure.
 *
 * An array never matches a plain object, even when both expose the same keys
 * (for example `[]` vs `{}` or `[1]` vs `{ "0": 1 }`).
 *
 * @param expected The expected structure.
 * @param actual The actual structure.
 * @returns `true` if the structures match recursively.
 */
function matchesRecursive(expected: any, actual: any): boolean {
  if (expected === actual) {
    return true;
  }

  if (
    expected === null ||
    actual === null ||
    typeof expected !== "object" ||
    typeof actual !== "object"
  ) {
    return false;
  }

  // Object.keys() yields index strings for arrays, so without this check an
  // array and an object with matching numeric keys would compare as equal.
  if (Array.isArray(expected) !== Array.isArray(actual)) {
    return false;
  }

  const expectedKeys = Object.keys(expected);
  const actualKeys = new Set(Object.keys(actual));

  if (expectedKeys.length !== actualKeys.size) {
    return false;
  }

  // Every expected key must exist on `actual` (so `$any` still requires the
  // key to be present) and its value must match.
  return expectedKeys.every(
    (key) => actualKeys.has(key) && matchesArgument(expected[key], actual[key]),
  );
}

evals-cli/src/report/report.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import { Config } from "../types/config.js";
77
import { Message, TestResult, TestResults } from "../types/evals.js";
8-
import { deepEqual } from "../utils.js";
8+
import { matchesArgument } from "../matcher.js";
99

1010
export function renderReport(config: Config, testResults: TestResults): string {
1111
return `
@@ -87,7 +87,7 @@ function renderDetail(testNumber: number, testResult: TestResult): string {
8787
? "pass"
8888
: "fail";
8989

90-
const argsOutcome = deepEqual(
90+
const argsOutcome = matchesArgument(
9191
testResult.test.expectedCall?.arguments,
9292
testResult.response?.args,
9393
)

0 commit comments

Comments
 (0)