Skip to content

Commit 046a0fb

Browse files
committed
fix(SupportTargetOutputKey): support target output key
Legacy exact match evaluator must support target output key.
1 parent 92b317d commit 046a0fb

3 files changed

Lines changed: 30 additions & 2 deletions

File tree

samples/calculator/evaluations/eval-sets/legacy.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"batchSize": 10,
66
"evaluatorRefs": [
77
"equality",
8+
"equality-with-target-key",
89
"llm-as-a-judge",
910
"json-similarity",
1011
"trajectory"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"fileName": "equality-with-target-key.json",
3+
"id": "equality-with-target-key",
4+
"name": "Legacy Equality Evaluator With Target Key",
5+
"description": "An evaluator that judges the agent based on expected output under \"result\" key.",
6+
"category": 0,
7+
"type": 1,
8+
"targetOutputKey": "result",
9+
"createdAt": "2025-06-26T17:45:39.651Z",
10+
"updatedAt": "2025-06-26T17:45:39.651Z"
11+
}

src/uipath/eval/evaluators/legacy_exact_match_evaluator.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,23 @@ async def evaluate(
4040
Returns:
4141
EvaluationResult: Boolean result indicating exact match (True/False)
4242
"""
43+
actual_output = agent_execution.agent_output
44+
expected_output = evaluation_criteria.expected_output
45+
46+
if self.target_output_key and self.target_output_key != "*":
47+
if isinstance(actual_output, dict) and isinstance(expected_output, dict):
48+
if not (
49+
self.target_output_key in actual_output
50+
and self.target_output_key in expected_output
51+
):
52+
# Target key is missing from the actual or expected output: compare two empty dicts so the evaluation passes.
53+
expected_output = actual_output = {}
54+
elif self.target_output_key in actual_output:
55+
actual_output = actual_output[self.target_output_key]
56+
elif self.target_output_key in expected_output:
57+
expected_output = expected_output[self.target_output_key]
58+
4359
return BooleanEvaluationResult(
44-
score=self._canonical_json(agent_execution.agent_output)
45-
== self._canonical_json(evaluation_criteria.expected_output)
60+
score=self._canonical_json(actual_output)
61+
== self._canonical_json(expected_output)
4662
)

0 commit comments

Comments
 (0)