fix(SupportTargetOutputKey): support target output key

akshaylive · akshaylive · commit 046a0fba60cf · 2026-01-09T15:38:57.000-08:00
The legacy exact match evaluator must support the target output key (`targetOutputKey`): when it is set, only the value under that key is compared.
diff --git a/samples/calculator/evaluations/eval-sets/legacy.json b/samples/calculator/evaluations/eval-sets/legacy.json
@@ -5,6 +5,7 @@
   "batchSize": 10,
   "evaluatorRefs": [
     "equality",
+    "equality-with-target-key",
     "llm-as-a-judge",
     "json-similarity",
     "trajectory"
diff --git a/samples/calculator/evaluations/evaluators/legacy-equality-with-target-key.json b/samples/calculator/evaluations/evaluators/legacy-equality-with-target-key.json
@@ -0,0 +1,11 @@
+{
+    "fileName": "equality-with-target-key.json",
+    "id": "equality-with-target-key",
+    "name": "Legacy Equality Evaluator With Target Key",
+    "description": "An evaluator that judges the agent based on expected output under \"result\" key.",
+    "category": 0,
+    "type": 1,
+    "targetOutputKey": "result",
+    "createdAt": "2025-06-26T17:45:39.651Z",
+    "updatedAt": "2025-06-26T17:45:39.651Z"
+}
diff --git a/src/uipath/eval/evaluators/legacy_exact_match_evaluator.py b/src/uipath/eval/evaluators/legacy_exact_match_evaluator.py
@@ -40,7 +40,23 @@ async def evaluate(
         Returns:
             EvaluationResult: Boolean result indicating exact match (True/False)
         """
+        actual_output = agent_execution.agent_output
+        expected_output = evaluation_criteria.expected_output

+        # Narrow the comparison only when a specific key is set ("*" is skipped).
+        if self.target_output_key and self.target_output_key != "*":
+            if isinstance(actual_output, dict) and isinstance(expected_output, dict):
+                key = self.target_output_key
+                if key in actual_output and key in expected_output:
+                    # Extract from BOTH sides so a value is compared to a value.
+                    actual_output = actual_output[key]
+                    expected_output = expected_output[key]
+                else:
+                    # Key missing on at least one side: compare two empty
+                    # dicts, which always match, so the evaluation passes.
+                    expected_output = actual_output = {}
+
         return BooleanEvaluationResult(
-            score=self._canonical_json(agent_execution.agent_output)
-            == self._canonical_json(evaluation_criteria.expected_output)
+            score=self._canonical_json(actual_output)
+            == self._canonical_json(expected_output)
         )