IBM · elronbandel · Feb 23, 2025 · Feb 21, 2025 · Feb 23, 2025
diff --git a/src/unitxt/catalog/metrics/llm_as_judge/direct/criteria/correctness_based_on_ground_truth.json b/src/unitxt/catalog/metrics/llm_as_judge/direct/criteria/correctness_based_on_ground_truth.json
@@ -0,0 +1,27 @@
+{
+    "__type__": "criteria_with_options",
+    "name": "correctness_based_on_ground_truth",
+    "description": "Does the response correctly convey the same factual information as the ground truth?",
+    "options": [
+        {
+            "__type__": "criteria_option",
+            "name": "correct",
+            "description": "The response conveys the same factual meaning as the ground truth. Minor rewording, synonyms, or grammatical differences are acceptable. The response is relevant to the question and does not introduce unrelated or misleading information."
+        },
+        {
+            "__type__": "criteria_option",
+            "name": "partially_correct",
+            "description": "The response contains some correct information but is incomplete or lacks essential details. It may also contain minor inaccuracies or extraneous information that slightly misrepresents the ground truth."
+        },
+        {
+            "__type__": "criteria_option",
+            "name": "incorrect",
+            "description": "The response does not align with the ground truth. It either presents incorrect, unrelated, or misleading information, or omits key details that change the intended meaning."
+        }
+    ],
+    "option_map": {
+        "correct": 1.0,
+        "partially_correct": 0.5,
+        "incorrect": 0.0
+    }
+}
diff --git a/src/unitxt/llm_as_judge_constants.py b/src/unitxt/llm_as_judge_constants.py
@@ -934,6 +934,30 @@ class DirectCriteriaCatalogEnum(Enum):
         },
     )
 
+    CORRECTNESS_BASED_ON_GROUND_TRUTH = CriteriaWithOptions(  # criterion with three options grading a response's factual agreement with a ground truth
+        name="correctness_based_on_ground_truth",
+        description="Does the response correctly convey the same factual information as the ground truth?",
+        options=[  # one CriteriaOption per verdict; the option names are reused as the option_map keys below
+            CriteriaOption(
+                name="correct",
+                description="The response conveys the same factual meaning as the ground truth. Minor rewording, synonyms, or grammatical differences are acceptable. The response is relevant to the question and does not introduce unrelated or misleading information.",
+            ),
+            CriteriaOption(
+                name="partially_correct",
+                description="The response contains some correct information but is incomplete or lacks essential details. It may also contain minor inaccuracies or extraneous information that slightly misrepresents the ground truth.",
+            ),
+            CriteriaOption(
+                name="incorrect",
+                description="The response does not align with the ground truth. It either presents incorrect, unrelated, or misleading information, or omits key details that change the intended meaning.",
+            ),
+        ],
+        option_map={  # numeric value per option name (1.0 / 0.5 / 0.0); presumably the judge score - confirm against CriteriaWithOptions
+            "correct": 1.0,
+            "partially_correct": 0.5,
+            "incorrect": 0.0,
+        },
+    )
+
 
 DIRECT_CRITERIA = [c.value for c in DirectCriteriaCatalogEnum]