Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/evidently/descriptors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
from .generated_descriptors import OOVWordsPercentage
from .generated_descriptors import OpenAI
from .generated_descriptors import PIILLMEval
from .generated_descriptors import QualityLLMEval
from .generated_descriptors import SemanticSimilarity
from .generated_descriptors import SentenceCount
from .generated_descriptors import Sentiment
Expand Down Expand Up @@ -88,6 +89,7 @@
"CompletenessLLMEval",
"Contains",
"ContainsLink",
"QualityLLMEval",
"ContextQualityLLMEval",
"ContextRelevance",
"CorrectnessLLMEval",
Expand Down
41 changes: 41 additions & 0 deletions src/evidently/descriptors/generated_descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,47 @@ def CompletenessLLMEval(
return FeatureDescriptor(feature=feature, alias=alias, tests=tests)


def QualityLLMEval(
    column_name: str,
    provider: str = "openai",
    model: str = "gpt-4o-mini",
    additional_columns: Optional[Dict[str, str]] = None,
    include_category: Optional[bool] = None,
    include_score: Optional[bool] = None,
    include_reasoning: Optional[bool] = None,
    uncertainty: Optional[Uncertainty] = None,
    alias: Optional[str] = None,
    tests: Optional[List[Union["DescriptorTest", "GenericTest"]]] = None,
):
    """Evaluate reference-free text quality with an LLM judge.

    Args:
        * `column_name`: Name of the text column to evaluate.
        * `provider`: LLM provider name (e.g., "openai", "anthropic").
        * `model`: Model name to use (e.g., "gpt-4o-mini").
        * `additional_columns`: Optional mapping of prompt variables to column names.
        * `include_category`: Whether to include category in output.
        * `include_score`: Whether to include score in output.
        * `include_reasoning`: Whether to include reasoning in output.
        * `uncertainty`: Optional uncertainty handling strategy.
        * `alias`: Optional alias for the descriptor.
        * `tests`: Optional list of tests to apply.
    """
    # Lazy import keeps the legacy module out of this package's import path
    # until the descriptor is actually constructed.
    from evidently.legacy.descriptors.llm_judges import QualityLLMEval as QualityLLMEvalV1

    judge = QualityLLMEvalV1(
        provider=provider,
        model=model,
        additional_columns=additional_columns,
        include_category=include_category,
        include_score=include_score,
        include_reasoning=include_reasoning,
        uncertainty=uncertainty,
        display_name=alias,
    )
    return FeatureDescriptor(feature=judge.feature(column_name), alias=alias, tests=tests)


def ContextQualityLLMEval(
column_name: str,
question: str,
Expand Down
2 changes: 2 additions & 0 deletions src/evidently/legacy/descriptors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .llm_judges import LLMEval
from .llm_judges import NegativityLLMEval
from .llm_judges import PIILLMEval
from .llm_judges import QualityLLMEval
from .llm_judges import ToxicityLLMEval
from .non_letter_character_percentage_descriptor import NonLetterCharacterPercentage
from .oov_words_percentage_descriptor import OOV
Expand Down Expand Up @@ -52,6 +53,7 @@
"NegativityLLMEval",
"PIILLMEval",
"DeclineLLMEval",
"QualityLLMEval",
"ContextQualityLLMEval",
"BiasLLMEval",
"ToxicityLLMEval",
Expand Down
5 changes: 5 additions & 0 deletions src/evidently/legacy/descriptors/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@
"evidently.legacy.descriptors.llm_judges.BinaryClassificationLLMEval",
"evidently:descriptor:BinaryClassificationLLMEval",
)
# Register the serialization alias for QualityLLMEval so persisted configs
# can resolve the "evidently:descriptor:" id back to the implementing class.
register_type_alias(
    FeatureDescriptor,
    "evidently.legacy.descriptors.llm_judges.QualityLLMEval",
    "evidently:descriptor:QualityLLMEval",
)
register_type_alias(
FeatureDescriptor,
"evidently.legacy.descriptors.llm_judges.ContextQualityLLMEval",
Expand Down
22 changes: 22 additions & 0 deletions src/evidently/legacy/descriptors/llm_judges.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,28 @@ class Config:
model = "gpt-4o-mini"


class QualityLLMEval(BinaryClassificationLLMEval):
    """Reference-free binary LLM judge: classifies text as high quality (HQ)
    or low quality (LQ), with reasoning included in the output."""

    class Config:
        type_alias = "evidently:descriptor:QualityLLMEval"

    name: ClassVar = "Quality"
    template: ClassVar = BinaryClassificationPromptTemplate(
        # Fix: each criteria line was wrapped in literal '"' characters
        # (copy-paste artifact); triple-quoting already delimits the string,
        # so those quotes leaked verbatim into the rendered prompt.
        criteria=textwrap.dedent(
            """
            A LQ indicates that the post is of very low quality, semantically meaningless, and contains broken-off or repetitive text.
            A HQ indicates the post is of very high quality, addressing a complex topic with advanced vocabulary, phrasing, and style.
            """
        ).strip(),
        target_category="HQ",
        non_target_category="LQ",
        uncertainty=Uncertainty.UNKNOWN,
        include_reasoning=True,
        pre_messages=[LLMMessage.system("You are a judge which evaluates text.")],
    )
    provider = "openai"
    model = "gpt-4o-mini"


class ContextQualityLLMEval(BinaryClassificationLLMEval):
class Config:
type_alias = "evidently:descriptor:ContextQualityLLMEval"
Expand Down