27 changes: 27 additions & 0 deletions examples/02_metrics/conversation_quality/README.md
@@ -0,0 +1,27 @@
# Conversation Quality

Multi-turn dialogue evaluation using DeepEval conversation-level metrics.

## Run Example

```bash
# From project root
export OPENAI_API_KEY="your-key"
uv run lightspeed-eval \
--system-config examples/02_metrics/conversation_quality/system.yaml \
--eval-data examples/02_metrics/conversation_quality/eval_data.yaml
```

## Metrics & Required Data

**Current config:** the API is disabled (`api.enabled: false`), so all evaluation data comes from `eval_data.yaml`.

| Metric | Name | Description | Required Data (API Disabled) | Required Data (API Enabled) |
|----------------------------------------|---------------------------|----------------------------------------------------------------------------------|-------------------------------|-----------------------------|
| `deepeval:conversation_completeness` | Conversation Completeness | Evaluates if conversation fully addresses all aspects of the task | turns (query, response) | turns (query) |
| `deepeval:conversation_relevancy` | Turn Relevancy | Measures relevance of each turn throughout the conversation | turns (query, response) | turns (query) |
| `deepeval:knowledge_retention` | Knowledge Retention | Checks if AI retains and uses information from earlier turns | turns (query, response) | turns (query) |

**Note:** Conversation-level metrics evaluate the entire multi-turn dialogue rather than individual turns. With the API enabled, each `response` is fetched from the live API, so only `query` needs to appear in the YAML.
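Conceptually, a conversation-level metric has an LLM judge issue a verdict per turn and then aggregates those verdicts into a single score for the whole dialogue. A minimal sketch of that aggregation, assuming a relevancy-style metric (this is not DeepEval's actual implementation; the judge call is stubbed out as a list of booleans):

```python
# Hypothetical sketch: aggregate per-turn judge verdicts into one
# conversation-level score and compare it to the configured threshold.

def conversation_relevancy(verdicts, threshold=0.7):
    """verdicts: one boolean per turn, True if the judge deemed it relevant."""
    if not verdicts:
        raise ValueError("conversation has no turns")
    score = sum(verdicts) / len(verdicts)  # fraction of relevant turns
    return score, score >= threshold

# Three turns, one judged irrelevant: score = 2/3, below the 0.7 threshold.
score, passed = conversation_relevancy([True, True, False])
```

With the `threshold: 0.7` used in `system.yaml` below, a three-turn conversation with one irrelevant turn scores about 0.67 and fails.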

Results written to: `examples/02_metrics/conversation_quality/eval_output/`
13 changes: 13 additions & 0 deletions examples/02_metrics/conversation_quality/eval_data.yaml
@@ -0,0 +1,13 @@
- conversation_group_id: namespace_learning_conversation
description: Multi-turn conversation about namespaces
tag: multi-turn
turns:
- turn_id: turn_1
query: What is a namespace in OpenShift?
response: A namespace in OpenShift is a logical partition that provides scope for resource names and isolation between different projects or teams.
- turn_id: turn_2
query: How do I create a namespace?
response: You can create a namespace using the command 'oc create namespace <name>'. For example, 'oc create namespace my-project' creates a namespace called my-project.
- turn_id: turn_3
query: Can I use the namespace I just created?
response: Yes, to use the namespace you created (my-project), run 'oc project my-project'. This switches your current context to that namespace.
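Because the API is disabled in this example, every turn must carry both `query` and `response`. A hypothetical validator sketch for that constraint, operating on the parsed YAML structure above (field names match the file; the loader itself is assumed):

```python
# Hypothetical sketch: check that each turn in a conversation group has the
# fields required when api.enabled is false (query AND response).

def validate_conversation(group):
    """Return (turn_id, field) pairs for every missing or empty field."""
    missing = []
    for turn in group.get("turns", []):
        for field in ("query", "response"):
            if not turn.get(field):
                missing.append((turn.get("turn_id"), field))
    return missing

group = {
    "conversation_group_id": "namespace_learning_conversation",
    "turns": [
        {"turn_id": "turn_1", "query": "What is a namespace?", "response": "..."},
        {"turn_id": "turn_2", "query": "How do I create one?"},  # response omitted
    ],
}
print(validate_conversation(group))  # -> [('turn_2', 'response')]
```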
67 changes: 67 additions & 0 deletions examples/02_metrics/conversation_quality/system.yaml
@@ -0,0 +1,67 @@
core:
max_threads: 1
llm_pool:
defaults:
cache_enabled: false
timeout: 120
num_retries: 1
parameters:
temperature: 0.0
max_completion_tokens: 1024
models:
judge_gpt_4o_mini:
provider: openai
model: gpt-4o-mini
judge_panel:
judges:
- judge_gpt_4o_mini
api:
enabled: false
cache_enabled: false
metrics_metadata:
conversation_level:
deepeval:conversation_completeness:
threshold: 0.7
description: Evaluates if conversation fully addresses all aspects of the task
default: true
deepeval:conversation_relevancy:
threshold: 0.7
description: Measures relevance of turns throughout the conversation
default: true
deepeval:knowledge_retention:
threshold: 0.7
description: Checks if AI retains and uses information from earlier turns
default: true
storage:
- type: file
output_dir: examples/02_metrics/conversation_quality/eval_output
base_filename: evaluation
enabled_outputs:
- csv
- json
- txt
csv_columns:
- conversation_group_id
- metric_identifier
- score
- threshold
- result
- reason
visualization:
figsize:
- 12
- 8
dpi: 150
enabled_graphs:
- pass_rates
- score_distribution
- status_breakdown
environment:
DEEPEVAL_TELEMETRY_OPT_OUT: 'YES'
DEEPEVAL_DISABLE_PROGRESS_BAR: 'YES'
LITELLM_LOG: ERROR
logging:
source_level: INFO
package_level: WARNING
log_format: '%(asctime)s - %(levelname)s - %(message)s'
show_timestamps: true
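The `csv_columns` list in the storage block selects which fields of each result record land in the CSV output. A sketch of how such a writer could behave, using only the stdlib `csv` module (the record shape is an assumption; column names match the config above):

```python
# Hypothetical sketch: project result records onto the configured csv_columns,
# silently dropping any extra fields a record may carry.
import csv
import io

csv_columns = ["conversation_group_id", "metric_identifier", "score",
               "threshold", "result", "reason"]

results = [{
    "conversation_group_id": "namespace_learning_conversation",
    "metric_identifier": "deepeval:knowledge_retention",
    "score": 0.9,
    "threshold": 0.7,
    "result": "PASS",
    "reason": "Earlier turns were referenced correctly.",
}]

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=csv_columns, extrasaction="ignore")
writer.writeheader()
writer.writerows(results)
```

`extrasaction="ignore"` makes the writer tolerant of records that carry fields not listed in `csv_columns`, which is convenient when the same records also feed the JSON output.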