rungalileo · xke · Jun 12, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/python/experiments/multi-turn/.env.example b/python/experiments/multi-turn/.env.example
@@ -0,0 +1,5 @@
+GALILEO_API_KEY="your-galileo-api-key"
+GALILEO_PROJECT="your-galileo-project"
+
+# Provide the console url below if you are not using app.galileo.ai
+# GALILEO_CONSOLE_URL="your-galileo-console-url"
diff --git a/python/experiments/multi-turn/README.md b/python/experiments/multi-turn/README.md
@@ -0,0 +1,87 @@
+# Multi-Turn Experiment Example
+
+The example in this folder demonstrates how to use [create_experiment](https://docs.galileo.ai/sdk-api/python/reference/experiments#create_experiment) to compute a session-level metric for a multi-turn conversation. 
+
+## Setup Instructions
+
+### 1. Create and Activate Virtual Environment
+
+```bash
+# Navigate to the example folder
+cd python/experiments/multi-turn
+
+# Create virtual environment
+python -m venv venv
+
+# Activate virtual environment
+source venv/bin/activate
+```
+
+### 2. Install Dependencies
+
+Install the required packages:
+
+```bash
+pip install -r requirements.txt
+```
+
+### 3. Configure Environment Variables
+
+Copy `.env.example` to `.env` and enter your credentials. Your `.env` should look like this:
+
+```bash
+
+# Required: Your Galileo API key
+GALILEO_API_KEY="your-galileo-api-key"
+
+# Required: Galileo project name
+GALILEO_PROJECT="your-galileo-project"
+
+# Provide the console url below if you are not using app.galileo.ai
+# GALILEO_CONSOLE_URL="your-galileo-console-url"
+```
+
+### 4. Add Integration in Galileo Console
+
+The session-level metric in this example uses an LLM. 
+
+Make sure that you've configured a valid LLM integration in the Galileo console.
+
+Related documentation: [Configure an LLM integration](https://docs.galileo.ai/getting-started/evaluate-and-improve/evaluate-and-improve#configure-an-llm-integration)
+
+## Basic Example
+
+Run the basic example:
+
+```bash
+python basic-example.py
+```
+
+The `METRIC_NAME` variable in this script specifies which session-level metric to compute.
+
+Pre-defined session-level metrics include:
+
+- `GalileoMetrics.conversation_quality`
+- `GalileoMetrics.action_completion`
+- `GalileoMetrics.action_advancement`
+- `GalileoMetrics.agent_efficiency`
+- `GalileoMetrics.context_adherence`
+- `GalileoMetrics.context_relevance`
+- `GalileoMetrics.tool_error_rate`
+
+Related documentation: [Metrics Comparison](https://docs.galileo.ai/concepts/metrics/metric-comparison)
+
+Optionally, you can define your own custom session-level metric in the Galileo Console UI, and then set `METRIC_NAME` in the script to the custom metric's name.
+
+![Example custom session-level boolean metric](screenshot-custom-session-level-boolean-metric.png)
+
+## Troubleshooting
+
+Visit the "Sessions" tab of the Experiment in the Galileo Console to confirm the status of the metric computation.
+
+![Troubleshooting auth error](screenshot-session-level-metric-auth-error.png)
+
+If you see an auth error, go to the metric details and make sure that a [valid integration](https://docs.galileo.ai/getting-started/evaluate-and-improve/evaluate-and-improve#configure-an-llm-integration) has been configured. 
+
+![Metric details](screenshot-session-level-metric-details.png)
+
diff --git a/python/experiments/multi-turn/basic-example.py b/python/experiments/multi-turn/basic-example.py
@@ -0,0 +1,124 @@
+"""Log a multi-turn conversation to a Galileo experiment and wait for a session-level metric.
+
+Requires GALILEO_API_KEY and GALILEO_PROJECT in the environment (or in a .env file).
+"""
+
+import os
+import time
+
+from dotenv import load_dotenv
+from galileo import galileo_context, GalileoMetrics
+from galileo.experiments import create_experiment, get_experiment
+from galileo.projects import get_project, create_project
+from galileo.search import get_sessions
+from galileo.utils.metrics import create_metric_configs
+from galileo.resources.models import MetricSuccess
+
+# Provide the name of a session-level metric
+METRIC_NAME = GalileoMetrics.conversation_quality
+
+# example custom metric name (must be set up in advance)
+# METRIC_NAME = "multi-turn-session-test-metric-apples"
+
+# How often to poll for the metric result, and how long to wait before giving up
+POLL_INTERVAL_SECONDS = 10
+POLL_TIMEOUT_SECONDS = 600
+
+# Load environment variables from the .env file
+load_dotenv()
+
+# Get the Galileo project, creating it if it does not exist yet
+
+project_name = os.getenv("GALILEO_PROJECT")
+if not project_name:
+    # Fail early with a clear message instead of passing None to the SDK
+    raise SystemExit("GALILEO_PROJECT is not set. Add it to your .env file (see .env.example).")
+
+project_obj = get_project(name=project_name)
+if not project_obj:
+    project_obj = create_project(project_name)
+
+print(f"Project name: {project_obj.name}, Project ID: {project_obj.id}")
+
+# Create a unique experiment (the name suffix has one-minute resolution)
+
+time_suffix = time.strftime("%m%d-%H%M")
+
+experiment = create_experiment(
+    experiment_name=f"multi-turn-experiment-{time_suffix}",
+    experiment_group="multi-turn examples",
+)
+print(f"Experiment name: {experiment.name}")
+
+galileo_context.init(project=project_obj.name, experiment_id=experiment.id)
+
+# Enable a session-level metric in the created experiment, and get the metric ID
+
+metric_configs, _ = create_metric_configs(
+    project_id=project_obj.id,
+    run_id=experiment.id,
+    metrics=[METRIC_NAME],
+)
+# Explicit check rather than `assert`, which is stripped when Python runs with -O
+if len(metric_configs) != 1:
+    raise RuntimeError(f"Expected exactly 1 metric config for {METRIC_NAME}, got {len(metric_configs)}")
+
+metric_name = metric_configs[0].name
+metric_id = metric_configs[0].id
+print(f"Metric Name: {metric_name}")
+print(f"Metric ID: {metric_id}")
+
+# Log a multi-turn convo using Galileo context and logger
+
+multi_turn_convo = [
+    {"user": "What is your favorite fruit?", "assistant": "I like blueberries. What about you?"},
+    {"user": "I like strawberries.", "assistant": "Strawberries are great! Do you like blueberries too?"},
+    {"user": "Yes, I do!", "assistant": "Awesome! Blueberries are delicious and packed with nutrients."},
+]
+
+logger = galileo_context.get_logger_instance(project=project_obj.name, experiment_id=experiment.id)
+
+# Create a session and log traces for each turn in the conversation
+
+logger.start_session()
+
+for turn in multi_turn_convo:
+    logger.start_trace(input=turn["user"], name="User turn")
+    logger.add_llm_span(
+        input=turn["user"],
+        output=turn["assistant"],
+        model="gpt-5.4-mini",
+    )
+    logger.conclude(output=turn["assistant"])
+
+galileo_context.flush()
+
+# Poll the session-level metric until it's computed or the timeout expires
+
+deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
+while True:
+    sessions = get_sessions(project_id=project_obj.id, experiment_id=experiment.id)
+
+    # The session or its metric entry may not be available right after flushing;
+    # treat that as "not ready yet" instead of crashing.
+    metric = None
+    if sessions.records:
+        try:
+            metric = sessions.records[0].metric_info[metric_id]
+        except (KeyError, TypeError):
+            pass  # metric_info is missing or has no entry for this metric yet
+
+    if isinstance(metric, MetricSuccess):
+        print(f"Metric {METRIC_NAME} computed successfully with value: {metric.value}")
+        break
+
+    if time.monotonic() >= deadline:
+        raise SystemExit(
+            f"Metric {METRIC_NAME} was not computed within {POLL_TIMEOUT_SECONDS} seconds. "
+            'Check the "Sessions" tab of the experiment in the Galileo Console.'
+        )
+
+    state = "not available" if metric is None else type(metric).__name__
+    print(f"Metric is not computed yet (state: {state}), retrying in {POLL_INTERVAL_SECONDS} seconds...")
+
+    time.sleep(POLL_INTERVAL_SECONDS)
diff --git a/python/experiments/multi-turn/requirements.txt b/python/experiments/multi-turn/requirements.txt
@@ -0,0 +1,2 @@
+galileo
+python-dotenv
diff --git a/python/experiments/multi-turn/screenshot-custom-session-level-boolean-metric.png b/python/experiments/multi-turn/screenshot-custom-session-level-boolean-metric.png
diff --git a/python/experiments/multi-turn/screenshot-session-level-metric-auth-error.png b/python/experiments/multi-turn/screenshot-session-level-metric-auth-error.png
diff --git a/python/experiments/multi-turn/screenshot-session-level-metric-details.png b/python/experiments/multi-turn/screenshot-session-level-metric-details.png