88from pydantic import ValidationError
99
1010from lightspeed_evaluation .core .models import EvaluationData , TurnData
11+ from lightspeed_evaluation .core .models .data import DatasetMetadata
1112from lightspeed_evaluation .core .system .exceptions import DataValidationError
1213
1314if TYPE_CHECKING :
@@ -167,6 +168,7 @@ def __init__(
167168 """
168169 self .validation_errors : list [str ] = []
169170 self .evaluation_data : Optional [list [EvaluationData ]] = None
171+ self .dataset_metadata : Optional [DatasetMetadata ] = None
170172 self .api_enabled = api_enabled
171173 self .original_data_path : Optional [str ] = None
172174 self .fail_on_invalid_data = fail_on_invalid_data
@@ -189,6 +191,15 @@ def _conversation_level_metrics(self) -> set[str]:
189191 def _load_and_parse_yaml (self , data_path : str ) -> list [EvaluationData ]:
190192 """Load a YAML file and convert each entry to an EvaluationData model.
191193
194+ Supports two root formats for backward compatibility:
195+
196+ 1. **List format** (original): YAML root is a list of conversations.
197+ 2. **Dict format** (new): YAML root is a dict with optional ``metadata``
198+ and required ``conversations`` keys.
199+
200+ When the dict format is used, dataset-level metadata is parsed and
201+ stored on ``self.dataset_metadata``.
202+
192203 Args:
193204 data_path: Path to the evaluation data YAML file.
194205
@@ -211,13 +222,12 @@ def _load_and_parse_yaml(self, data_path: str) -> list[EvaluationData]:
211222
212223 if raw_data is None :
213224 raise DataValidationError ("Empty or invalid YAML file" )
214- if not isinstance (raw_data , list ):
215- raise DataValidationError (
216- f"YAML root must be a list, got { type (raw_data ).__name__ } "
217- )
225+
226+ self .dataset_metadata = None
227+ raw_conversations = self ._extract_conversations_and_metadata (raw_data )
218228
219229 evaluation_data = []
220- for i , data_dict in enumerate (raw_data ):
230+ for i , data_dict in enumerate (raw_conversations ):
221231 try :
222232 eval_data = EvaluationData (** data_dict )
223233 evaluation_data .append (eval_data )
@@ -235,6 +245,57 @@ def _load_and_parse_yaml(self, data_path: str) -> list[EvaluationData]:
235245 ) from e
236246 return evaluation_data
237247
248+ def _extract_conversations_and_metadata (self , raw_data : object ) -> list [dict ]:
249+ """Extract conversation list and optional dataset metadata from raw YAML.
250+
251+ Args:
252+ raw_data: Parsed YAML data (list or dict).
253+
254+ Returns:
255+ List of raw conversation dicts.
256+
257+ Raises:
258+ DataValidationError: If the structure is invalid.
259+ """
260+ if isinstance (raw_data , list ):
261+ return raw_data
262+
263+ if isinstance (raw_data , dict ):
264+ if "conversations" not in raw_data :
265+ raise DataValidationError (
266+ "YAML root is a dict but missing required 'conversations' key. "
267+ "Expected either a list of conversations or a dict with "
268+ "'conversations' (and optional 'metadata') keys."
269+ )
270+
271+ metadata_raw = raw_data .get ("metadata" )
272+ if metadata_raw is not None :
273+ if not isinstance (metadata_raw , dict ):
274+ raise DataValidationError (
275+ f"'metadata' must be a mapping, "
276+ f"got { type (metadata_raw ).__name__ } "
277+ )
278+ try :
279+ self .dataset_metadata = DatasetMetadata (** metadata_raw )
280+ except ValidationError as e :
281+ error_details = format_pydantic_error (e )
282+ raise DataValidationError (
283+ f"Invalid dataset metadata: { error_details } "
284+ ) from e
285+
286+ raw_conversations = raw_data ["conversations" ]
287+ if not isinstance (raw_conversations , list ):
288+ raise DataValidationError (
289+ "'conversations' must be a list, "
290+ f"got { type (raw_conversations ).__name__ } "
291+ )
292+ return raw_conversations
293+
294+ raise DataValidationError (
295+ f"YAML root must be a list or a dict with 'conversations' key, "
296+ f"got { type (raw_data ).__name__ } "
297+ )
298+
238299 def _apply_metrics_filter (
239300 self , evaluation_data : list [EvaluationData ], metrics : list [str ]
240301 ) -> None :
0 commit comments