|
12 | 12 | logger = logging.getLogger(__name__) |
13 | 13 |
|
14 | 14 |
|
class TurnMetadata(BaseModel):
    """Optional user-defined metadata for a single turn."""

    # Every field below is optional and defaults to None. Keys that are not
    # declared on this model are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    # --- Grading and provenance of the turn ---
    complexity: Optional[str] = Field(
        None,
        description="Complexity level (e.g. Simple, Moderate, Complex)",
    )
    data_source: Optional[str] = Field(
        None,
        description="Data source (e.g. Human-written, Production log, Synthetic)",
    )

    # --- Human verification ---
    human_verified: Optional[bool] = Field(
        None,
        description="Whether a domain expert verified this turn",
    )
    verified_by: Optional[str] = Field(
        None,
        description="Verifier name or ID",
    )

    # --- Scenario details ---
    negative_type: Optional[str] = Field(
        None,
        description="Negative scenario type (e.g. Out-of-scope, Adversarial, Ambiguous)",
    )
    persona: Optional[str] = Field(
        None,
        description="User persona represented (e.g. developer, admin, beginner)",
    )

    # Stored as a free-form string; no date parsing is declared here.
    date_created: Optional[str] = Field(
        None,
        description="When the sample was created (e.g. 2025-06-15)",
    )

    # Escape hatch for keys that have no dedicated field above.
    additional_metadata: Optional[dict[str, Any]] = Field(
        None,
        description="Arbitrary key-value pairs for extra metadata",
    )
| 46 | + |
| 47 | + |
class ConversationMetadata(BaseModel):
    """Optional user-defined metadata for a conversation group."""

    # Every field below is optional and defaults to None. Keys that are not
    # declared on this model are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    # --- Classification of the conversation ---
    scenario_category: Optional[str] = Field(
        None,
        description="Scenario category (e.g. Core/Happy path, Edge Case, Negative)",
    )
    use_case: Optional[str] = Field(
        None,
        description="System capability (e.g. RAG, Agent/Tools)",
    )
    interaction_type: Optional[str] = Field(
        None,
        description="Interaction type (e.g. Single-turn, Multi-turn)",
    )
    topic: Optional[str] = Field(
        None,
        description="Domain subject area (e.g. networking, storage)",
    )

    # --- Traceability and free-form annotations ---
    jtbd_reference: Optional[str] = Field(
        None,
        description="Jobs-to-be-done reference (Job/Task)",
    )
    notes: Optional[str] = Field(
        None,
        description="Free-text notes about the conversation",
    )

    # Escape hatch for keys that have no dedicated field above.
    additional_metadata: Optional[dict[str, Any]] = Field(
        None,
        description="Arbitrary key-value pairs for extra metadata",
    )
| 78 | + |
| 79 | + |
class DatasetMetadata(BaseModel):
    """Optional user-defined metadata for the entire evaluation dataset."""

    # Every field below is optional and defaults to None. Keys that are not
    # declared on this model are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    # --- Ownership and versioning ---
    team_product: Optional[str] = Field(
        None,
        description="Owning team or product (with contact details)",
    )
    dataset_version: Optional[str] = Field(
        None,
        description="Dataset version for tracking iterations",
    )

    # --- Data hygiene ---
    pii_confirmed_removed: Optional[bool] = Field(
        None,
        description="Whether PII has been confirmed removed",
    )

    # --- Generation pipeline ---
    generation_tools: Optional[list[str]] = Field(
        None,
        description="Tools used for synthetic data generation",
    )
    llms_used: Optional[list[str]] = Field(
        None,
        description="LLMs used in the generation pipeline",
    )

    # Stored as a free-form string; no date parsing is declared here.
    last_updated: Optional[str] = Field(
        None,
        description="Date the dataset was last updated (e.g. 2025-06-15)",
    )

    # Escape hatch for keys that have no dedicated field above.
    additional_metadata: Optional[dict[str, Any]] = Field(
        None,
        description="Arbitrary key-value pairs for extra metadata",
    )
| 106 | + |
| 107 | + |
15 | 108 | def _validate_and_deduplicate_metrics( |
16 | 109 | metrics: list[str], metric_type: str = "metric" |
17 | 110 | ) -> list[str]: |
@@ -39,6 +132,10 @@ class TurnData(StreamingMetricsMixin): |
39 | 132 | model_config = ConfigDict(extra="forbid") |
40 | 133 |
|
41 | 134 | turn_id: str = Field(..., min_length=1, description="Turn ID (alphanumeric)") |
| 135 | + metadata: Optional[TurnMetadata] = Field( |
| 136 | + default=None, |
| 137 | + description="User-defined metadata for traceability and quality grading", |
| 138 | + ) |
42 | 139 | query: Optional[str] = Field( |
43 | 140 | default=None, |
44 | 141 | min_length=1, |
@@ -428,6 +525,10 @@ class EvaluationData(BaseModel): |
428 | 525 | conversation_group_id: str = Field( |
429 | 526 | ..., min_length=1, description="Unique conversation group identifier" |
430 | 527 | ) |
| 528 | + metadata: Optional[ConversationMetadata] = Field( |
| 529 | + default=None, |
| 530 | + description="User-defined metadata for traceability and quality grading", |
| 531 | + ) |
431 | 532 | description: Optional[str] = Field( |
432 | 533 | default=None, |
433 | 534 | min_length=1, |
|
0 commit comments