1414from lightspeed_evaluation .core .models .data import EvaluationData , EvaluationResult
1515from lightspeed_evaluation .core .output .statistics import (
1616 bootstrap_intervals ,
17+ calculate_api_latency_stats ,
1718 calculate_api_token_usage ,
1819 calculate_streaming_stats ,
1920)
2021
2122
class NumericStats(BaseModel):
    """Summary statistics over a collection of numeric values.

    Used for timing-style metrics such as TTFT, duration, and API latency.
    All fields except ``count`` are ``None`` when no values were observed.
    """

    # How many values contributed to the statistics below.
    count: int = Field(default=0, description="Number of values")
    # Central tendency.
    mean: Optional[float] = Field(default=None, description="Mean value")
    median: Optional[float] = Field(default=None, description="Median value")
    # Spread.
    std: Optional[float] = Field(default=None, description="Standard deviation")
    # Extremes.
    min_value: Optional[float] = Field(default=None, description="Minimum value")
    max_value: Optional[float] = Field(default=None, description="Maximum value")
    # Tail percentiles, useful for latency distributions.
    p95: Optional[float] = Field(default=None, description="95th percentile")
    p99: Optional[float] = Field(default=None, description="99th percentile")
3235
3336class ScoreStatistics (BaseModel ):
@@ -137,6 +140,9 @@ class EvaluationSummary(BaseModel):
137140 api_tokens : Optional [ApiTokenUsage ] = Field (
138141 default = None , description = "API token usage (when evaluation data provided)"
139142 )
143+ api_latency : Optional [NumericStats ] = Field (
144+ default = None , description = "API latency statistics (when API enabled)"
145+ )
140146 streaming : Optional [StreamingStats ] = Field (
141147 default = None , description = "Streaming performance stats (when available)"
142148 )
@@ -178,6 +184,9 @@ def from_results(
178184 api_tokens = _compute_api_token_usage (evaluation_data )
179185 streaming = _compute_streaming_stats (evaluation_data )
180186
187+ # Compute API latency statistics from results
188+ api_latency = _compute_api_latency_stats (results )
189+
181190 return cls (
182191 timestamp = timestamp ,
183192 results = results ,
@@ -186,6 +195,7 @@ def from_results(
186195 by_conversation = by_conversation ,
187196 by_tag = by_tag ,
188197 api_tokens = api_tokens ,
198+ api_latency = api_latency ,
189199 streaming = streaming ,
190200 )
191201
@@ -417,7 +427,7 @@ def _numeric_stats_from_dict(raw: dict[str, Any]) -> Optional[NumericStats]:
417427 """Convert a raw numeric stats dictionary to a NumericStats model.
418428
419429 Args:
420- raw: Dictionary with count, mean, median, std, min, max keys.
430+ raw: Dictionary with count, mean, median, std, min, max, p95, p99 keys.
421431
422432 Returns:
423433 NumericStats instance, or None if count is 0.
@@ -432,6 +442,8 @@ def _numeric_stats_from_dict(raw: dict[str, Any]) -> Optional[NumericStats]:
432442 std = raw .get ("std" ),
433443 min_value = raw .get ("min" ),
434444 max_value = raw .get ("max" ),
445+ p95 = raw .get ("p95" ),
446+ p99 = raw .get ("p99" ),
435447 )
436448
437449
@@ -454,6 +466,23 @@ def _compute_api_token_usage(
454466 )
455467
456468
def _compute_api_latency_stats(
    results: Optional[list[EvaluationResult]],
) -> Optional[NumericStats]:
    """Compute API latency statistics from evaluation results.

    Args:
        results: List of evaluation results containing API latency data.

    Returns:
        NumericStats instance, or None if no API latency data available.
    """
    # Empty or missing result list means there is nothing to aggregate.
    if not results:
        return None
    latency_summary = calculate_api_latency_stats(results)
    return _numeric_stats_from_dict(latency_summary)
484+
485+
457486def _compute_streaming_stats (
458487 evaluation_data : list [EvaluationData ],
459488) -> Optional [StreamingStats ]:
0 commit comments