We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 1a449e3 commit 587deb2Copy full SHA for 587deb2
inference_perf/client/metricsclient/base.py
@@ -57,6 +57,10 @@ class ModelServerMetrics(BaseModel):
57
median_time_per_output_token: float = 0.0
58
p90_time_per_output_token: float = 0.0
59
p99_time_per_output_token: float = 0.0
60
+ avg_inter_token_latency: float = 0.0
61
+ median_inter_token_latency: float = 0.0
62
+ p90_inter_token_latency: float = 0.0
63
+ p99_inter_token_latency: float = 0.0
64
65
# Request
66
total_requests: int = 0
@@ -77,6 +81,7 @@ class ModelServerMetrics(BaseModel):
77
81
prefix_cache_queries: float = 0.0
78
82
79
83
84
+
80
85
class MetricsClient(ABC):
86
@abstractmethod
87
def __init__(self) -> None:
0 commit comments