Skip to content

Commit 587deb2

Browse files
authored
chore: add inter_token_latency in ModelServerMetrics for sglang metrics (#242)
1 parent 1a449e3 commit 587deb2

File tree

1 file changed

+5
-0
lines changed
  • inference_perf/client/metricsclient

1 file changed

+5
-0
lines changed

inference_perf/client/metricsclient/base.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,10 @@ class ModelServerMetrics(BaseModel):
57 57
median_time_per_output_token: float = 0.0
58 58
p90_time_per_output_token: float = 0.0
59 59
p99_time_per_output_token: float = 0.0
60 +
avg_inter_token_latency: float = 0.0
61 +
median_inter_token_latency: float = 0.0
62 +
p90_inter_token_latency: float = 0.0
63 +
p99_inter_token_latency: float = 0.0
60 64

61 65
# Request
62 66
total_requests: int = 0
@@ -77,6 +81,7 @@ class ModelServerMetrics(BaseModel):
77 81
prefix_cache_queries: float = 0.0
78 82

79 83

84 +
80 85
class MetricsClient(ABC):
81 86
@abstractmethod
82 87
def __init__(self) -> None:

0 commit comments

Comments
 (0)