[fix] Adjusting VllmStatLogger for 0.7.0 changes in API (#81) #82

Merged · 2 commits · Feb 18, 2025 (changes shown from 1 commit)
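
Summary: vLLM 0.7.0 changed the stats API so that StatLoggerBase now receives the full VllmConfig at construction. This PR threads the engine's VllmConfig through to VllmStatLogger, derives max_model_len from it, removes the now-absent vllm:request_params_best_of histogram from the README, and relaxes the vllm:request_params_n test assertions to presence checks.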
7 changes: 0 additions & 7 deletions README.md
@@ -284,13 +284,6 @@ vllm:request_generation_tokens_sum{model="vllm_model",version="1"} 16
 vllm:request_generation_tokens_bucket{model="vllm_model",version="1",le="1"} 0
 ...
 vllm:request_generation_tokens_bucket{model="vllm_model",version="1",le="+Inf"} 1
-# HELP vllm:request_params_best_of Histogram of the best_of request parameter.
-# TYPE vllm:request_params_best_of histogram
-vllm:request_params_best_of_count{model="vllm_model",version="1"} 1
-vllm:request_params_best_of_sum{model="vllm_model",version="1"} 1
-vllm:request_params_best_of_bucket{model="vllm_model",version="1",le="1"} 1
-...
-vllm:request_params_best_of_bucket{model="vllm_model",version="1",le="+Inf"} 1
 # HELP vllm:request_params_n Histogram of the n request parameter.
 # TYPE vllm:request_params_n histogram
 vllm:request_params_n_count{model="vllm_model",version="1"} 1

20 changes: 3 additions & 17 deletions ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py
@@ -189,24 +189,10 @@ def test_custom_sampling_params(self):
             model_name=self.vllm_model_name,
         )
         metrics_dict = self.parse_vllm_metrics()
-        total_prompts = len(self.prompts)
-
-        # vllm:request_params_best_of
-        """
-        self.assertEqual(
-            metrics_dict["vllm:request_params_best_of_count"], total_prompts
-        )
-        self.assertEqual(
-            metrics_dict["vllm:request_params_best_of_sum"], best_of * total_prompts
-        )
-        self.assertEqual(
-            metrics_dict["vllm:request_params_best_of_bucket"], total_prompts
-        )
-        """
         # vllm:request_params_n
-        self.assertEqual(metrics_dict["vllm:request_params_n_count"], total_prompts)
-        # self.assertEqual(metrics_dict["vllm:request_params_n_sum"], n * total_prompts)
-        self.assertEqual(metrics_dict["vllm:request_params_n_bucket"], total_prompts)
+        self.assertIn("vllm:request_params_n_count", metrics_dict)
+        self.assertIn("vllm:request_params_n_sum", metrics_dict)
+        self.assertIn("vllm:request_params_n_bucket", metrics_dict)
 
     def test_vllm_metrics_disabled(self):
         # Test vLLM metrics
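
The exact-value assertions become presence checks, presumably because the reported values are less stable across vLLM versions (the n_sum check was already commented out before this change). For context, a minimal sketch of what a parse_vllm_metrics-style helper can look like; the real helper is defined elsewhere in this test file, and the endpoint URL and parsing details below are assumptions:

import requests

def parse_vllm_metrics(url="http://localhost:8002/metrics"):
    # Triton serves Prometheus-format metrics on port 8002 by default.
    metrics = {}
    for line in requests.get(url).text.splitlines():
        # Skip "# HELP" / "# TYPE" comments and blank lines.
        if not line or line.startswith("#"):
            continue
        # A sample looks like: vllm:request_params_n_count{model="vllm_model",version="1"} 1
        name_with_labels, _, value = line.rpartition(" ")
        name = name_with_labels.split("{", 1)[0]
        if name.startswith("vllm:"):
            metrics[name] = float(value)
    return metrics

With a dict like this, assertIn verifies only that each metric family was registered, which is what remains stable across the 0.7.0 change.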
6 changes: 2 additions & 4 deletions src/model.py
@@ -359,10 +359,8 @@ def _setup_metrics(self):
                     "version": self.args["model_version"],
                 }
                 # Add vLLM custom metrics
-                engine_config = self._llm_engine.engine.model_config
-                self._vllm_metrics = VllmStatLogger(
-                    labels, engine_config.max_model_len, self.logger
-                )
+                vllm_config = self._llm_engine.engine.vllm_config
+                self._vllm_metrics = VllmStatLogger(labels, vllm_config, self.logger)
                 self._llm_engine.add_logger("triton", self._vllm_metrics)
             except pb_utils.TritonModelException as e:
                 if "metrics not supported" in str(e):
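
Design note: passing the whole VllmConfig rather than a bare max_model_len matches the new StatLoggerBase contract, and model.py no longer needs re-plumbing if the logger later needs other config fields; the length limit is still available as vllm_config.model_config.max_model_len, as src/utils/metrics.py below shows.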
9 changes: 6 additions & 3 deletions src/utils/metrics.py
@@ -29,6 +29,7 @@
 from typing import Dict, List, Union
 
 import triton_python_backend_utils as pb_utils
+from vllm.config import VllmConfig
 from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase
 from vllm.engine.metrics import Stats as VllmStats
 from vllm.engine.metrics import SupportsMetricsInfo, build_1_2_5_buckets
@@ -163,11 +164,13 @@ def __init__(self, labels: List[str], max_model_len: int):
 class VllmStatLogger(VllmStatLoggerBase):
     """StatLogger is used as an adapter between vLLM stats collector and Triton metrics provider."""
 
-    def __init__(self, labels: Dict, max_model_len: int, log_logger) -> None:
+    def __init__(self, labels: Dict, vllm_config: VllmConfig, log_logger) -> None:
         # Tracked stats over current local logging interval.
         # local_interval not used here. It's for vLLM logs to stdout.
-        super().__init__(local_interval=0)
-        self.metrics = TritonMetrics(labels, max_model_len)
+        super().__init__(local_interval=0, vllm_config=vllm_config)
+        self.metrics = TritonMetrics(
+            labels=labels, max_model_len=vllm_config.model_config.max_model_len
+        )
         self.log_logger = log_logger
 
         # Starting the metrics thread. It allows vLLM to keep making progress
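
Note that TritonMetrics keeps its (labels, max_model_len) signature; VllmStatLogger unpacks the config before delegating, which localizes the 0.7.0 change to this adapter. For orientation, a minimal sketch (vLLM 0.7.x assumed, model name illustrative, independent of Triton) of where max_model_len now lives:

from vllm.engine.arg_utils import EngineArgs

# create_engine_config() builds the VllmConfig object that vLLM 0.7.0
# threads through the engine and into stat loggers such as this one.
vllm_config = EngineArgs(model="facebook/opt-125m").create_engine_config()

# The value TritonMetrics previously received as a bare int argument:
print(vllm_config.model_config.max_model_len)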