Skip to content

Commit 2b2a90b

Browse files
committed
add impl
1 parent 27db053 commit 2b2a90b

File tree

2 files changed

+46
-0
lines changed

2 files changed

+46
-0
lines changed

multimodal/qwen3-vl/src/mlperf_inf_mm_q3vl/deploy.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ def __enter__(self) -> Self:
7474
"""
7575
self._startup()
7676
self._wait_for_ready()
77+
if self.settings.profile:
78+
self._start_profile()
7779
return self
7880

7981
def __exit__(
@@ -89,6 +91,8 @@ def __exit__(
8991
exc_val: The exception value if an exception was raised.
9092
exc_tb: The exception traceback if an exception was raised.
9193
"""
94+
if self.settings.profile:
95+
self._stop_profile()
9296
logger.info("Shutting down endpoint: {}", self.endpoint)
9397
self._shutdown()
9498
logger.info("Endpoint shut down successfully")
@@ -101,6 +105,16 @@ def _startup(self) -> None:
101105
"""
102106
raise NotImplementedError
103107

108+
@abstractmethod
109+
def _start_profile(self) -> None:
110+
"""Start the profiler"""
111+
raise NotImplementedError
112+
113+
@abstractmethod
114+
def _stop_profile(self) -> None:
115+
"""Stop the profiler"""
116+
raise NotImplementedError
117+
104118
@abstractmethod
105119
def _failfast(self) -> None:
106120
"""Raise an exception if the endpoint is already detected to be dead."""
@@ -131,6 +145,7 @@ def _wait_for_ready(self) -> None:
131145

132146
raise EndpointStartupTimeoutError(self.endpoint.startup_timeout)
133147

148+
134149
@abstractmethod
135150
def _shutdown(self) -> None:
136151
"""Shut down the endpoint and clean up resources.
@@ -288,6 +303,32 @@ def _stderr_log_file_key(self) -> str:
288303
"""Get the log file key for the stderr log."""
289304
return "vllm-stderr"
290305

306+
def _start_profile(self) -> None:
    """Ask the deployed server to start profiling.

    Sends a POST to ``<base>/start_profile``, where ``<base>`` is
    ``self.endpoint.url`` with any trailing slashes and a trailing ``/v1``
    path segment removed. The call is best-effort: a transport error or a
    non-OK status is logged as a warning and never raised, so a profiling
    failure does not abort the benchmark run.
    """
    # ``str.rstrip("/v1")`` strips a *set of characters*, not a suffix, so it
    # would also eat trailing "1"/"v" characters of the host or port
    # (e.g. ":8001/v1" -> ":800"). Remove the exact suffix instead.
    base_url = self.endpoint.url.rstrip("/").removesuffix("/v1")
    profile_url = base_url + "/start_profile"
    try:
        response = requests.post(
            profile_url,
            timeout=self.endpoint.payload_timeout.total_seconds(),
        )
    except requests.exceptions.RequestException as exc:
        logger.warning(
            "Failed to start profile via {}: {}",
            profile_url,
            exc,
        )
        return
    if response.status_code == HTTP_OK:
        logger.info("Profile started successfully")
    else:
        logger.warning(
            "Starting profile via {} returned HTTP status {}",
            profile_url,
            response.status_code,
        )
318+
319+
def _stop_profile(self) -> None:
    """Ask the deployed server to stop profiling.

    Sends a POST to ``<base>/stop_profile``, where ``<base>`` is
    ``self.endpoint.url`` with any trailing slashes and a trailing ``/v1``
    path segment removed. The call is best-effort: a transport error or a
    non-OK status is logged as a warning and never raised, so the endpoint
    shutdown that follows in ``__exit__`` still runs.
    """
    # ``str.rstrip("/v1")`` strips a *set of characters*, not a suffix, so it
    # would also eat trailing "1"/"v" characters of the host or port
    # (e.g. ":8001/v1" -> ":800"). Remove the exact suffix instead.
    base_url = self.endpoint.url.rstrip("/").removesuffix("/v1")
    profile_url = base_url + "/stop_profile"
    try:
        response = requests.post(
            profile_url,
            timeout=self.endpoint.payload_timeout.total_seconds(),
        )
    except requests.exceptions.RequestException as exc:
        logger.warning(
            "Failed to stop profile via {}: {}",
            profile_url,
            exc,
        )
        return
    if response.status_code == HTTP_OK:
        logger.info("Profile stopped successfully")
    else:
        logger.warning(
            "Stopping profile via {} returned HTTP status {}",
            profile_url,
            response.status_code,
        )
331+
291332
def _build_command(self) -> list[str]:
292333
"""Build the command to start the vLLM server."""
293334
# Parse the URL to extract host and port

multimodal/qwen3-vl/src/mlperf_inf_mm_q3vl/schema.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,9 @@ class Settings(BaseModelWithAttributeDescriptionsFromDocstrings):
585585
logging: LogSettings
586586
"""Test logging parameters."""
587587

588+
profile: bool = False
589+
"""Whether to profile the endpoint."""
590+
588591
@model_validator(mode="after")
589592
def override_test_settings_from_user_conf(self) -> Self:
590593
"""Override the test settings from the user.conf file."""
@@ -768,6 +771,8 @@ class EndpointToDeploy(Endpoint):
768771
healthcheck_timeout: timedelta = timedelta(seconds=5)
769772
"""The timeout for the healthcheck request to the endpoint."""
770773

774+
payload_timeout: timedelta = timedelta(seconds=10)
775+
"""The timeout for the payload request to the endpoint."""
771776

772777
class VllmEndpoint(EndpointToDeploy):
773778
"""Specifies how to deploy an OpenAI API endpoint in vLLM for benchmarking."""

0 commit comments

Comments
 (0)