add impl

maxyanghu · maxyanghu · commit 2b2a90b29c3d · 2026-01-09T07:23:24.000-08:00
diff --git a/multimodal/qwen3-vl/src/mlperf_inf_mm_q3vl/deploy.py b/multimodal/qwen3-vl/src/mlperf_inf_mm_q3vl/deploy.py
@@ -74,6 +74,8 @@ def __enter__(self) -> Self:
         """
         self._startup()
         self._wait_for_ready()
+        if self.settings.profile:
+            self._start_profile()
         return self
 
     def __exit__(
@@ -89,6 +91,8 @@ def __exit__(
             exc_val: The exception value if an exception was raised.
             exc_tb: The exception traceback if an exception was raised.
         """
+        if self.settings.profile:
+            self._stop_profile()
         logger.info("Shutting down endpoint: {}", self.endpoint)
         self._shutdown()
         logger.info("Endpoint shut down successfully")
@@ -101,6 +105,16 @@ def _startup(self) -> None:
         """
         raise NotImplementedError
 
+    @abstractmethod
+    def _start_profile(self) -> None:
+        """Start the profiler; run by `__enter__` when `settings.profile` is set."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _stop_profile(self) -> None:
+        """Stop the profiler; run by `__exit__` when `settings.profile` is set."""
+        raise NotImplementedError
+
     @abstractmethod
     def _failfast(self) -> None:
         """Raise an exception if the endpoint is already detected to be dead."""
@@ -131,6 +145,7 @@ def _wait_for_ready(self) -> None:
 
         raise EndpointStartupTimeoutError(self.endpoint.startup_timeout)
 
+
     @abstractmethod
     def _shutdown(self) -> None:
         """Shut down the endpoint and clean up resources.
@@ -288,6 +303,32 @@ def _stderr_log_file_key(self) -> str:
         """Get the log file key for the stderr log."""
         return "vllm-stderr"
 
+    def _start_profile(self) -> None:
+        """POST to the endpoint's /start_profile route; failures are only logged."""
+        # Not rstrip("/v1"): it strips a character set and mangles ports like 8001.
+        base_url = self.endpoint.url.rstrip("/").removesuffix("/v1")
+        timeout = self.endpoint.payload_timeout.total_seconds()
+        try:
+            response = requests.post(f"{base_url}/start_profile", timeout=timeout)
+            response.raise_for_status()  # HTTP errors take the warning path below
+        except requests.exceptions.RequestException as exc:
+            logger.warning("Failed to start profile: {}", exc)
+        else:
+            logger.info("Profile started successfully")
+    
+    def _stop_profile(self) -> None:
+        """POST to the endpoint's /stop_profile route; failures are only logged."""
+        # Not rstrip("/v1"): it strips a character set and mangles ports like 8001.
+        base_url = self.endpoint.url.rstrip("/").removesuffix("/v1")
+        timeout = self.endpoint.payload_timeout.total_seconds()
+        try:
+            response = requests.post(f"{base_url}/stop_profile", timeout=timeout)
+            response.raise_for_status()  # HTTP errors take the warning path below
+        except requests.exceptions.RequestException as exc:
+            logger.warning("Failed to stop profile: {}", exc)
+        else:
+            logger.info("Profile stopped successfully")
+
     def _build_command(self) -> list[str]:
         """Build the command to start the vLLM server."""
         # Parse the URL to extract host and port
diff --git a/multimodal/qwen3-vl/src/mlperf_inf_mm_q3vl/schema.py b/multimodal/qwen3-vl/src/mlperf_inf_mm_q3vl/schema.py
@@ -585,6 +585,9 @@ class Settings(BaseModelWithAttributeDescriptionsFromDocstrings):
     logging: LogSettings
     """Test logging parameters."""
 
+    profile: bool = False
+    """Whether to profile the endpoint."""
+
     @model_validator(mode="after")
     def override_test_settings_from_user_conf(self) -> Self:
         """Override the test settings from the user.conf file."""
@@ -768,6 +771,8 @@ class EndpointToDeploy(Endpoint):
     healthcheck_timeout: timedelta = timedelta(seconds=5)
     """The timeout for the healthcheck request to the endpoint."""
 
+    payload_timeout: timedelta = timedelta(seconds=10)
+    """The timeout for the payload request to the endpoint."""
 
 class VllmEndpoint(EndpointToDeploy):
     """Specifies how to deploy an OpenAI API endpoint in vLLM for benchmarking."""