@@ -74,6 +74,8 @@ def __enter__(self) -> Self:
7474 """
7575 self ._startup ()
7676 self ._wait_for_ready ()
77+ if self .settings .profile :
78+ self ._start_profile ()
7779 return self
7880
7981 def __exit__ (
@@ -89,6 +91,8 @@ def __exit__(
8991 exc_val: The exception value if an exception was raised.
9092 exc_tb: The exception traceback if an exception was raised.
9193 """
94+ if self .settings .profile :
95+ self ._stop_profile ()
9296 logger .info ("Shutting down endpoint: {}" , self .endpoint )
9397 self ._shutdown ()
9498 logger .info ("Endpoint shut down successfully" )
@@ -101,6 +105,16 @@ def _startup(self) -> None:
101105 """
102106 raise NotImplementedError
103107
108+ @abstractmethod
109+ def _start_profile (self ) -> None :
110+ """Start the profiler"""
111+ raise NotImplementedError
112+
113+ @abstractmethod
114+ def _stop_profile (self ) -> None :
115+ """Stop the profiler"""
116+ raise NotImplementedError
117+
104118 @abstractmethod
105119 def _failfast (self ) -> None :
106120 """Raise an exception if the endpoint is already detected to be dead."""
@@ -131,6 +145,7 @@ def _wait_for_ready(self) -> None:
131145
132146 raise EndpointStartupTimeoutError (self .endpoint .startup_timeout )
133147
148+
134149 @abstractmethod
135150 def _shutdown (self ) -> None :
136151 """Shut down the endpoint and clean up resources.
@@ -288,6 +303,32 @@ def _stderr_log_file_key(self) -> str:
288303 """Get the log file key for the stderr log."""
289304 return "vllm-stderr"
290305
def _start_profile(self) -> None:
    """Ask the server to start profiling via ``POST <base>/start_profile``.

    ``<base>`` is ``self.endpoint.url`` with any trailing slashes and one
    trailing ``/v1`` path segment removed. The call is best-effort: a
    transport error or a non-OK response is logged as a warning and never
    raised, so a profiling failure does not abort endpoint startup.
    """
    # str.rstrip("/v1") strips any trailing run of the characters '/', 'v'
    # and '1' rather than the literal suffix, which corrupts URLs such as
    # "http://host:8001/v1" (-> "http://host:800"). Remove the suffix
    # explicitly instead.
    base_url = self.endpoint.url.rstrip("/").removesuffix("/v1")
    profile_url = f"{base_url}/start_profile"
    try:
        response = requests.post(
            profile_url,
            timeout=self.endpoint.payload_timeout.total_seconds(),
        )
    except requests.exceptions.RequestException as exc:
        logger.warning("Failed to start profile at {}: {}", profile_url, exc)
        return

    if response.status_code == HTTP_OK:
        logger.info("Profile started successfully")
    else:
        logger.warning(
            "Start profile request to {} returned status {}",
            profile_url,
            response.status_code,
        )
318+
def _stop_profile(self) -> None:
    """Ask the server to stop profiling via ``POST <base>/stop_profile``.

    ``<base>`` is ``self.endpoint.url`` with any trailing slashes and one
    trailing ``/v1`` path segment removed. The call is best-effort: a
    transport error or a non-OK response is logged as a warning and never
    raised, so a profiling failure does not prevent endpoint shutdown.
    """
    # str.rstrip("/v1") strips any trailing run of the characters '/', 'v'
    # and '1' rather than the literal suffix, which corrupts URLs such as
    # "http://host:8001/v1" (-> "http://host:800"). Remove the suffix
    # explicitly instead.
    base_url = self.endpoint.url.rstrip("/").removesuffix("/v1")
    profile_url = f"{base_url}/stop_profile"
    try:
        response = requests.post(
            profile_url,
            timeout=self.endpoint.payload_timeout.total_seconds(),
        )
    except requests.exceptions.RequestException as exc:
        logger.warning("Failed to stop profile at {}: {}", profile_url, exc)
        return

    if response.status_code == HTTP_OK:
        logger.info("Profile stopped successfully")
    else:
        logger.warning(
            "Stop profile request to {} returned status {}",
            profile_url,
            response.status_code,
        )
331+
291332 def _build_command (self ) -> list [str ]:
292333 """Build the command to start the vLLM server."""
293334 # Parse the URL to extract host and port
0 commit comments