@@ -184,10 +184,11 @@ def get_log_bytes(
184184 ) -> tuple [bytes , int ]:
185185 """
186186 Retrieve log bytes from the child process.
187- :param start: First byte to read (inclusive, 0-based)
188- :param end: Last byte to read (inclusive). None means up to
189- start + MAX_LOG_RESPONSE_BYTES - 1 or EOF.
190- :return: (content_bytes, total_file_size)
187+ :param start: First byte to read (inclusive, 0-based).
188+ :param end: Last byte to read (inclusive, must be >= start).
189+ None means up to start + MAX_LOG_RESPONSE_BYTES - 1
190+ or EOF, whichever comes first.
191+ :return: (content_bytes, current_total_log_length)
191192 :raises LogRangeNotAvailable: If start is beyond available content
192193 """
193194 try :
@@ -364,6 +365,7 @@ def parse_range_header(range_header: str) -> tuple[int, int | None]:
364365 if m is None :
365366 raise ValueError (f"Unsupported or malformed Range header: { range_header } " )
366367 start = int (m .group (1 ))
368+ # group(2) is the end value; absent in open-ended ranges like "bytes=100-"
367369 end = int (m .group (2 )) if m .group (2 ) else None
368370 if end is not None and end < start :
369371 raise ValueError (f"Range end ({ end } ) must be >= start ({ start } )" )
@@ -500,10 +502,13 @@ async def get_vllm_instance_logs(
500502 """
501503 Get logs from a specific vLLM instance.
502504
503- Without a Range header the full log (up to 1 MB) is returned with 200 OK.
504- With ``Range: bytes=START-END`` or ``Range: bytes=START-`` the
505- requested slice is returned with 206 Partial Content and a
506- ``Content-Range`` header.
505+ Supports range requests per RFC 9110 §14 (Range Requests).
506+
507+ Without a Range header the full log (up to 1 MB) is returned with
508+ 200 OK. With ``Range: bytes=START-END`` or ``Range: bytes=START-``
509+ the requested slice is returned with 206 Partial Content. In both
510+ cases the response includes a ``Content-Range`` header indicating the byte range
511+ and current total log length.
507512 """
508513 try :
509514 if range is None :
@@ -518,10 +523,12 @@ async def get_vllm_instance_logs(
518523
519524 data , total = vllm_manager .get_instance_log_bytes (instance_id , start , end )
520525
521- actual_end = start + len (data ) - 1 if data else start
522- headers = {"Accept-Ranges" : "bytes" }
526+ actual_end = start + len (data ) - 1
527+ headers = {
528+ "Accept-Ranges" : "bytes" ,
529+ "Content-Range" : f"bytes { start } -{ actual_end } /{ total } " ,
530+ }
523531 if partial :
524- headers ["Content-Range" ] = f"bytes { start } -{ actual_end } /{ total } "
525532 status_code = HTTPStatus .PARTIAL_CONTENT
526533 else :
527534 status_code = HTTPStatus .OK
@@ -646,10 +653,28 @@ def set_env_vars(env_vars: Dict[str, str]):
646653
647654 args = parser .parse_args ()
648655
656+ # Configure root logger so launcher messages are visible before uvicorn
657+ logging .basicConfig (
658+ level = getattr (logging , args .log_level .upper ()),
659+ format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" ,
660+ )
661+
649662 # Get node name from environment variable
650663 node_name = os .getenv ("NODE_NAME" )
651664 namespace = os .getenv ("NAMESPACE" )
652665
666+ logger .info (
667+ "Launcher starting with args: mock_gpus=%s, mock_gpu_count=%d, "
668+ "host=%s, port=%d, log_level=%s, node_name=%s, namespace=%s" ,
669+ args .mock_gpus ,
670+ args .mock_gpu_count ,
671+ args .host ,
672+ args .port ,
673+ args .log_level ,
674+ node_name ,
675+ namespace ,
676+ )
677+
653678 # Reinitialize the global manager with mock mode settings
654679 vllm_manager = VllmMultiProcessManager (
655680 mock_gpus = args .mock_gpus ,
0 commit comments