diff --git a/inference_server/launcher/launcher.py b/inference_server/launcher/launcher.py index e9332a33..dbcfb82b 100755 --- a/inference_server/launcher/launcher.py +++ b/inference_server/launcher/launcher.py @@ -184,10 +184,11 @@ def get_log_bytes( ) -> tuple[bytes, int]: """ Retrieve log bytes from the child process. - :param start: First byte to read (inclusive, 0-based) - :param end: Last byte to read (inclusive). None means up to - start + MAX_LOG_RESPONSE_BYTES - 1 or EOF. - :return: (content_bytes, total_file_size) + :param start: First byte to read (inclusive, 0-based). + :param end: Last byte to read (inclusive, must be >= start). + None means up to start + MAX_LOG_RESPONSE_BYTES - 1 + or EOF, whichever comes first. + :return: (content_bytes, current_total_log_length) :raises LogRangeNotAvailable: If start is beyond available content """ try: @@ -364,6 +365,7 @@ def parse_range_header(range_header: str) -> tuple[int, int | None]: if m is None: raise ValueError(f"Unsupported or malformed Range header: {range_header}") start = int(m.group(1)) + # group(2) is the end value; absent in open-ended ranges like "bytes=100-" end = int(m.group(2)) if m.group(2) else None if end is not None and end < start: raise ValueError(f"Range end ({end}) must be >= start ({start})") @@ -500,10 +502,13 @@ async def get_vllm_instance_logs( """ Get logs from a specific vLLM instance. - Without a Range header the full log (up to 1 MB) is returned with 200 OK. - With ``Range: bytes=START-END`` or ``Range: bytes=START-`` the - requested slice is returned with 206 Partial Content and a - ``Content-Range`` header. + Supports range requests per RFC 9110 ยง14 (Range Requests). + + Without a Range header the full log (up to 1 MB) is returned with + 200 OK. With ``Range: bytes=START-END`` or ``Range: bytes=START-`` + the requested slice is returned with 206 Partial Content. In both + cases the response includes a ``Content-Range`` header indicating the byte range + and current total log length. """ try: if range is None: @@ -518,10 +523,12 @@ async def get_vllm_instance_logs( data, total = vllm_manager.get_instance_log_bytes(instance_id, start, end) - actual_end = start + len(data) - 1 if data else start - headers = {"Accept-Ranges": "bytes"} + actual_end = start + len(data) - 1 + headers = { + "Accept-Ranges": "bytes", + "Content-Range": f"bytes {start}-{actual_end}/{total}", + } if partial: - headers["Content-Range"] = f"bytes {start}-{actual_end}/{total}" status_code = HTTPStatus.PARTIAL_CONTENT else: status_code = HTTPStatus.OK @@ -646,10 +653,28 @@ def set_env_vars(env_vars: Dict[str, str]): args = parser.parse_args() + # Configure root logger so launcher messages are visible before uvicorn + logging.basicConfig( + level=getattr(logging, args.log_level.upper()), + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + # Get node name from environment variable node_name = os.getenv("NODE_NAME") namespace = os.getenv("NAMESPACE") + logger.info( + "Launcher starting with args: mock_gpus=%s, mock_gpu_count=%d, " + "host=%s, port=%d, log_level=%s, node_name=%s, namespace=%s", + args.mock_gpus, + args.mock_gpu_count, + args.host, + args.port, + args.log_level, + node_name, + namespace, + ) + # Reinitialize the global manager with mock mode settings vllm_manager = VllmMultiProcessManager( mock_gpus=args.mock_gpus, diff --git a/inference_server/launcher/tests/test_launcher.py b/inference_server/launcher/tests/test_launcher.py index b3127a8e..135aa703 100644 --- a/inference_server/launcher/tests/test_launcher.py +++ b/inference_server/launcher/tests/test_launcher.py @@ -715,7 +715,7 @@ def test_get_instance_logs_endpoint(self, mock_manager, client): """Test getting instance logs without Range header returns 200""" mock_manager.get_instance_log_bytes.return_value = ( b"Log line 1Log line 2Log line 3", - 29, + 30, ) response = client.get("/v2/vllm/instances/test-id/log") @@ -723,6 +723,7 @@ def test_get_instance_logs_endpoint(self, mock_manager, client): assert response.status_code == 200 assert response.headers["content-type"] == "application/octet-stream" assert response.content == b"Log line 1Log line 2Log line 3" + assert response.headers["content-range"] == "bytes 0-29/30" mock_manager.get_instance_log_bytes.assert_called_once_with("test-id", 0, None) @patch("launcher.vllm_manager")