Skip to content

Commit f87ffe0

Browse files
Improve launcher logging - Part 2 (llm-d-incubation#367)
* Changes:
  - Add Content-Range header to 200 OK log responses (not just 206)
  - Remove dead-code guard on actual_end calculation
  - Improve get_log_bytes docstring: clarify inclusive semantics and return value
  - Add explanatory comment in parse_range_header for open-ended ranges
  - Update log endpoint docstring with RFC 9110 §14 reference
  - Log CLI arguments on launcher startup with logging.basicConfig() setup
  - Fix test mock total (29→30) and assert Content-Range on 200 responses

  Signed-off-by: Diego-Castan <diego.castan@ibm.com>

* fix typo

  Signed-off-by: Diego-Castan <diego.castan@ibm.com>

---------

Signed-off-by: Diego-Castan <diego.castan@ibm.com>
1 parent b7df209 commit f87ffe0

File tree

2 files changed

+38
-12
lines changed

2 files changed

+38
-12
lines changed

inference_server/launcher/launcher.py

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,11 @@ def get_log_bytes(
184184
) -> tuple[bytes, int]:
185185
"""
186186
Retrieve log bytes from the child process.
187-
:param start: First byte to read (inclusive, 0-based)
188-
:param end: Last byte to read (inclusive). None means up to
189-
start + MAX_LOG_RESPONSE_BYTES - 1 or EOF.
190-
:return: (content_bytes, total_file_size)
187+
:param start: First byte to read (inclusive, 0-based).
188+
:param end: Last byte to read (inclusive, must be >= start).
189+
None means up to start + MAX_LOG_RESPONSE_BYTES - 1
190+
or EOF, whichever comes first.
191+
:return: (content_bytes, current_total_log_length)
191192
:raises LogRangeNotAvailable: If start is beyond available content
192193
"""
193194
try:
@@ -364,6 +365,7 @@ def parse_range_header(range_header: str) -> tuple[int, int | None]:
364365
if m is None:
365366
raise ValueError(f"Unsupported or malformed Range header: {range_header}")
366367
start = int(m.group(1))
368+
# group(2) is the end value; absent in open-ended ranges like "bytes=100-"
367369
end = int(m.group(2)) if m.group(2) else None
368370
if end is not None and end < start:
369371
raise ValueError(f"Range end ({end}) must be >= start ({start})")
@@ -500,10 +502,13 @@ async def get_vllm_instance_logs(
500502
"""
501503
Get logs from a specific vLLM instance.
502504
503-
Without a Range header the full log (up to 1 MB) is returned with 200 OK.
504-
With ``Range: bytes=START-END`` or ``Range: bytes=START-`` the
505-
requested slice is returned with 206 Partial Content and a
506-
``Content-Range`` header.
505+
Supports range requests per RFC 9110 §14 (Range Requests).
506+
507+
Without a Range header the full log (up to 1 MB) is returned with
508+
200 OK. With ``Range: bytes=START-END`` or ``Range: bytes=START-``
509+
the requested slice is returned with 206 Partial Content. In both
510+
cases the response includes a ``Content-Range`` header indicating the byte range
511+
and current total log length.
507512
"""
508513
try:
509514
if range is None:
@@ -518,10 +523,12 @@ async def get_vllm_instance_logs(
518523

519524
data, total = vllm_manager.get_instance_log_bytes(instance_id, start, end)
520525

521-
actual_end = start + len(data) - 1 if data else start
522-
headers = {"Accept-Ranges": "bytes"}
526+
actual_end = start + len(data) - 1
527+
headers = {
528+
"Accept-Ranges": "bytes",
529+
"Content-Range": f"bytes {start}-{actual_end}/{total}",
530+
}
523531
if partial:
524-
headers["Content-Range"] = f"bytes {start}-{actual_end}/{total}"
525532
status_code = HTTPStatus.PARTIAL_CONTENT
526533
else:
527534
status_code = HTTPStatus.OK
@@ -646,10 +653,28 @@ def set_env_vars(env_vars: Dict[str, str]):
646653

647654
args = parser.parse_args()
648655

656+
# Configure root logger so launcher messages are visible before uvicorn
657+
logging.basicConfig(
658+
level=getattr(logging, args.log_level.upper()),
659+
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
660+
)
661+
649662
# Get node name from environment variable
650663
node_name = os.getenv("NODE_NAME")
651664
namespace = os.getenv("NAMESPACE")
652665

666+
logger.info(
667+
"Launcher starting with args: mock_gpus=%s, mock_gpu_count=%d, "
668+
"host=%s, port=%d, log_level=%s, node_name=%s, namespace=%s",
669+
args.mock_gpus,
670+
args.mock_gpu_count,
671+
args.host,
672+
args.port,
673+
args.log_level,
674+
node_name,
675+
namespace,
676+
)
677+
653678
# Reinitialize the global manager with mock mode settings
654679
vllm_manager = VllmMultiProcessManager(
655680
mock_gpus=args.mock_gpus,

inference_server/launcher/tests/test_launcher.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,14 +715,15 @@ def test_get_instance_logs_endpoint(self, mock_manager, client):
715715
"""Test getting instance logs without Range header returns 200"""
716716
mock_manager.get_instance_log_bytes.return_value = (
717717
b"Log line 1Log line 2Log line 3",
718-
29,
718+
30,
719719
)
720720

721721
response = client.get("/v2/vllm/instances/test-id/log")
722722

723723
assert response.status_code == 200
724724
assert response.headers["content-type"] == "application/octet-stream"
725725
assert response.content == b"Log line 1Log line 2Log line 3"
726+
assert response.headers["content-range"] == "bytes 0-29/30"
726727
mock_manager.get_instance_log_bytes.assert_called_once_with("test-id", 0, None)
727728

728729
@patch("launcher.vllm_manager")

0 commit comments

Comments
 (0)