Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 36 additions & 11 deletions inference_server/launcher/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,11 @@ def get_log_bytes(
) -> tuple[bytes, int]:
"""
Retrieve log bytes from the child process.
:param start: First byte to read (inclusive, 0-based)
:param end: Last byte to read (inclusive). None means up to
start + MAX_LOG_RESPONSE_BYTES - 1 or EOF.
:return: (content_bytes, total_file_size)
:param start: First byte to read (inclusive, 0-based).
:param end: Last byte to read (inclusive, must be >= start).
None means up to start + MAX_LOG_RESPONSE_BYTES - 1
or EOF, whichever comes first.
:return: (content_bytes, current_total_log_length)
:raises LogRangeNotAvailable: If start is beyond available content
"""
try:
Expand Down Expand Up @@ -364,6 +365,7 @@ def parse_range_header(range_header: str) -> tuple[int, int | None]:
if m is None:
raise ValueError(f"Unsupported or malformed Range header: {range_header}")
start = int(m.group(1))
# group(2) is the end value; absent in open-ended ranges like "bytes=100-"
end = int(m.group(2)) if m.group(2) else None
if end is not None and end < start:
raise ValueError(f"Range end ({end}) must be >= start ({start})")
Expand Down Expand Up @@ -500,10 +502,13 @@ async def get_vllm_instance_logs(
"""
Get logs from a specific vLLM instance.

Without a Range header the full log (up to 1 MB) is returned with 200 OK.
With ``Range: bytes=START-END`` or ``Range: bytes=START-`` the
requested slice is returned with 206 Partial Content and a
``Content-Range`` header.
Supports range requests per RFC 9110 §14 (Range Requests).

Without a Range header the full log (up to 1 MB) is returned with
200 OK. With ``Range: bytes=START-END`` or ``Range: bytes=START-``
the requested slice is returned with 206 Partial Content. In both
cases the response includes a ``Content-Range`` header indicating the byte range
and current total log length.
"""
try:
if range is None:
Expand All @@ -518,10 +523,12 @@ async def get_vllm_instance_logs(

data, total = vllm_manager.get_instance_log_bytes(instance_id, start, end)

actual_end = start + len(data) - 1 if data else start
headers = {"Accept-Ranges": "bytes"}
actual_end = start + len(data) - 1
headers = {
"Accept-Ranges": "bytes",
"Content-Range": f"bytes {start}-{actual_end}/{total}",
}
if partial:
headers["Content-Range"] = f"bytes {start}-{actual_end}/{total}"
status_code = HTTPStatus.PARTIAL_CONTENT
else:
status_code = HTTPStatus.OK
Expand Down Expand Up @@ -646,10 +653,28 @@ def set_env_vars(env_vars: Dict[str, str]):

args = parser.parse_args()

# Configure root logger so launcher messages are visible before uvicorn
logging.basicConfig(
level=getattr(logging, args.log_level.upper()),
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

# Get node name from environment variable
node_name = os.getenv("NODE_NAME")
namespace = os.getenv("NAMESPACE")

logger.info(
"Launcher starting with args: mock_gpus=%s, mock_gpu_count=%d, "
"host=%s, port=%d, log_level=%s, node_name=%s, namespace=%s",
args.mock_gpus,
args.mock_gpu_count,
args.host,
args.port,
args.log_level,
node_name,
namespace,
)

# Reinitialize the global manager with mock mode settings
vllm_manager = VllmMultiProcessManager(
mock_gpus=args.mock_gpus,
Expand Down
3 changes: 2 additions & 1 deletion inference_server/launcher/tests/test_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,14 +715,15 @@ def test_get_instance_logs_endpoint(self, mock_manager, client):
"""Test getting instance logs without Range header returns 200"""
mock_manager.get_instance_log_bytes.return_value = (
b"Log line 1Log line 2Log line 3",
29,
30,
)

response = client.get("/v2/vllm/instances/test-id/log")

assert response.status_code == 200
assert response.headers["content-type"] == "application/octet-stream"
assert response.content == b"Log line 1Log line 2Log line 3"
assert response.headers["content-range"] == "bytes 0-29/30"
mock_manager.get_instance_log_bytes.assert_called_once_with("test-id", 0, None)

@patch("launcher.vllm_manager")
Expand Down
Loading