Skip to content

Commit 797d241

Browse files
MikeSpreitzer and claude committed
Include creation parameters inline in launcher instance state replies
VllmInstance.get_status() now spreads VllmConfig fields (options, gpu_uuids, env_vars) inline into the returned dict via model_dump(), so callers can inspect the configuration that was used to start each instance without a separate side-channel. InstanceStatus in the Go client is updated to match the flat JSON schema with Options, GpuUUIDs, and EnvVars fields.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Mike Spreitzer <mspreitz@us.ibm.com>
1 parent f87ffe0 commit 797d241

File tree

3 files changed

+19
-2
lines changed

3 files changed

+19
-2
lines changed

inference_server/launcher/launcher.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,7 @@ def get_status(self) -> dict:
177177
return {
178178
"status": "running" if self.process.is_alive() else "stopped",
179179
"instance_id": self.instance_id,
180+
**self.config.model_dump(),
180181
}
181182

182183
def get_log_bytes(

inference_server/launcher/tests/test_launcher.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -258,11 +258,14 @@ def test_instance_get_status(
258258
instance.start()
259259
status = instance.get_status()
260260
assert status["status"] == "running"
261+
assert status["options"] == vllm_config.options
262+
assert status["env_vars"] == vllm_config.env_vars
261263

262264
# Stopped
263265
mock_process._is_alive = False
264266
status = instance.get_status()
265267
assert status["status"] == "stopped"
268+
assert status["options"] == vllm_config.options
266269

267270
@patch("launcher.multiprocessing.Process")
268271
def test_instance_uuid_to_index_translation(
@@ -473,6 +476,8 @@ def test_get_instance_status(self, mock_process_class, manager, vllm_config):
473476

474477
assert status["status"] == "running"
475478
assert status["instance_id"] == "test-id"
479+
assert status["options"] == vllm_config.options
480+
assert status["env_vars"] == vllm_config.env_vars
476481

477482
@patch("launcher.multiprocessing.Process")
478483
def test_get_instance_status_nonexistent(self, mock_process_class, manager):
@@ -494,6 +499,9 @@ def test_get_all_instances_status(self, mock_process_class, manager, vllm_config
494499
assert status["total_instances"] == 2
495500
assert status["running_instances"] == 2
496501
assert len(status["instances"]) == 2
502+
for inst in status["instances"]:
503+
assert inst["options"] == vllm_config.options
504+
assert inst["env_vars"] == vllm_config.env_vars
497505

498506
@patch("launcher.multiprocessing.Process")
499507
def test_list_instances(self, mock_process_class, manager, vllm_config):
@@ -693,13 +701,18 @@ def test_get_instance_status(self, mock_manager, client):
693701
mock_manager.get_instance_status.return_value = {
694702
"status": "running",
695703
"instance_id": "test-id",
704+
"options": "--model test-model",
705+
"gpu_uuids": None,
706+
"env_vars": {"KEY": "val"},
696707
}
697708

698709
response = client.get("/v2/vllm/instances/test-id")
699710

700711
assert response.status_code == 200
701712
data = response.json()
702713
assert data["status"] == "running"
714+
assert data["options"] == "--model test-model"
715+
assert data["env_vars"] == {"KEY": "val"}
703716

704717
@patch("launcher.vllm_manager")
705718
def test_get_nonexistent_instance_status(self, mock_manager, client):

pkg/controller/dual-pods/launcherclient.go

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,11 @@ type VllmConfig struct {
5757

5858
// InstanceStatus returned by status APIs.
5959
type InstanceStatus struct {
60-
InstanceID string `json:"instance_id"`
61-
Status string `json:"status"`
60+
InstanceID string `json:"instance_id"`
61+
Status string `json:"status"`
62+
Options string `json:"options"`
63+
GpuUUIDs []string `json:"gpu_uuids,omitempty"`
64+
EnvVars map[string]string `json:"env_vars,omitempty"`
6265
}
6366

6467
// AllInstancesStatus response.

0 commit comments

Comments
 (0)