Add annotations to instances in launcher

MikeSpreitzer · MikeSpreitzer · commit 3ca0b5e58425 · 2026-04-01T18:26:43.000-04:00
... to carry data that is meaningful to clients but not to the launcher.

Signed-off-by: Mike Spreitzer <mspreitz@us.ibm.com>
diff --git a/inference_server/launcher/launcher.py b/inference_server/launcher/launcher.py
@@ -59,6 +59,7 @@ class VllmConfig(BaseModel):
     options: str
     gpu_uuids: Optional[List[str]] = None
     env_vars: Optional[Dict[str, str]] = None
+    annotations: Optional[Dict[str, str]] = None
 
 
 class HalfMade(Exception):
diff --git a/inference_server/launcher/tests/test_launcher.py b/inference_server/launcher/tests/test_launcher.py
@@ -53,7 +53,9 @@
 def vllm_config():
     """Create a sample VllmConfig for testing"""
     return VllmConfig(
-        options="--model test-model --port 8000", env_vars={"TEST_VAR": "test_value"}
+        options="--model test-model --port 8000",
+        env_vars={"TEST_VAR": "test_value"},
+        annotations={"Foo": "bar"},
     )
 
 
diff --git a/pkg/controller/dual-pods/inference-server.go b/pkg/controller/dual-pods/inference-server.go
@@ -631,12 +631,12 @@ func (ctl *controller) selectBestLauncherPod(
 		hasSleepingInstance := false
 		hasPortConflict := false
 		for _, inst := range insts.Instances {
-			instPort, err := getVLLMInstancePort(inst.Options)
+			instPort, err := getVLLMInstancePort(inst.Spec.Options)
 			if err != nil {
 				logger.V(5).Info("Skipping launcher Pod because an instance has unparseable options",
 					"name", launcherPod.Name,
 					"instanceID", inst.InstanceID,
-					"options", inst.Options,
+					"options", inst.Spec.Options,
 					"err", err)
 				hasPortConflict = true
 				break
@@ -695,11 +695,16 @@ func (ctl *controller) selectBestLauncherPod(
 }
 
 func (ctl *controller) configInferenceServer(isc *fmav1alpha1.InferenceServerConfig, gpuUUIDs []string) (*VllmConfig, string, error) {
-	options := isc.Spec.ModelServerConfig.Options + " --port " + strconv.Itoa(int(isc.Spec.ModelServerConfig.Port))
+	portS := strconv.Itoa(int(isc.Spec.ModelServerConfig.Port))
+	options := isc.Spec.ModelServerConfig.Options + " --port " + portS
 	vllmCfg := VllmConfig{
 		Options:  options,
 		GpuUUIDs: gpuUUIDs,
 		EnvVars:  make(map[string]string, len(isc.Spec.ModelServerConfig.EnvVars)),
+		Annotations: map[string]string{
+			"isc-name":       isc.Name,
+			"inference-port": portS,
+		},
 	}
 	for k, v := range isc.Spec.ModelServerConfig.EnvVars {
 		vllmCfg.EnvVars[k] = v
@@ -1322,7 +1327,7 @@ var podDecoder k8sruntime.Decoder
 // syncLauncherInstances queries the launcher pod for its current instances,
 // updates the controller's internal launcherData state, and returns the fresh
 // launcher response used for the update.
-func (ctl *controller) syncLauncherInstances(ctx context.Context, nodeDat *nodeData, launcherPod *corev1.Pod) (*AllInstancesStatus, error, bool) {
+func (ctl *controller) syncLauncherInstances(ctx context.Context, nodeDat *nodeData, launcherPod *corev1.Pod) (*AllInstancesState, error, bool) {
 	logger := klog.FromContext(ctx)
 
 	if launcherPod.Status.PodIP == "" || !utils.IsPodReady(launcherPod) {
diff --git a/pkg/controller/dual-pods/launcherclient.go b/pkg/controller/dual-pods/launcherclient.go
@@ -50,25 +50,24 @@ func NewLauncherClient(baseURL string) (*LauncherClient, error) {
 
 // VllmConfig matches the launcher API schema.
 type VllmConfig struct {
-	Options  string            `json:"options"`
-	GpuUUIDs []string          `json:"gpu_uuids,omitempty"`
-	EnvVars  map[string]string `json:"env_vars,omitempty"`
+	Options     string            `json:"options"`
+	GpuUUIDs    []string          `json:"gpu_uuids,omitempty"`
+	EnvVars     map[string]string `json:"env_vars,omitempty"`
+	Annotations map[string]string `json:"annotations,omitempty"`
 }
 
-// InstanceStatus returned by status APIs.
-type InstanceStatus struct {
-	InstanceID string            `json:"instance_id"`
-	Status     string            `json:"status"`
-	Options    string            `json:"options"`
-	GpuUUIDs   []string          `json:"gpu_uuids,omitempty"`
-	EnvVars    map[string]string `json:"env_vars,omitempty"`
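+// InstanceState is the state of one instance as returned by the launcher API. NOTE(review): the standard encoding/json package has no "inline" tag option and ignores it, so if the client decodes with encoding/json, Spec is read from a nested "Spec" key rather than from the flat options/gpu_uuids/env_vars/annotations keys — confirm; embedding VllmConfig or a custom UnmarshalJSON would flatten it.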
+type InstanceState struct {
+	InstanceID string     `json:"instance_id"`
+	Status     string     `json:"status"`
+	Spec       VllmConfig `json:",inline"`
 }
 
-// AllInstancesStatus response.
-type AllInstancesStatus struct {
-	TotalInstances   int              `json:"total_instances"`
-	RunningInstances int              `json:"running_instances"`
-	Instances        []InstanceStatus `json:"instances"`
+// AllInstancesState response.
+type AllInstancesState struct {
+	TotalInstances   int             `json:"total_instances"`
+	RunningInstances int             `json:"running_instances"`
+	Instances        []InstanceState `json:"instances"`
 }
 
 // Generic response for creation and deletion.
@@ -95,13 +94,13 @@ func (c *LauncherClient) CreateNamedInstance(
 	return c.create(ctx, path, http.MethodPut, cfg)
 }
 
-// GetInstanceStatus returns the status of a single instance.
-func (c *LauncherClient) GetInstanceStatus(
+// GetInstanceState returns the state of a single instance.
+func (c *LauncherClient) GetInstanceState(
 	ctx context.Context,
 	instanceID string,
-) (*InstanceStatus, error) {
+) (*InstanceState, error) {
 	path := fmt.Sprintf("/v2/vllm/instances/%s", instanceID)
-	var out InstanceStatus
+	var out InstanceState
 	if err := c.do(ctx, http.MethodGet, path, nil, &out); err != nil {
 		return nil, err
 	}
@@ -111,8 +110,8 @@ func (c *LauncherClient) GetInstanceStatus(
 // ListInstances returns all instances with status.
 func (c *LauncherClient) ListInstances(
 	ctx context.Context,
-) (*AllInstancesStatus, error) {
-	var out AllInstancesStatus
+) (*AllInstancesState, error) {
+	var out AllInstancesState
 	if err := c.do(ctx, http.MethodGet, "/v2/vllm/instances", nil, &out); err != nil {
 		return nil, err
 	}

Original file line number	Diff line number	Diff line change
`@@ -53,7 +53,9 @@`
`53`	`53`	`def vllm_config():`
`54`	`54`	`"""Create a sample VllmConfig for testing"""`
`55`	`55`	`return VllmConfig(`
`56`		`- options="--model test-model --port 8000", env_vars={"TEST_VAR": "test_value"}`
	`56`	`+ options="--model test-model --port 8000",`
	`57`	`+ env_vars={"TEST_VAR": "test_value"},`
	`58`	`+ annotations={"Foo": "bar"},`
`57`	`59`	`)`
`58`	`60`
`59`	`61`