The self-annotate approach

waltforme · waltforme · commit 7b72c50376e8 · 2026-03-27T14:37:27.000Z
Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
diff --git a/dockerfiles/Dockerfile.launcher.benchmark b/dockerfiles/Dockerfile.launcher.benchmark
@@ -3,7 +3,7 @@ FROM ${BASE_IMAGE}
 
 WORKDIR /app
 
-COPY inference_server/launcher/launcher.py inference_server/launcher/gputranslator.py /app/
+COPY inference_server/launcher/launcher.py inference_server/launcher/gputranslator.py inference_server/launcher/launcher_pod_notifier.py /app/
 
 # Install uvicorn for serving the launcher API, nvidia-ml-py for gputranslator and kubernetes
 RUN pip install --root-user-action=ignore --no-cache-dir uvicorn nvidia-ml-py kubernetes
diff --git a/dockerfiles/Dockerfile.launcher.cpu b/dockerfiles/Dockerfile.launcher.cpu
@@ -14,7 +14,7 @@ FROM base-${TARGETARCH} AS final
 
 WORKDIR /app
 
-COPY inference_server/launcher/launcher.py inference_server/launcher/gputranslator.py /app/
+COPY inference_server/launcher/launcher.py inference_server/launcher/gputranslator.py inference_server/launcher/launcher_pod_notifier.py /app/
 RUN chmod a+x /app/launcher.py
 
 # Install uvicorn for serving the launcher API and nvidia-ml-py for gputranslator
diff --git a/inference_server/launcher/launcher_pod_notifier.py b/inference_server/launcher/launcher_pod_notifier.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+# Copyright 2026 The llm-d Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Publish vLLM instance-state changes onto the enclosing Pod."""
+
+import hashlib
+import json
+import logging
+import os
+import sys
+import time
+import urllib.error
+import urllib.request
+from typing import Any
+
+from kubernetes import client, config
+from kubernetes.client import ApiException
+
+
+# Pod annotation key under which the launcher-state signature is published.
+SIGNATURE_ANNOTATION = "dual-pods.llm-d.ai/vllm-instance-signature"
+
+# Launcher API address used when LAUNCHER_BASE_URL is not set.
+DEFAULT_BASE_URL = "http://127.0.0.1:8001"
+# Seconds to sleep after a successful poll-and-publish iteration.
+DEFAULT_POLL_INTERVAL_SECONDS = 2.0
+# Seconds to sleep after a failed iteration before trying again.
+DEFAULT_ERROR_BACKOFF_SECONDS = 5.0
+
+
+logger = logging.getLogger("launcher_pod_notifier")
+
+
+def configure_logging() -> None:
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
+    )
+
+
+def get_required_env(name: str) -> str:
+    value = os.getenv(name)
+    if not value:
+        raise RuntimeError(f"missing required environment variable {name}")
+    return value
+
+
+def fetch_launcher_state(base_url: str) -> dict[str, Any]:
+    url = f"{base_url}/v2/vllm/instances"
+    with urllib.request.urlopen(url, timeout=5) as response:
+        payload = json.load(response)
+    if not isinstance(payload, dict):
+        raise ValueError(f"launcher response is not an object: {payload!r}")
+    return payload
+
+
+def canonicalize_launcher_state(payload: dict[str, Any]) -> dict[str, Any]:
+    instances = payload.get("instances", [])
+    canonical_instances: list[dict[str, str]] = []
+    for instance in instances:
+        if not isinstance(instance, dict):
+            raise ValueError(f"unexpected instance entry: {instance!r}")
+        instance_id = str(instance.get("instance_id", ""))
+        status = str(instance.get("status", ""))
+        canonical_instances.append({"instance_id": instance_id, "status": status})
+    canonical_instances.sort(key=lambda item: (item["instance_id"], item["status"]))
+    return {
+        "total_instances": int(payload.get("total_instances", len(canonical_instances))),
+        "running_instances": int(payload.get("running_instances", 0)),
+        "instances": canonical_instances,
+    }
+
+
+def compute_signature(payload: dict[str, Any]) -> str:
+    canonical = canonicalize_launcher_state(payload)
+    blob = json.dumps(canonical, sort_keys=True, separators=(",", ":")).encode("utf-8")
+    return hashlib.sha256(blob).hexdigest()
+
+
+def load_incluster_client() -> client.CoreV1Api:
+    config.load_incluster_config()
+    return client.CoreV1Api()
+
+
+def get_pod_annotations(api: client.CoreV1Api, namespace: str, pod_name: str) -> dict[str, str]:
+    pod = api.read_namespaced_pod(name=pod_name, namespace=namespace)
+    return pod.metadata.annotations or {}
+
+
+def patch_pod_annotations(
+    api: client.CoreV1Api,
+    namespace: str,
+    pod_name: str,
+    *,
+    signature: str,
+) -> None:
+    body = {
+        "metadata": {
+            "annotations": {
+                SIGNATURE_ANNOTATION: signature,
+            }
+        }
+    }
+    api.patch_namespaced_pod(name=pod_name, namespace=namespace, body=body)
+
+
+def publish_if_changed(api: client.CoreV1Api, namespace: str, pod_name: str, signature: str) -> None:
+    annotations = get_pod_annotations(api, namespace, pod_name)
+    if annotations.get(SIGNATURE_ANNOTATION, "") == signature:
+        return
+
+    patch_pod_annotations(api, namespace, pod_name, signature=signature)
+    logger.info(
+        "Published launcher state change",
+        extra={"pod": pod_name, "signature": signature},
+    )
+
+
+def main() -> int:
+    configure_logging()
+
+    try:
+        pod_name = get_required_env("POD_NAME")
+        namespace = get_required_env("NAMESPACE")
+    except RuntimeError as exc:
+        logger.error("%s", exc)
+        return 1
+
+    base_url = os.getenv("LAUNCHER_BASE_URL", DEFAULT_BASE_URL).rstrip("/")
+    poll_interval = DEFAULT_POLL_INTERVAL_SECONDS
+    error_backoff = DEFAULT_ERROR_BACKOFF_SECONDS
+
+    try:
+        api = load_incluster_client()
+    except Exception as exc:
+        logger.error("Failed to initialize in-cluster Kubernetes client: %s", exc)
+        return 1
+
+    logger.info(
+        "Launcher Pod notifier started for pod %s in namespace %s against %s",
+        pod_name,
+        namespace,
+        base_url,
+    )
+
+    while True:
+        try:
+            signature = compute_signature(fetch_launcher_state(base_url))
+            publish_if_changed(api, namespace, pod_name, signature)
+            time.sleep(poll_interval)
+        except (
+            ApiException,
+            OSError,
+            TimeoutError,
+            ValueError,
+            urllib.error.HTTPError,
+            urllib.error.URLError,
+        ) as exc:
+            logger.warning("Notifier loop failed: %s", exc)
+            time.sleep(error_backoff)
+        except Exception as exc:
+            logger.exception("Unexpected notifier failure: %s", exc)
+            time.sleep(error_backoff)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/pkg/controller/utils/pod-helper.go b/pkg/controller/utils/pod-helper.go
@@ -228,6 +228,7 @@ func BuildLauncherPodFromTemplate(template corev1.PodTemplateSpec, ns, nodeName,
 		pod.Spec.NodeSelector = make(map[string]string)
 	}
 	pod.Spec.NodeSelector["kubernetes.io/hostname"] = nodeName
+	ensureLauncherNotifierSidecar(pod, container.Image, container.ImagePullPolicy)
 	return pod, nil
 }
 
@@ -271,3 +272,51 @@ func removeGPUResourceLimits(container *corev1.Container) {
 		container.Resources.Requests[corev1.ResourceName("nvidia.com/gpu")] = resource.MustParse("0")
 	}
 }
+
+// ensureLauncherNotifierSidecar installs the "vllm-instance-notifier"
+// container into pod. If a container with that name is already present, the
+// first one found is overwritten; otherwise the sidecar is appended. The
+// sidecar uses the given launcher image and pull policy and runs
+// /app/launcher_pod_notifier.py with python3.
+func ensureLauncherNotifierSidecar(pod *corev1.Pod, launcherImage string, pullPolicy corev1.PullPolicy) {
+	const sidecarName = "vllm-instance-notifier"
+
+	// fromField builds an env var whose value is taken from a field of the Pod.
+	fromField := func(envName, fieldPath string) corev1.EnvVar {
+		return corev1.EnvVar{
+			Name: envName,
+			ValueFrom: &corev1.EnvVarSource{
+				FieldRef: &corev1.ObjectFieldSelector{FieldPath: fieldPath},
+			},
+		}
+	}
+
+	sidecar := corev1.Container{
+		Name:            sidecarName,
+		Image:           launcherImage,
+		ImagePullPolicy: pullPolicy,
+		Command:         []string{"python3", "/app/launcher_pod_notifier.py"},
+		Env: []corev1.EnvVar{
+			{
+				Name:  "LAUNCHER_BASE_URL",
+				Value: fmt.Sprintf("http://127.0.0.1:%d", common.LauncherServicePort),
+			},
+			fromField("POD_NAME", "metadata.name"),
+			fromField("NAMESPACE", "metadata.namespace"),
+		},
+		Resources: corev1.ResourceRequirements{
+			Requests: corev1.ResourceList{
+				corev1.ResourceCPU:    resource.MustParse("10m"),
+				corev1.ResourceMemory: resource.MustParse("64Mi"),
+			},
+			Limits: corev1.ResourceList{
+				corev1.ResourceCPU:    resource.MustParse("100m"),
+				corev1.ResourceMemory: resource.MustParse("128Mi"),
+			},
+		},
+	}
+
+	existing := slices.IndexFunc(pod.Spec.Containers, func(c corev1.Container) bool {
+		return c.Name == sidecarName
+	})
+	if existing < 0 {
+		pod.Spec.Containers = append(pod.Spec.Containers, sidecar)
+		return
+	}
+	pod.Spec.Containers[existing] = sidecar
+}
diff --git a/test/e2e/run-launcher-based.sh b/test/e2e/run-launcher-based.sh
@@ -141,6 +141,13 @@ rules:
   - get
   - list
   - watch
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - get
+  - patch
 EOF
 
 kubectl create rolebinding testlauncher --role=testlauncher --serviceaccount=$(kubectl get sa default -o jsonpath={.metadata.namespace}):testlauncher