Skip to content

Commit 4c1efa9

Browse files
committed
WIP
Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
1 parent 51f44de commit 4c1efa9

4 files changed

Lines changed: 190 additions & 10 deletions

File tree

pkg/controller/dual-pods/inference-server.go

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import (
4747
"k8s.io/utils/ptr"
4848
"sigs.k8s.io/yaml"
4949

50+
fmav1alpha1 "github.com/llm-d-incubation/llm-d-fast-model-actuation/api/fma/v1alpha1"
5051
"github.com/llm-d-incubation/llm-d-fast-model-actuation/pkg/api"
5152
stubapi "github.com/llm-d-incubation/llm-d-fast-model-actuation/pkg/spi"
5253
)
@@ -349,16 +350,33 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
349350
}
350351
// What remains to be done is to wake or create a server-providing Pod
351352

352-
serverPatch := requestingPod.Annotations[api.ServerPatchAnnotationName]
353-
if serverPatch == "" { // this is bad, somebody has hacked important data
354-
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, "the "+api.ServerPatchAnnotationName+" annotation is missing")
355-
}
356-
// use the server patch to build the server-providing pod, if not already done.
357-
desiredProvidingPod, nominalHash, err := serverDat.getNominalServerProvidingPod(ctx, requestingPod, serverPatch, api.ProviderData{
358-
NodeName: requestingPod.Spec.NodeName,
359-
})
360-
if err != nil {
361-
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("failed to construct the nominal server-providing Pod: %s", err.Error()))
353+
var desiredProvidingPod *corev1.Pod
354+
var nominalHash string
355+
if requestingPod.Annotations[api.LauncherConfigAnnotationName] != "" {
356+
lcname := requestingPod.Annotations[api.LauncherConfigAnnotationName]
357+
lc, err := ctl.lcLister.LauncherConfigs(ctl.namespace).Get(lcname)
358+
if err != nil {
359+
if apierrors.IsNotFound(err) {
360+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("the LauncherConfig %q does not exist", lcname))
361+
}
362+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("failed to get the LauncherConfig %q: %s", lcname, err.Error()))
363+
}
364+
desiredProvidingPod, nominalHash, err = serverDat.getNominalServerProvidingPodFromLC(ctx, requestingPod, *lc)
365+
if err != nil {
366+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("failed to construct the nominal server-providing Pod from LauncherConfig %q: %s", lcname, err.Error()))
367+
}
368+
} else {
369+
serverPatch := requestingPod.Annotations[api.ServerPatchAnnotationName]
370+
if serverPatch == "" { // this is bad, somebody has hacked important data
371+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, "the "+api.ServerPatchAnnotationName+" annotation is missing")
372+
}
373+
// use the server patch to build the server-providing pod, if not already done.
374+
desiredProvidingPod, nominalHash, err = serverDat.getNominalServerProvidingPod(ctx, requestingPod, serverPatch, api.ProviderData{
375+
NodeName: requestingPod.Spec.NodeName,
376+
})
377+
if err != nil {
378+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("failed to construct the nominal server-providing Pod: %s", err.Error()))
379+
}
362380
}
363381

364382
sleepingAnys, err := ctl.podInformer.GetIndexer().ByIndex(nominalHashIndexName, nominalHash)
@@ -810,6 +828,45 @@ func (serverDat *serverData) getNominalServerProvidingPod(ctx context.Context, r
810828
return serverDat.NominalProvidingPod, serverDat.NominalProvidingPodHash, nil
811829
}
812830

831+
func (serverDat *serverData) getNominalServerProvidingPodFromLC(ctx context.Context, reqPod *corev1.Pod, lc fmav1alpha1.LauncherConfig) (*corev1.Pod, string, error) {
832+
logger := klog.FromContext(ctx)
833+
if serverDat.NominalProvidingPod == nil {
834+
podSpec := lc.Spec.PodTemplate.Spec
835+
podSpec = *deIndividualize(podSpec.DeepCopy())
836+
pod := &corev1.Pod{
837+
ObjectMeta: metav1.ObjectMeta{
838+
Labels: lc.Spec.PodTemplate.Labels,
839+
},
840+
Spec: podSpec,
841+
}
842+
hasher := sha256.New()
843+
podJSON, err := json.Marshal(pod)
844+
if err != nil {
845+
return nil, "", fmt.Errorf("failed to marshal launcher config pod spec: %w", err)
846+
}
847+
hasher.Write(podJSON)
848+
var modifiedHash [sha256.Size]byte
849+
modifiedHashSl := hasher.Sum(modifiedHash[:0])
850+
nominalHash := base64.RawStdEncoding.EncodeToString(modifiedHashSl)
851+
852+
logger.V(5).Info("Computed nominalHash from LauncherConfig", "nominalHash", nominalHash, "podJSON", podJSON)
853+
854+
serverDat.NominalProvidingPod = pod
855+
serverDat.NominalProvidingPodHash = nominalHash
856+
857+
pod.GenerateName = reqPod.Name + "-dual-"
858+
// pod.Finalizers = append(pod.Finalizers, providerFinalizer)
859+
// pod.Annotations = MapSet(pod.Annotations, nominalHashAnnotationKey, nominalHash)
860+
// pod.Annotations[requesterAnnotationKey] = string(reqPod.UID) + " " + reqPod.Name
861+
// pod.Annotations[api.AcceleratorsAnnotationName] = *serverDat.GPUIDsStr
862+
// pod.Labels = MapSet(pod.Labels, api.DualLabelName, reqPod.Name)
863+
// pod.Labels[api.SleepingLabelName] = "false"
864+
serverDat.NominalProvidingPod = pod
865+
serverDat.NominalProvidingPodHash = nominalHash
866+
}
867+
return serverDat.NominalProvidingPod, serverDat.NominalProvidingPodHash, nil
868+
}
869+
813870
// deIndividualize removes the parts of a PodSpec that are specific to an individual.
814871
// This func side-effects the given `*PodSpec` and returns it.
815872
func deIndividualize(podSpec *corev1.PodSpec) *corev1.PodSpec {

test/e2e/mkobjs.sh

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env bash
# mkobjs.sh: create a timestamped set of e2e test objects — an
# InferenceServerConfig, a LauncherConfig, and a requester ReplicaSet —
# all suffixed with the same instance stamp so repeated runs coexist.
# On success, prints the three object names (one per line) to stdout.

inst=$(date +%d-%H-%M-%S)

# Resolve the container images from the Makefile; fail fast if the lookup
# fails or yields an empty value (an empty image would produce broken YAML).
server_img=$(make echo-var VAR=TEST_SERVER_IMG) || { echo "Failed to resolve TEST_SERVER_IMG" >&2; exit 1; }
requester_img=$(make echo-var VAR=TEST_REQUESTER_IMG) || { echo "Failed to resolve TEST_REQUESTER_IMG" >&2; exit 1; }
if [ -z "$server_img" ] || [ -z "$requester_img" ]; then
    echo "Empty test image name(s); check 'make echo-var' output" >&2
    exit 1
fi

if out=$(kubectl apply -f - 2>&1 <<EOF
apiVersion: fma.llm-d.ai/v1alpha1
kind: InferenceServerConfig
metadata:
  name: inference-server-config-$inst
spec:
  modelServerConfig:
    options: "--model meta-llama/Llama-2-7b-hf --tensor-parallel-size 1"
    env_vars:
      VLLM_SERVER_DEV_MODE: "1"
    labels:
      component: inference
    annotations:
      description: "Example InferenceServerConfig"
  launcherConfigName: launcher-config-$inst
---
apiVersion: fma.llm-d.ai/v1alpha1
kind: LauncherConfig
metadata:
  name: launcher-config-$inst
spec:
  maxSleepingInstances: 3
  podTemplate:
    spec:
      containers:
      - name: inference-server
        image: $server_img
        command:
        - /ko-app/test-server
        - --startup-delay=22
        resources:
          limits:
            cpu: "200m"
            memory: 9Gi
        readinessProbe:
          httpGet:
            path: /health
            port: 8000
          initialDelaySeconds: 10
          periodSeconds: 5
---
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: my-request-$inst
  labels:
    app: dp-example
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dp-example
      # Include the instance stamp so ReplicaSets from separate runs do not
      # share a selector and fight over each other's Pods.
      instance: "$inst"
  template:
    metadata:
      labels:
        app: dp-example
        instance: "$inst"
      annotations:
        dual-pods.llm-d.ai/admin-port: "8081"
        dual-pods.llm-d.ai/launcher-config: "launcher-config-$inst"
    spec:
      containers:
      - name: inference-server
        image: $requester_img
        imagePullPolicy: IfNotPresent
        command:
        - /ko-app/test-requester
        # The \$(VAR) forms are expanded by kubelet from the env vars below,
        # not by this shell (hence the backslash escapes).
        - --node=\$(NODE_NAME)
        - --pod-uid=\$(POD_UID)
        - --namespace=\$(NAMESPACE)
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef: { fieldPath: spec.nodeName }
        - name: POD_UID
          valueFrom:
            fieldRef: { fieldPath: metadata.uid }
        - name: NAMESPACE
          valueFrom:
            fieldRef: { fieldPath: metadata.namespace }
        ports:
        - name: probes
          containerPort: 8080
        - name: spi
          containerPort: 8081
        readinessProbe:
          httpGet:
            path: /ready
            port: 8080
          initialDelaySeconds: 2
          periodSeconds: 5
        resources:
          limits:
            nvidia.com/gpu: "1"
            cpu: "200m"
            memory: 250Mi
      serviceAccount: testreq
EOF
)
then
    # Report the created object names so callers can capture them.
    echo inference-server-config-$inst
    echo launcher-config-$inst
    echo my-request-$inst
else
    echo Failed to create objects >&2
    echo "$out" >&2
    exit 1
fi

test/e2e/rdobjs.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/env bash
# rdobjs.sh: list the e2e test objects created by mkobjs.sh, by name prefix.
# Each grep may match nothing (and exit non-zero) when no objects exist;
# that is harmless for a listing script, so no error handling is added.

kubectl get rs -oname | grep my-request-

kubectl get po -oname | grep my-request-

kubectl get launcherconfig -oname | grep launcher-config-
kubectl get inferenceserverconfig -oname | grep inference-server-config-

test/e2e/rmobjs.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env bash
# rmobjs.sh: delete the e2e test objects created by mkobjs.sh, by name prefix.
# Pods owned by the ReplicaSets are garbage-collected when the RS is deleted,
# so they are not removed explicitly.
# xargs -r ("no run if empty") prevents a bare `kubectl delete` error when a
# grep matches nothing. NOTE(review): -r is GNU/modern-BSD; confirm the
# target environments' xargs supports it.

kubectl get rs -oname | grep my-request- | xargs -r kubectl delete

kubectl get launcherconfig -oname | grep launcher-config- | xargs -r kubectl delete
kubectl get inferenceserverconfig -oname | grep inference-server-config- | xargs -r kubectl delete

0 commit comments

Comments
 (0)