Skip to content

Commit 4c1efa9

Browse files
committed
WIP
Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
1 parent 51f44de commit 4c1efa9

4 files changed

Lines changed: 190 additions & 10 deletions

File tree

pkg/controller/dual-pods/inference-server.go

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import (
4747
"k8s.io/utils/ptr"
4848
"sigs.k8s.io/yaml"
4949

50+
fmav1alpha1 "github.com/llm-d-incubation/llm-d-fast-model-actuation/api/fma/v1alpha1"
5051
"github.com/llm-d-incubation/llm-d-fast-model-actuation/pkg/api"
5152
stubapi "github.com/llm-d-incubation/llm-d-fast-model-actuation/pkg/spi"
5253
)
@@ -349,16 +350,33 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
349350
}
350351
// What remains to be done is to wake or create a server-providing Pod
351352

352-
serverPatch := requestingPod.Annotations[api.ServerPatchAnnotationName]
353-
if serverPatch == "" { // this is bad, somebody has hacked important data
354-
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, "the "+api.ServerPatchAnnotationName+" annotation is missing")
355-
}
356-
// use the server patch to build the server-providing pod, if not already done.
357-
desiredProvidingPod, nominalHash, err := serverDat.getNominalServerProvidingPod(ctx, requestingPod, serverPatch, api.ProviderData{
358-
NodeName: requestingPod.Spec.NodeName,
359-
})
360-
if err != nil {
361-
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("failed to construct the nominal server-providing Pod: %s", err.Error()))
353+
var desiredProvidingPod *corev1.Pod
354+
var nominalHash string
355+
if requestingPod.Annotations[api.LauncherConfigAnnotationName] != "" {
356+
lcname := requestingPod.Annotations[api.LauncherConfigAnnotationName]
357+
lc, err := ctl.lcLister.LauncherConfigs(ctl.namespace).Get(lcname)
358+
if err != nil {
359+
if apierrors.IsNotFound(err) {
360+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("the LauncherConfig %q does not exist", lcname))
361+
}
362+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("failed to get the LauncherConfig %q: %s", lcname, err.Error()))
363+
}
364+
desiredProvidingPod, nominalHash, err = serverDat.getNominalServerProvidingPodFromLC(ctx, requestingPod, *lc)
365+
if err != nil {
366+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("failed to construct the nominal server-providing Pod from LauncherConfig %q: %s", lcname, err.Error()))
367+
}
368+
} else {
369+
serverPatch := requestingPod.Annotations[api.ServerPatchAnnotationName]
370+
if serverPatch == "" { // this is bad, somebody has hacked important data
371+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, "the "+api.ServerPatchAnnotationName+" annotation is missing")
372+
}
373+
// use the server patch to build the server-providing pod, if not already done.
374+
desiredProvidingPod, nominalHash, err = serverDat.getNominalServerProvidingPod(ctx, requestingPod, serverPatch, api.ProviderData{
375+
NodeName: requestingPod.Spec.NodeName,
376+
})
377+
if err != nil {
378+
return ctl.ensureReqStatus(ctx, requestingPod, serverDat, fmt.Sprintf("failed to construct the nominal server-providing Pod: %s", err.Error()))
379+
}
362380
}
363381

364382
sleepingAnys, err := ctl.podInformer.GetIndexer().ByIndex(nominalHashIndexName, nominalHash)
@@ -810,6 +828,45 @@ func (serverDat *serverData) getNominalServerProvidingPod(ctx context.Context, r
810828
return serverDat.NominalProvidingPod, serverDat.NominalProvidingPodHash, nil
811829
}
812830

831+
func (serverDat *serverData) getNominalServerProvidingPodFromLC(ctx context.Context, reqPod *corev1.Pod, lc fmav1alpha1.LauncherConfig) (*corev1.Pod, string, error) {
832+
logger := klog.FromContext(ctx)
833+
if serverDat.NominalProvidingPod == nil {
834+
podSpec := lc.Spec.PodTemplate.Spec
835+
podSpec = *deIndividualize(podSpec.DeepCopy())
836+
pod := &corev1.Pod{
837+
ObjectMeta: metav1.ObjectMeta{
838+
Labels: lc.Spec.PodTemplate.Labels,
839+
},
840+
Spec: podSpec,
841+
}
842+
hasher := sha256.New()
843+
podJSON, err := json.Marshal(pod)
844+
if err != nil {
845+
return nil, "", fmt.Errorf("failed to marshal launcher config pod spec: %w", err)
846+
}
847+
hasher.Write(podJSON)
848+
var modifiedHash [sha256.Size]byte
849+
modifiedHashSl := hasher.Sum(modifiedHash[:0])
850+
nominalHash := base64.RawStdEncoding.EncodeToString(modifiedHashSl)
851+
852+
logger.V(5).Info("Computed nominalHash from LauncherConfig", "nominalHash", nominalHash, "podJSON", podJSON)
853+
854+
serverDat.NominalProvidingPod = pod
855+
serverDat.NominalProvidingPodHash = nominalHash
856+
857+
pod.GenerateName = reqPod.Name + "-dual-"
858+
// pod.Finalizers = append(pod.Finalizers, providerFinalizer)
859+
// pod.Annotations = MapSet(pod.Annotations, nominalHashAnnotationKey, nominalHash)
860+
// pod.Annotations[requesterAnnotationKey] = string(reqPod.UID) + " " + reqPod.Name
861+
// pod.Annotations[api.AcceleratorsAnnotationName] = *serverDat.GPUIDsStr
862+
// pod.Labels = MapSet(pod.Labels, api.DualLabelName, reqPod.Name)
863+
// pod.Labels[api.SleepingLabelName] = "false"
864+
serverDat.NominalProvidingPod = pod
865+
serverDat.NominalProvidingPodHash = nominalHash
866+
}
867+
return serverDat.NominalProvidingPod, serverDat.NominalProvidingPodHash, nil
868+
}
869+
813870
// deIndividualize removes the parts of a PodSpec that are specific to an individual.
814871
// This func side-effects the given `*PodSpec` and returns it.
815872
func deIndividualize(podSpec *corev1.PodSpec) *corev1.PodSpec {

test/e2e/mkobjs.sh

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env bash
# mkobjs.sh: create a timestamped set of e2e test objects — an
# InferenceServerConfig, a LauncherConfig, and a requester ReplicaSet —
# all suffixed with the same instance stamp so repeated runs coexist.
# On success, prints the three object names (one per line) to stdout.

inst=$(date +%d-%H-%M-%S)

# Resolve the container images from the Makefile; fail fast if the lookup
# fails or yields an empty value (an empty image would produce broken YAML).
server_img=$(make echo-var VAR=TEST_SERVER_IMG) || { echo "Failed to resolve TEST_SERVER_IMG" >&2; exit 1; }
requester_img=$(make echo-var VAR=TEST_REQUESTER_IMG) || { echo "Failed to resolve TEST_REQUESTER_IMG" >&2; exit 1; }
if [ -z "$server_img" ] || [ -z "$requester_img" ]; then
    echo "Empty test image name(s); check 'make echo-var' output" >&2
    exit 1
fi

if out=$(kubectl apply -f - 2>&1 <<EOF
apiVersion: fma.llm-d.ai/v1alpha1
kind: InferenceServerConfig
metadata:
  name: inference-server-config-$inst
spec:
  modelServerConfig:
    options: "--model meta-llama/Llama-2-7b-hf --tensor-parallel-size 1"
    env_vars:
      VLLM_SERVER_DEV_MODE: "1"
    labels:
      component: inference
    annotations:
      description: "Example InferenceServerConfig"
  launcherConfigName: launcher-config-$inst
---
apiVersion: fma.llm-d.ai/v1alpha1
kind: LauncherConfig
metadata:
  name: launcher-config-$inst
spec:
  maxSleepingInstances: 3
  podTemplate:
    spec:
      containers:
      - name: inference-server
        image: $server_img
        command:
        - /ko-app/test-server
        - --startup-delay=22
        resources:
          limits:
            cpu: "200m"
            memory: 9Gi
        readinessProbe:
          httpGet:
            path: /health
            port: 8000
          initialDelaySeconds: 10
          periodSeconds: 5
---
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: my-request-$inst
  labels:
    app: dp-example
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dp-example
      # Include the instance stamp so ReplicaSets from separate runs do not
      # share a selector and fight over each other's Pods.
      instance: "$inst"
  template:
    metadata:
      labels:
        app: dp-example
        instance: "$inst"
      annotations:
        dual-pods.llm-d.ai/admin-port: "8081"
        dual-pods.llm-d.ai/launcher-config: "launcher-config-$inst"
    spec:
      containers:
      - name: inference-server
        image: $requester_img
        imagePullPolicy: IfNotPresent
        command:
        - /ko-app/test-requester
        # The \$(VAR) forms are expanded by kubelet from the env vars below,
        # not by this shell (hence the backslash escapes).
        - --node=\$(NODE_NAME)
        - --pod-uid=\$(POD_UID)
        - --namespace=\$(NAMESPACE)
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef: { fieldPath: spec.nodeName }
        - name: POD_UID
          valueFrom:
            fieldRef: { fieldPath: metadata.uid }
        - name: NAMESPACE
          valueFrom:
            fieldRef: { fieldPath: metadata.namespace }
        ports:
        - name: probes
          containerPort: 8080
        - name: spi
          containerPort: 8081
        readinessProbe:
          httpGet:
            path: /ready
            port: 8080
          initialDelaySeconds: 2
          periodSeconds: 5
        resources:
          limits:
            nvidia.com/gpu: "1"
            cpu: "200m"
            memory: 250Mi
      serviceAccount: testreq
EOF
)
then
    # Report the created object names so callers can capture them.
    echo inference-server-config-$inst
    echo launcher-config-$inst
    echo my-request-$inst
else
    echo Failed to create objects >&2
    echo "$out" >&2
    exit 1
fi

test/e2e/rdobjs.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/env bash
# rdobjs.sh: list the e2e test objects created by mkobjs.sh, by name prefix.
# Each grep may match nothing (and exit non-zero) when no objects exist;
# that is harmless for a listing script, so no error handling is added.

kubectl get rs -oname | grep my-request-

kubectl get po -oname | grep my-request-

kubectl get launcherconfig -oname | grep launcher-config-
kubectl get inferenceserverconfig -oname | grep inference-server-config-

test/e2e/rmobjs.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env bash
# rmobjs.sh: delete the e2e test objects created by mkobjs.sh, by name prefix.
# Pods owned by the ReplicaSets are garbage-collected when the RS is deleted,
# so they are not removed explicitly.
# xargs -r ("no run if empty") prevents a bare `kubectl delete` error when a
# grep matches nothing. NOTE(review): -r is GNU/modern-BSD; confirm the
# target environments' xargs supports it.

kubectl get rs -oname | grep my-request- | xargs -r kubectl delete

kubectl get launcherconfig -oname | grep launcher-config- | xargs -r kubectl delete
kubectl get inferenceserverconfig -oname | grep inference-server-config- | xargs -r kubectl delete

0 commit comments

Comments
 (0)