Skip to content

Commit ccb7a55

Browse files
committed
Test cases for multiple instances sharing one launcher
1 parent 48b98dd commit ccb7a55

File tree

3 files changed

+152
-7
lines changed

3 files changed

+152
-7
lines changed

pkg/controller/dual-pods/inference-server.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
472472
// then those with capacity for new instances.
473473
// Note that multiple vLLM instances can exist in one launcher Pod, but at most one instance can be awake at a time.
474474

475-
launcherPod, hasSleepingInstance, someNotReady, err := ctl.selectBestLauncherPod(ctx, launcherPodAnys, iscHash, nodeDat)
475+
launcherPod, hasSleepingInstance, someNotReady, err := ctl.selectBestLauncherPod(ctx, launcherPodAnys, iscHash, int(lc.Spec.MaxSleepingInstances), nodeDat)
476476
if err != nil {
477477
return err, true
478478
}
@@ -510,6 +510,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
510510
return ctl.bind(ctx, serverDat, requestingPod, launcherPod, true, int16(isc.Spec.ModelServerConfig.Port))
511511
} else {
512512
// Slower path: create new instance in launcher with capacity
513+
logger.V(5).Info("Creating new vLLM instance", "iscHash", iscHash)
513514
result, err := lClient.CreateNamedInstance(ctx, iscHash, *cfg)
514515
if err != nil {
515516
return fmt.Errorf("create vLLM instance: %w", err), true
@@ -564,6 +565,7 @@ func (ctl *controller) selectBestLauncherPod(
564565
ctx context.Context,
565566
launcherPodAnys []interface{},
566567
iscHash string,
568+
maxOthers int,
567569
nodeDat *nodeData,
568570
) (*corev1.Pod, bool, bool, error) {
569571
logger := klog.FromContext(ctx)
@@ -614,8 +616,7 @@ func (ctl *controller) selectBestLauncherPod(
614616
}
615617

616618
// Check if this launcher has capacity for a new instance
617-
// A launcher has capacity if it has zero instances (can host at least one)
618-
if insts.TotalInstances == 0 && candidateWithCapacity == nil {
619+
if insts.TotalInstances <= maxOthers && candidateWithCapacity == nil {
619620
// Priority 2: Has capacity for new instance
620621
logger.V(5).Info("Found launcher with capacity for new instance",
621622
"name", launcherPod.Name,

test/e2e/mkobjs.sh

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,60 @@ if out=$(kubectl apply -f - 2>&1 <<EOF
88
apiVersion: fma.llm-d.ai/v1alpha1
99
kind: InferenceServerConfig
1010
metadata:
11-
name: inference-server-config-$inst
11+
name: inference-server-config-smol-$inst
12+
labels:
13+
instance: "$inst"
1214
spec:
1315
modelServerConfig:
1416
port: 8005
17+
options: "--model HuggingFaceTB/SmolLM2-360M-Instruct"
18+
env_vars:
19+
VLLM_SERVER_DEV_MODE: "1"
20+
VLLM_USE_V1: "1"
21+
VLLM_LOGGING_LEVEL: "DEBUG"
22+
VLLM_CPU_KVCACHE_SPACE: "1" # GiB
23+
labels:
24+
component: inference
25+
annotations:
26+
description: "Example InferenceServerConfig"
27+
launcherConfigName: launcher-config-$inst
28+
---
29+
apiVersion: fma.llm-d.ai/v1alpha1
30+
kind: InferenceServerConfig
31+
metadata:
32+
name: inference-server-config-qwen-$inst
33+
labels:
34+
instance: "$inst"
35+
spec:
36+
modelServerConfig:
37+
port: 8006
38+
options: "--model Qwen/Qwen2.5-0.5B-Instruct"
39+
env_vars:
40+
VLLM_SERVER_DEV_MODE: "1"
41+
VLLM_USE_V1: "1"
42+
VLLM_LOGGING_LEVEL: "DEBUG"
43+
VLLM_CPU_KVCACHE_SPACE: "1" # GiB
44+
labels:
45+
component: inference
46+
annotations:
47+
description: "Example InferenceServerConfig"
48+
launcherConfigName: launcher-config-$inst
49+
---
50+
apiVersion: fma.llm-d.ai/v1alpha1
51+
kind: InferenceServerConfig
52+
metadata:
53+
name: inference-server-config-tinyllama-$inst
54+
labels:
55+
instance: "$inst"
56+
spec:
57+
modelServerConfig:
58+
port: 8007
1559
options: "--model TinyLlama/TinyLlama-1.1B-Chat-v1.0"
1660
env_vars:
1761
VLLM_SERVER_DEV_MODE: "1"
1862
VLLM_USE_V1: "1"
1963
VLLM_LOGGING_LEVEL: "DEBUG"
64+
VLLM_CPU_KVCACHE_SPACE: "1" # GiB
2065
labels:
2166
component: inference
2267
annotations:
@@ -27,8 +72,10 @@ apiVersion: fma.llm-d.ai/v1alpha1
2772
kind: LauncherConfig
2873
metadata:
2974
name: launcher-config-$inst
75+
labels:
76+
instance: "$inst"
3077
spec:
31-
maxSleepingInstances: 3
78+
maxSleepingInstances: 1
3279
podTemplate:
3380
spec:
3481
containers:
@@ -63,7 +110,7 @@ spec:
63110
instance: "$inst"
64111
annotations:
65112
dual-pods.llm-d.ai/admin-port: "8081"
66-
dual-pods.llm-d.ai/inference-server-config: "inference-server-config-$inst"
113+
dual-pods.llm-d.ai/inference-server-config: "inference-server-config-smol-$inst"
67114
spec:
68115
containers:
69116
- name: inference-server
@@ -105,9 +152,12 @@ spec:
105152
EOF
106153
)
107154
then
108-
echo inference-server-config-$inst
155+
# Output is parsed positionally by the caller (e.g. the e2e test script), so append new names at the end.
156+
echo inference-server-config-smol-$inst
109157
echo launcher-config-$inst
110158
echo my-request-$inst
159+
echo inference-server-config-qwen-$inst
160+
echo inference-server-config-tinyllama-$inst
111161
else
112162
echo Failed to create objects >&2
113163
echo "$out" >&2

test/e2e/run-launcher-based.sh

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ objs=$(test/e2e/mkobjs.sh)
165165
isc=$(echo $objs | awk '{print $1}')
166166
lc=$(echo $objs | awk '{print $2}')
167167
rslb=$(echo $objs | awk '{print $3}')
168+
isc2=$(echo $objs | awk '{print $4}')
168169
instlb=${rslb#my-request-}
169170

170171
# Expect requester pod to be created
@@ -240,10 +241,103 @@ kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s
240241

241242
cheer Successful instance wake-up fast path
242243

244+
: Multiple Instances Share One Launcher
245+
246+
# Scale requester to 0 again
247+
kubectl scale rs $rslb --replicas=0
248+
249+
# Wait until no requester pods remain for this instance.
# `grep -c` prints 0 but exits 1 when nothing matches, so `|| true` has to be
# grouped with it. Ungrouped, `a | b || c | d` parses as `(a | b) || (c | d)`,
# which succeeds while pods still exist and can never succeed at zero.
# Assumes `expect` evaluates its argument as a shell command line.
expect "kubectl get pods -o name -l app=dp-example,instance=$instlb | { grep -c '^pod/' || true; } | grep -w 0"
250+
! kubectl get pod $reqlb2
251+
252+
# Launcher should remain
253+
kubectl get pod $launcherlb
254+
255+
# Verify launcher is unbound
256+
expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "" ]'
257+
258+
# Patch ReplicaSet to use isc2 instead of isc
259+
kubectl patch rs $rslb --type=json -p='[{"op": "replace", "path": "/spec/template/metadata/annotations/dual-pods.llm-d.ai~1inference-server-config", "value": "'$isc2'"}]'
260+
261+
sleep 5
262+
263+
# Scale back up (should reuse same launcher and create 2nd instance)
264+
kubectl scale rs $rslb --replicas=1
265+
266+
expect "kubectl get pods -o name -l app=dp-example,instance=$instlb | grep -c '^pod/' | grep -w 1"
267+
268+
reqlb3=$(kubectl get pods -o name -l app=dp-example,instance=$instlb | sed s%pod/%%)
269+
270+
# Should still be using the same launcher pod
271+
launcherlb3=$(kubectl get pods -o name -l dual-pods.llm-d.ai/launcher-config-name=$lc | sed s%pod/%%)
272+
[ "$launcherlb3" == "$launcherlb" ]
273+
274+
# Verify new requester is bound to same launcher
275+
expect '[ "$(kubectl get pod $reqlb3 -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$launcherlb" ]'
276+
277+
# Verify launcher is bound to new requester
278+
expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$reqlb3" ]'
279+
280+
# Verify the new requester is using isc2
281+
expect '[ "$(kubectl get pod $reqlb3 -o jsonpath={.metadata.annotations.dual-pods\\.llm-d\\.ai/inference-server-config})" == "'$isc2'" ]'
282+
283+
# Wait for requester to be ready (launcher should already be ready)
284+
date
285+
kubectl wait --for condition=Ready pod/$reqlb3 --timeout=30s
286+
kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s
287+
288+
cheer Successful multiple instances sharing one launcher
289+
290+
: Switch Instances In One Launcher
291+
292+
# Scale requester to 0 again
293+
kubectl scale rs $rslb --replicas=0
294+
295+
# Wait until no requester pods remain for this instance.
# `grep -c` prints 0 but exits 1 when nothing matches, so `|| true` has to be
# grouped with it. Ungrouped, `a | b || c | d` parses as `(a | b) || (c | d)`,
# which succeeds while pods still exist and can never succeed at zero.
# Assumes `expect` evaluates its argument as a shell command line.
expect "kubectl get pods -o name -l app=dp-example,instance=$instlb | { grep -c '^pod/' || true; } | grep -w 0"
296+
! kubectl get pod $reqlb3
297+
298+
# Launcher should remain
299+
kubectl get pod $launcherlb
300+
301+
# Verify launcher is unbound
302+
expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "" ]'
303+
304+
# Patch ReplicaSet back to use original isc
305+
kubectl patch rs $rslb --type=json -p='[{"op": "replace", "path": "/spec/template/metadata/annotations/dual-pods.llm-d.ai~1inference-server-config", "value": "'$isc'"}]'
306+
307+
sleep 5
308+
309+
# Scale back up (should reuse same launcher and wake first instance)
310+
kubectl scale rs $rslb --replicas=1
311+
312+
expect "kubectl get pods -o name -l app=dp-example,instance=$instlb | grep -c '^pod/' | grep -w 1"
313+
314+
reqlb4=$(kubectl get pods -o name -l app=dp-example,instance=$instlb | sed s%pod/%%)
315+
316+
# Should still be using the same launcher pod
317+
launcherlb4=$(kubectl get pods -o name -l dual-pods.llm-d.ai/launcher-config-name=$lc | sed s%pod/%%)
318+
[ "$launcherlb4" == "$launcherlb" ]
319+
320+
# Verify new requester is bound to same launcher
321+
expect '[ "$(kubectl get pod $reqlb4 -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$launcherlb" ]'
322+
323+
# Verify launcher is bound to new requester
324+
expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$reqlb4" ]'
325+
326+
# Verify the new requester is using original isc
327+
expect '[ "$(kubectl get pod $reqlb4 -o jsonpath={.metadata.annotations.dual-pods\\.llm-d\\.ai/inference-server-config})" == "'$isc'" ]'
328+
329+
# Wait for requester to be ready (launcher should already be ready)
330+
date
331+
kubectl wait --for condition=Ready pod/$reqlb4 --timeout=30s
332+
kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s
333+
334+
cheer Successful switching instances in one launcher
335+
243336
: Clean up launcher-based workloads
244337

245338
kubectl delete rs $rslb --ignore-not-found=true
246339
kubectl delete inferenceserverconfig $isc --ignore-not-found=true
340+
kubectl delete inferenceserverconfig $isc2 --ignore-not-found=true
247341
kubectl delete launcherconfig $lc --ignore-not-found=true
248342
expect '[ $(kubectl get pods -o name | grep -c "^pod/my-request-") == "0" ]'
249343

0 commit comments

Comments
 (0)