Skip to content

Commit 108e92d

Browse files
committed
Test cases for multiple instances sharing one launcher
Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
1 parent 8988875 commit 108e92d

File tree

3 files changed

+152
-7
lines changed

3 files changed

+152
-7
lines changed

pkg/controller/dual-pods/inference-server.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
472472
// then those with capacity for new instances.
473473
// Note that multiple vLLM instances could exist in one launcher Pod, but at most one instance could be awake at a time.
474474

475-
launcherPod, hasSleepingInstance, someNotReady, err := ctl.selectBestLauncherPod(ctx, launcherPodAnys, iscHash, nodeDat)
475+
launcherPod, hasSleepingInstance, someNotReady, err := ctl.selectBestLauncherPod(ctx, launcherPodAnys, iscHash, int(lc.Spec.MaxSleepingInstances), nodeDat)
476476
if err != nil {
477477
return err, true
478478
}
@@ -510,6 +510,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
510510
return ctl.bind(ctx, serverDat, requestingPod, launcherPod, true, int16(isc.Spec.ModelServerConfig.Port))
511511
} else {
512512
// Slower path: create new instance in launcher with capacity
513+
logger.V(5).Info("Creating new vLLM instance", "iscHash", iscHash)
513514
result, err := lClient.CreateNamedInstance(ctx, iscHash, *cfg)
514515
if err != nil {
515516
return fmt.Errorf("create vLLM instance: %w", err), true
@@ -564,6 +565,7 @@ func (ctl *controller) selectBestLauncherPod(
564565
ctx context.Context,
565566
launcherPodAnys []interface{},
566567
iscHash string,
568+
maxOthers int,
567569
nodeDat *nodeData,
568570
) (*corev1.Pod, bool, bool, error) {
569571
logger := klog.FromContext(ctx)
@@ -614,8 +616,7 @@ func (ctl *controller) selectBestLauncherPod(
614616
}
615617

616618
// Check if this launcher has capacity for a new instance
617-
// A launcher has capacity if it has zero instances (can host at least one)
618-
if insts.TotalInstances == 0 && candidateWithCapacity == nil {
619+
if insts.TotalInstances <= maxOthers && candidateWithCapacity == nil {
619620
// Priority 2: Has capacity for new instance
620621
logger.V(5).Info("Found launcher with capacity for new instance",
621622
"name", launcherPod.Name,

test/e2e/mkobjs.sh

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,60 @@ if out=$(kubectl apply -f - 2>&1 <<EOF
88
apiVersion: fma.llm-d.ai/v1alpha1
99
kind: InferenceServerConfig
1010
metadata:
11-
name: inference-server-config-$inst
11+
name: inference-server-config-smol-$inst
12+
labels:
13+
instance: "$inst"
1214
spec:
1315
modelServerConfig:
1416
port: 8005
17+
options: "--model HuggingFaceTB/SmolLM2-360M-Instruct"
18+
env_vars:
19+
VLLM_SERVER_DEV_MODE: "1"
20+
VLLM_USE_V1: "1"
21+
VLLM_LOGGING_LEVEL: "DEBUG"
22+
VLLM_CPU_KVCACHE_SPACE: "1" # GiB
23+
labels:
24+
component: inference
25+
annotations:
26+
description: "Example InferenceServerConfig"
27+
launcherConfigName: launcher-config-$inst
28+
---
29+
apiVersion: fma.llm-d.ai/v1alpha1
30+
kind: InferenceServerConfig
31+
metadata:
32+
name: inference-server-config-qwen-$inst
33+
labels:
34+
instance: "$inst"
35+
spec:
36+
modelServerConfig:
37+
port: 8006
38+
options: "--model Qwen/Qwen2.5-0.5B-Instruct"
39+
env_vars:
40+
VLLM_SERVER_DEV_MODE: "1"
41+
VLLM_USE_V1: "1"
42+
VLLM_LOGGING_LEVEL: "DEBUG"
43+
VLLM_CPU_KVCACHE_SPACE: "1" # GiB
44+
labels:
45+
component: inference
46+
annotations:
47+
description: "Example InferenceServerConfig"
48+
launcherConfigName: launcher-config-$inst
49+
---
50+
apiVersion: fma.llm-d.ai/v1alpha1
51+
kind: InferenceServerConfig
52+
metadata:
53+
name: inference-server-config-tinyllama-$inst
54+
labels:
55+
instance: "$inst"
56+
spec:
57+
modelServerConfig:
58+
port: 8007
1559
options: "--model TinyLlama/TinyLlama-1.1B-Chat-v1.0"
1660
env_vars:
1761
VLLM_SERVER_DEV_MODE: "1"
1862
VLLM_USE_V1: "1"
1963
VLLM_LOGGING_LEVEL: "DEBUG"
64+
VLLM_CPU_KVCACHE_SPACE: "1" # GiB
2065
labels:
2166
component: inference
2267
annotations:
@@ -27,8 +72,10 @@ apiVersion: fma.llm-d.ai/v1alpha1
2772
kind: LauncherConfig
2873
metadata:
2974
name: launcher-config-$inst
75+
labels:
76+
instance: "$inst"
3077
spec:
31-
maxSleepingInstances: 3
78+
maxSleepingInstances: 1
3279
podTemplate:
3380
spec:
3481
containers:
@@ -63,7 +110,7 @@ spec:
63110
instance: "$inst"
64111
annotations:
65112
dual-pods.llm-d.ai/admin-port: "8081"
66-
dual-pods.llm-d.ai/inference-server-config: "inference-server-config-$inst"
113+
dual-pods.llm-d.ai/inference-server-config: "inference-server-config-smol-$inst"
67114
spec:
68115
containers:
69116
- name: inference-server
@@ -105,9 +152,12 @@ spec:
105152
EOF
106153
)
107154
then
108-
echo inference-server-config-$inst
155+
# output to be parsed by caller, e.g. the e2e test script
156+
echo inference-server-config-smol-$inst
109157
echo launcher-config-$inst
110158
echo my-request-$inst
159+
echo inference-server-config-qwen-$inst
160+
echo inference-server-config-tinyllama-$inst
111161
else
112162
echo Failed to create objects >&2
113163
echo "$out" >&2

test/e2e/run-launcher-based.sh

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ objs=$(test/e2e/mkobjs.sh)
156156
isc=$(echo $objs | awk '{print $1}')
157157
lc=$(echo $objs | awk '{print $2}')
158158
rslb=$(echo $objs | awk '{print $3}')
159+
isc2=$(echo $objs | awk '{print $4}')
159160
instlb=${rslb#my-request-}
160161

161162
# Expect requester pod to be created
@@ -221,10 +222,103 @@ kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s
221222

222223
cheer Successful instance wake-up fast path
223224

225+
: Multiple Instances Share One Launcher
226+
227+
# Scale requester to 0 again
228+
kubectl scale rs $rslb --replicas=0
229+
230+
expect "kubectl get pods -o name -l app=dp-example,instance=$instlb | grep -c '^pod/' || true | grep -w 0"
231+
! kubectl get pod $reqlb2
232+
233+
# Launcher should remain
234+
kubectl get pod $launcherlb
235+
236+
# Verify launcher is unbound
237+
expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "" ]'
238+
239+
# Patch ReplicaSet to use isc2 instead of isc
240+
kubectl patch rs $rslb --type=json -p='[{"op": "replace", "path": "/spec/template/metadata/annotations/dual-pods.llm-d.ai~1inference-server-config", "value": "'$isc2'"}]'
241+
242+
sleep 5
243+
244+
# Scale back up (should reuse same launcher and create 2nd instance)
245+
kubectl scale rs $rslb --replicas=1
246+
247+
expect "kubectl get pods -o name -l app=dp-example,instance=$instlb | grep -c '^pod/' | grep -w 1"
248+
249+
reqlb3=$(kubectl get pods -o name -l app=dp-example,instance=$instlb | sed s%pod/%%)
250+
251+
# Should still be using the same launcher pod
252+
launcherlb3=$(kubectl get pods -o name -l dual-pods.llm-d.ai/launcher-config-name=$lc | sed s%pod/%%)
253+
[ "$launcherlb3" == "$launcherlb" ]
254+
255+
# Verify new requester is bound to same launcher
256+
expect '[ "$(kubectl get pod $reqlb3 -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$launcherlb" ]'
257+
258+
# Verify launcher is bound to new requester
259+
expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$reqlb3" ]'
260+
261+
# Verify the new requester is using isc2
262+
expect '[ "$(kubectl get pod $reqlb3 -o jsonpath={.metadata.annotations.dual-pods\\.llm-d\\.ai/inference-server-config})" == "'$isc2'" ]'
263+
264+
# Wait for requester to be ready (launcher should already be ready)
265+
date
266+
kubectl wait --for condition=Ready pod/$reqlb3 --timeout=30s
267+
kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s
268+
269+
cheer Successful multiple instances sharing one launcher
270+
271+
: Switch Instances In One Launcher
272+
273+
# Scale requester to 0 again
274+
kubectl scale rs $rslb --replicas=0
275+
276+
expect "kubectl get pods -o name -l app=dp-example,instance=$instlb | grep -c '^pod/' || true | grep -w 0"
277+
! kubectl get pod $reqlb3
278+
279+
# Launcher should remain
280+
kubectl get pod $launcherlb
281+
282+
# Verify launcher is unbound
283+
expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "" ]'
284+
285+
# Patch ReplicaSet back to use original isc
286+
kubectl patch rs $rslb --type=json -p='[{"op": "replace", "path": "/spec/template/metadata/annotations/dual-pods.llm-d.ai~1inference-server-config", "value": "'$isc'"}]'
287+
288+
sleep 5
289+
290+
# Scale back up (should reuse same launcher and wake first instance)
291+
kubectl scale rs $rslb --replicas=1
292+
293+
expect "kubectl get pods -o name -l app=dp-example,instance=$instlb | grep -c '^pod/' | grep -w 1"
294+
295+
reqlb4=$(kubectl get pods -o name -l app=dp-example,instance=$instlb | sed s%pod/%%)
296+
297+
# Should still be using the same launcher pod
298+
launcherlb4=$(kubectl get pods -o name -l dual-pods.llm-d.ai/launcher-config-name=$lc | sed s%pod/%%)
299+
[ "$launcherlb4" == "$launcherlb" ]
300+
301+
# Verify new requester is bound to same launcher
302+
expect '[ "$(kubectl get pod $reqlb4 -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$launcherlb" ]'
303+
304+
# Verify launcher is bound to new requester
305+
expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$reqlb4" ]'
306+
307+
# Verify the new requester is using original isc
308+
expect '[ "$(kubectl get pod $reqlb4 -o jsonpath={.metadata.annotations.dual-pods\\.llm-d\\.ai/inference-server-config})" == "'$isc'" ]'
309+
310+
# Wait for requester to be ready (launcher should already be ready)
311+
date
312+
kubectl wait --for condition=Ready pod/$reqlb4 --timeout=30s
313+
kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s
314+
315+
cheer Successful switching instances in one launcher
316+
224317
: Clean up launcher-based workloads
225318

226319
kubectl delete rs $rslb --ignore-not-found=true
227320
kubectl delete inferenceserverconfig $isc --ignore-not-found=true
321+
kubectl delete inferenceserverconfig $isc2 --ignore-not-found=true
228322
kubectl delete launcherconfig $lc --ignore-not-found=true
229323
expect '[ $(kubectl get pods -o name | grep -c "^pod/my-request-") == "0" ]'
230324

0 commit comments

Comments (0)