File tree Expand file tree Collapse file tree 2 files changed +2
-11
lines changed
Expand file tree Collapse file tree 2 files changed +2
-11
lines changed Original file line number Diff line number Diff line change 1717 options: "--model HuggingFaceTB/SmolLM2-360M-Instruct"
1818 env_vars:
1919 VLLM_SERVER_DEV_MODE: "1"
20- VLLM_USE_V1: "1"
2120 VLLM_LOGGING_LEVEL: "DEBUG"
2221 VLLM_CPU_KVCACHE_SPACE: "1" # GiB
2322 labels:
3837 options: "--model Qwen/Qwen2.5-0.5B-Instruct"
3938 env_vars:
4039 VLLM_SERVER_DEV_MODE: "1"
41- VLLM_USE_V1: "1"
4240 VLLM_LOGGING_LEVEL: "DEBUG"
4341 VLLM_CPU_KVCACHE_SPACE: "1" # GiB
4442 labels:
5957 options: "--model TinyLlama/TinyLlama-1.1B-Chat-v1.0"
6058 env_vars:
6159 VLLM_SERVER_DEV_MODE: "1"
62- VLLM_USE_V1: "1"
6360 VLLM_LOGGING_LEVEL: "DEBUG"
6461 VLLM_CPU_KVCACHE_SPACE: "1" # GiB
6562 labels:
Original file line number Diff line number Diff line change @@ -278,12 +278,9 @@ expect '[ "$(kubectl get pod $reqlb3 -o jsonpath={.metadata.labels.dual-pods\\.l
278278# Verify launcher is bound to new requester
279279expect ' [ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$reqlb3" ]'
280280
281- # Verify the new requester is using isc2
282- expect ' [ "$(kubectl get pod $reqlb3 -o jsonpath={.metadata.annotations.dual-pods\\.llm-d\\.ai/inference-server-config})" == "' $isc2 ' " ]'
283-
284281# Wait for requester to be ready (launcher should already be ready)
285282date
286- kubectl wait --for condition=Ready pod/$reqlb3 --timeout=30s
283+ kubectl wait --for condition=Ready pod/$reqlb3 --timeout=120s
287284kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s
288285
289286cheer Successful multiple instances sharing one launcher
@@ -321,12 +318,9 @@ expect '[ "$(kubectl get pod $reqlb4 -o jsonpath={.metadata.labels.dual-pods\\.l
321318# Verify launcher is bound to new requester
322319expect ' [ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$reqlb4" ]'
323320
324- # Verify the new requester is using original isc
325- expect ' [ "$(kubectl get pod $reqlb4 -o jsonpath={.metadata.annotations.dual-pods\\.llm-d\\.ai/inference-server-config})" == "' $isc ' " ]'
326-
327321# Wait for requester to be ready (launcher should already be ready)
328322date
329- kubectl wait --for condition=Ready pod/$reqlb4 --timeout=30s
323+ kubectl wait --for condition=Ready pod/$reqlb4 --timeout=120s
330324kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s
331325
332326cheer Successful switching instances in one launcher
You can’t perform that action at this time.
0 commit comments