Merged
3 changes: 0 additions & 3 deletions test/e2e/mkobjs.sh
@@ -17,7 +17,6 @@ spec:
    options: "--model HuggingFaceTB/SmolLM2-360M-Instruct"
    env_vars:
      VLLM_SERVER_DEV_MODE: "1"
-     VLLM_USE_V1: "1"
      VLLM_LOGGING_LEVEL: "DEBUG"
      VLLM_CPU_KVCACHE_SPACE: "1" # GiB
Comment on lines 18 to 21
Copilot AI Mar 6, 2026

VLLM_USE_V1 was removed from the InferenceServerConfig env vars here, but it’s still used in other repo examples (e.g., docs/e2e-recipe.md and .github/workflows/ci-e2e-openshift.yaml). This makes the launcher-based E2E objects diverge from documented/CI configurations and can make failures harder to reproduce. Consider keeping this env var (or updating the docs/workflows and adding a short note explaining why it’s no longer needed).

labels:
@@ -38,7 +37,6 @@ spec:
    options: "--model Qwen/Qwen2.5-0.5B-Instruct"
    env_vars:
      VLLM_SERVER_DEV_MODE: "1"
-     VLLM_USE_V1: "1"
      VLLM_LOGGING_LEVEL: "DEBUG"
      VLLM_CPU_KVCACHE_SPACE: "1" # GiB
Comment on lines 38 to 41

Copilot AI Mar 6, 2026

Same as above: removing VLLM_USE_V1 here diverges from the repo’s documented/CI InferenceServerConfig examples. If the intent is to standardize on a default vLLM mode, consider updating the docs/workflows accordingly so users and CI run the same configuration.

labels:
@@ -59,7 +57,6 @@ spec:
    options: "--model TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    env_vars:
      VLLM_SERVER_DEV_MODE: "1"
-     VLLM_USE_V1: "1"
      VLLM_LOGGING_LEVEL: "DEBUG"
      VLLM_CPU_KVCACHE_SPACE: "1" # GiB
Comment on lines 58 to 61

Copilot AI Mar 6, 2026

Same as above: VLLM_USE_V1 removal changes the runtime configuration for this test InferenceServerConfig compared to other repo examples. Please either keep it for consistency or update the other references so launcher-based tests match the expected vLLM configuration.

labels:
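The three review comments above all raise the same consistency concern: other places in the repo may still set VLLM_USE_V1. A quick way to audit this is a repo-wide grep; the paths below are taken from the review comments, not verified against the repository.

```shell
# List any remaining VLLM_USE_V1 references in the locations the review
# comments mention. Some of these directories may not exist, so errors
# are suppressed; print a marker when nothing is found.
grep -rn "VLLM_USE_V1" docs/ .github/workflows/ test/ 2>/dev/null \
  || echo "no references found"
```

Run from the repository root, this either lists every file:line still setting the variable or confirms the removal is complete.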
10 changes: 2 additions & 8 deletions test/e2e/run-launcher-based.sh
@@ -278,12 +278,9 @@
 # Verify launcher is bound to new requester
 expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$reqlb3" ]'
 
-# Verify the new requester is using isc2
-expect '[ "$(kubectl get pod $reqlb3 -o jsonpath={.metadata.annotations.dual-pods\\.llm-d\\.ai/inference-server-config})" == "'$isc2'" ]'
-
 # Wait for requester to be ready (launcher should already be ready)
 date
-kubectl wait --for condition=Ready pod/$reqlb3 --timeout=30s
+kubectl wait --for condition=Ready pod/$reqlb3 --timeout=120s

Copilot AI Mar 6, 2026

The test no longer verifies that the newly created requester pod is actually using the patched dual-pods.llm-d.ai/inference-server-config value (isc2). Without an assertion on the pod annotation (or an equivalent check proving a 2nd instance was created), this section can pass even if the ReplicaSet patch didn’t take effect, reducing the test’s ability to catch regressions.

Suggested change:
-kubectl wait --for condition=Ready pod/$reqlb3 --timeout=120s
+kubectl wait --for condition=Ready pod/$reqlb3 --timeout=120s
+# Verify requester is using the patched inference server config (isc2)
+expect '[ "$(kubectl get pod $reqlb3 -o jsonpath={.metadata.annotations.dual-pods\\.llm-d\\.ai/inference-server-config})" == "$isc2" ]'
Collaborator

Silly LLM, why didn't you suggest testing with an if statement?

I do not think that this script needs to check whether the ReplicaSet controller behaved properly.

kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s

cheer Successful multiple instances sharing one launcher
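The `expect` assertions used throughout this script poll a shell condition rather than checking it once. The repo's actual helper is not shown in this diff, so the following is a minimal retry-based sketch of the same idea; the function body, the 60-second budget, and the 2-second poll interval are assumptions.

```shell
# Hypothetical sketch of an `expect`-style helper: re-evaluate a shell
# test until it passes or a deadline expires. Requires bash (SECONDS, ==).
expect() {
  local deadline=$((SECONDS + 60))   # assumed 60s budget
  until eval "$1"; do
    if [ "$SECONDS" -ge "$deadline" ]; then
      echo "expect failed: $1" >&2
      return 1
    fi
    sleep 2
  done
}

# Trivial usage: the condition holds immediately, so no retries occur.
expect '[ "$((1 + 1))" == "2" ]' && echo "assertion passed"
# → assertion passed
```

Polling like this absorbs the propagation delay between a `kubectl patch` and the moment labels and annotations become visible on the pods.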
@@ -321,12 +318,9 @@
 # Verify launcher is bound to new requester
 expect '[ "$(kubectl get pod $launcherlb -o jsonpath={.metadata.labels.dual-pods\\.llm-d\\.ai/dual})" == "$reqlb4" ]'
 
-# Verify the new requester is using original isc
-expect '[ "$(kubectl get pod $reqlb4 -o jsonpath={.metadata.annotations.dual-pods\\.llm-d\\.ai/inference-server-config})" == "'$isc'" ]'
-
 # Wait for requester to be ready (launcher should already be ready)
 date
-kubectl wait --for condition=Ready pod/$reqlb4 --timeout=30s
+kubectl wait --for condition=Ready pod/$reqlb4 --timeout=120s
 kubectl wait --for condition=Ready pod/$launcherlb --timeout=5s
Comment on lines 321 to 324

Copilot AI Mar 6, 2026

This scenario patches the ReplicaSet back to the original inference server config, but the test no longer asserts that the new requester pod actually has the expected dual-pods.llm-d.ai/inference-server-config annotation (or otherwise proves the first instance was re-selected). Without that, this can become a false-positive if the patch doesn’t apply or the wrong instance is used.


cheer Successful switching instances in one launcher
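A note on the jsonpath expressions in these assertions: dots inside label and annotation keys such as dual-pods.llm-d.ai/dual must be escaped, or kubectl parses each dot as a nested-field separator. The script writes `\\.` because the expression sits inside a single-quoted `expect` argument that goes through one more round of shell evaluation. A plain-shell illustration of that quoting round (no cluster needed; `eval` stands in for the helper's re-evaluation):

```shell
# Inside the single-quoted expect string, \\. is two literal characters;
# the double-quote pass during eval collapses it to \. — which is
# jsonpath's escape for a literal dot in a key.
inner='{.metadata.labels.dual-pods\\.llm-d\\.ai/dual}'
eval "printf '%s\n' \"$inner\""
# → {.metadata.labels.dual-pods\.llm-d\.ai/dual}
```

The collapsed form is what kubectl ultimately receives, so it looks up the single key `dual-pods.llm-d.ai/dual` instead of descending through `dual-pods`, `llm-d`, and `ai/dual`.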