Skip to content

Commit 4027718

Browse files
committed
fix: to get flowcontrol in place for EPP pod
Signed-off-by: Wen Zhou <wenzhou@redhat.com>
1 parent e279f43 commit 4027718

2 files changed

Lines changed: 32 additions & 7 deletions

File tree

deploy/install.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,14 @@ deploy_llm_d_infrastructure() {
10691069
}
10701070
}
10711071
]'
1072+
# Wait for EPP rollout to complete to ensure flowcontrol is enabled before tests run.
1073+
# Without this, tests may start while the old EPP pod (without flowcontrol) is still serving.
1074+
log_info "Waiting for EPP rollout to complete (flowcontrol enabled)..."
1075+
if kubectl rollout status deployment "$LLM_D_EPP_NAME" -n "$LLMD_NS" --timeout=120s; then
1076+
log_success "EPP rollout complete with flowcontrol enabled"
1077+
else
1078+
log_warning "EPP rollout did not complete in time - scale-from-zero test may fail"
1079+
fi
10721080
else
10731081
log_warning "Skipping inference-scheduler patch: Deployment $LLM_D_EPP_NAME not found in $LLMD_NS"
10741082
fi

test/e2e/scale_from_zero_test.go

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,29 +59,46 @@ var _ = Describe("Scale-From-Zero Feature", Label("smoke", "full"), Ordered, fun
5959
g.Expect(err).NotTo(HaveOccurred(), "EPP service should exist")
6060
}, 2*time.Minute, 5*time.Second).Should(Succeed(), "EPP service should exist")
6161

62-
// Wait for EPP pods to be ready
62+
// Wait for EPP pods to be ready with flowcontrol enabled
6363
Eventually(func(g Gomega) {
6464
podList, err := k8sClient.CoreV1().Pods(cfg.LLMDNamespace).List(ctx, metav1.ListOptions{
6565
LabelSelector: fmt.Sprintf("inferencepool=%s", eppServiceName),
6666
})
6767
g.Expect(err).NotTo(HaveOccurred(), "Should be able to list pods")
6868
g.Expect(len(podList.Items)).To(BeNumerically(">", 0), "EPP pods should exist")
6969

70-
// Check that at least one pod is ready
71-
hasReadyPod := false
70+
// Check that at least one pod is ready with flowcontrol enabled
71+
hasReadyPodWithFlowControl := false
7272
for _, pod := range podList.Items {
73+
isReady := false
7374
for _, condition := range pod.Status.Conditions {
7475
if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue {
75-
hasReadyPod = true
76+
isReady = true
7677
break
7778
}
7879
}
79-
if hasReadyPod {
80+
if !isReady {
81+
continue
82+
}
83+
// Double-check that flowcontrol is enabled via the container env var
84+
for _, container := range pod.Spec.Containers {
85+
for _, env := range container.Env {
86+
if env.Name == "ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER" && env.Value == "true" {
87+
hasReadyPodWithFlowControl = true
88+
break
89+
}
90+
}
91+
if hasReadyPodWithFlowControl {
92+
break
93+
}
94+
}
95+
if hasReadyPodWithFlowControl {
8096
break
8197
}
8298
}
83-
g.Expect(hasReadyPod).To(BeTrue(), "At least one EPP pod should be ready")
84-
}, 2*time.Minute, 5*time.Second).Should(Succeed(), "EPP pods should be ready")
99+
g.Expect(hasReadyPodWithFlowControl).To(BeTrue(),
100+
"At least one ready EPP pod should have ENABLE_EXPERIMENTAL_FLOW_CONTROL_LAYER=true")
101+
}, 3*time.Minute, 5*time.Second).Should(Succeed(), "EPP pods should be ready with flowcontrol enabled")
85102

86103
// Additional delay to ensure the datastore is fully populated after EPP is ready
87104
time.Sleep(5 * time.Second)

0 commit comments

Comments
 (0)