Skip to content

Commit 3cdedb9

Browse files
Zhiqiang Fang and claude committed
test: fix race condition in config daemon pod restart test
getConfigDaemonPod had a hard Gomega Expect assertion on pod count, causing it to panic when called inside Eventually immediately after pod deletion — before the DaemonSet replacement was created. Change getConfigDaemonPod to return (*corev1.Pod, error) so callers inside Eventually can retry gracefully on transient zero-pod states. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent d0f8550 commit 3cdedb9

2 files changed

Lines changed: 13 additions & 8 deletions

File tree

test/conformance/tests/test_aws_platform.go

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,8 @@ var _ = Describe("[sriov] aws platform", Ordered, func() {
182182
savedStatus := nodeState.Status
183183

184184
By("delete config daemon pod on the node")
185-
configDaemonPod := getConfigDaemonPod(node)
185+
configDaemonPod, err := getConfigDaemonPod(node)
186+
Expect(err).ToNot(HaveOccurred())
186187
oldPodName := configDaemonPod.Name
187188
err = clients.Pods(operatorNamespace).Delete(context.Background(), configDaemonPod.Name, metav1.DeleteOptions{
188189
GracePeriodSeconds: ptr.To(int64(0)),
@@ -191,9 +192,9 @@ var _ = Describe("[sriov] aws platform", Ordered, func() {
191192

192193
By("waiting for the new config daemon pod to be running")
193194
Eventually(func() bool {
194-
newPod := getConfigDaemonPod(node)
195-
// Make sure we got a different pod (new one)
196-
if newPod.Name == oldPodName {
195+
newPod, err := getConfigDaemonPod(node)
196+
// Make sure the replacement pod exists and is different from the deleted one
197+
if err != nil || newPod.Name == oldPodName {
197198
return false
198199
}
199200
return newPod.Status.Phase == corev1.PodRunning

test/conformance/tests/test_sriov_operator.go

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1964,20 +1964,24 @@ func createVanillaNetworkPolicy(node string, sriovInfos *cluster.EnabledNodes, n
19641964
})))
19651965
}
19661966

1967-
func getConfigDaemonPod(nodeName string) *corev1.Pod {
1967+
func getConfigDaemonPod(nodeName string) (*corev1.Pod, error) {
19681968
pods := &corev1.PodList{}
19691969
label, err := labels.Parse("app=sriov-network-config-daemon")
19701970
Expect(err).ToNot(HaveOccurred())
19711971
field, err := fields.ParseSelector(fmt.Sprintf("spec.nodeName=%s", nodeName))
19721972
Expect(err).ToNot(HaveOccurred())
19731973
err = clients.List(context.Background(), pods, &runtimeclient.ListOptions{Namespace: operatorNamespace, LabelSelector: label, FieldSelector: field})
19741974
Expect(err).ToNot(HaveOccurred())
1975-
Expect(len(pods.Items)).To(Equal(1))
1976-
return &pods.Items[0]
1975+
if len(pods.Items) != 1 {
1976+
return nil, fmt.Errorf("expected 1 config daemon pod on node %s, got %d", nodeName, len(pods.Items))
1977+
}
1978+
return &pods.Items[0], nil
19771979
}
19781980

19791981
func runCommandOnConfigDaemon(nodeName string, command ...string) (string, string, error) {
1980-
output, errOutput, err := pod.ExecCommand(clients, getConfigDaemonPod(nodeName), command...)
1982+
daemonPod, err := getConfigDaemonPod(nodeName)
1983+
Expect(err).ToNot(HaveOccurred())
1984+
output, errOutput, err := pod.ExecCommand(clients, daemonPod, command...)
19811985
return output, errOutput, err
19821986
}
19831987

0 commit comments

Comments
 (0)