diff --git a/e2e/kube.go b/e2e/kube.go index 3b551f6f370..ab3593bf536 100644 --- a/e2e/kube.go +++ b/e2e/kube.go @@ -12,7 +12,6 @@ import ( "github.com/Azure/agentbaker/e2e/config" "github.com/Azure/agentbaker/e2e/toolkit" "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" - "github.com/stretchr/testify/require" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" @@ -20,7 +19,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -147,53 +145,56 @@ func (k *Kubeclient) WaitUntilPodRunning(ctx context.Context, namespace string, func (k *Kubeclient) WaitUntilNodeReady(ctx context.Context, t testing.TB, vmssName string) string { defer toolkit.LogStepf(t, "waiting for node %s to be ready", vmssName)() - var node *corev1.Node = nil - watcher, err := k.Typed.CoreV1().Nodes().Watch(ctx, metav1.ListOptions{}) - require.NoError(t, err, "failed to start watching nodes") - defer watcher.Stop() - for event := range watcher.ResultChan() { - if event.Type != watch.Added && event.Type != watch.Modified { - continue - } - - var nodeFromEvent *corev1.Node - switch v := event.Object.(type) { - case *corev1.Node: - nodeFromEvent = v + var lastSeenNode *corev1.Node + var nodeName string - default: - t.Logf("skipping object type %T", event.Object) - continue + err := wait.PollUntilContextCancel(ctx, 5*time.Second, true, func(ctx context.Context) (bool, error) { + nodes, listErr := k.Typed.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if listErr != nil { + t.Logf("failed to list nodes: %v", listErr) + if errorsk8s.IsForbidden(listErr) || errorsk8s.IsUnauthorized(listErr) { + return false, listErr + } + return false, nil } - if !strings.HasPrefix(nodeFromEvent.Name, vmssName) { - continue - } + for i := range nodes.Items { + node := &nodes.Items[i] + if !strings.HasPrefix(node.Name, vmssName) { + continue + } - // found the right node. Use it! - node = nodeFromEvent - nodeTaints, _ := json.Marshal(node.Spec.Taints) - nodeConditions, _ := json.Marshal(node.Status.Conditions) + lastSeenNode = node.DeepCopy() + nodeTaints, _ := json.Marshal(node.Spec.Taints) + nodeConditions, _ := json.Marshal(node.Status.Conditions) - for _, cond := range node.Status.Conditions { - if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { - t.Logf("node %s is ready. Taints: %s Conditions: %s", node.Name, string(nodeTaints), string(nodeConditions)) - return node.Name + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { + t.Logf("node %s is ready. Taints: %s Conditions: %s", node.Name, string(nodeTaints), string(nodeConditions)) + nodeName = node.Name + return true, nil + } } + + t.Logf("node %s is not ready. Taints: %s Conditions: %s", node.Name, string(nodeTaints), string(nodeConditions)) + continue } - t.Logf("node %s is not ready. Taints: %s Conditions: %s", node.Name, string(nodeTaints), string(nodeConditions)) - } + return false, nil + }) - if node == nil { - t.Fatalf("%q haven't appeared in k8s API server", vmssName) + if err != nil { + if lastSeenNode == nil { + t.Fatalf("%q haven't appeared in k8s API server: %v", vmssName, err) + } else { + nodeString, _ := json.Marshal(lastSeenNode) + t.Fatalf("failed to wait for %q (%s) to be ready %+v. Detail: %s Err: %v", vmssName, lastSeenNode.Name, lastSeenNode.Status, string(nodeString), err) + } return "" } - nodeString, _ := json.Marshal(node) - t.Fatalf("failed to wait for %q (%s) to be ready %+v. Detail: %s", vmssName, node.Name, node.Status, string(nodeString)) - return node.Name + return nodeName } // GetPodNetworkDebugPodForNode returns a pod that's a member of the 'debugnonhost' daemonset running in the cluster - this will return