Skip to content

Commit 7fdff84

Browse files
authored
E2E: Add waiting for MP Pods to be cleaned up in AfterSuite (#758)
*Issue #, if available:* N/A

*Description of changes:*

Only the MP Pod sharing tests in the suite properly wait for MP Pod cleanup, which can take up to 4 minutes when we have to rely on the stale attachment cleaner in the controller (due to an eventual-consistency issue in the K8s controller-runtime framework).

When `uninstall_driver` runs, helm deletes the release and the `mount-s3` namespace starts terminating. If Mountpoint pods are slow to terminate (there is no controller/stale-attachment-cleaner left to clean them up, since it was just deleted), the namespace gets stuck in `Terminating`. On the next CI run, `helm upgrade --install` fails because Kubernetes refuses to create resources in the terminating `mount-s3` namespace.

Example: https://github.com/awslabs/mountpoint-s3-csi-driver/actions/runs/23316182156/job/68086361510?pr=752

```
Error: failed to create resource: roles.rbac.authorization.k8s.io "s3-csi-driver-controller-role" is forbidden: unable to create new content in namespace mount-s3 because it is being terminated
```

The fix: in AfterSuite, wait for the MP Pods to be cleaned up properly by the stale attachment cleaner in the controller, before the driver is uninstalled.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
1 parent 08d6598 commit 7fdff84

1 file changed

Lines changed: 30 additions & 0 deletions

File tree

tests/e2e-kubernetes/e2e_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
package e2e
22

33
import (
4+
"context"
45
"flag"
56
"testing"
7+
"time"
68

79
"github.com/awslabs/mountpoint-s3-csi-driver/tests/e2e-kubernetes/s3client"
810
custom_testsuites "github.com/awslabs/mountpoint-s3-csi-driver/tests/e2e-kubernetes/testsuites"
911

1012
ginkgo "github.com/onsi/ginkgo/v2"
1113
"github.com/onsi/gomega"
14+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1215
f "k8s.io/kubernetes/test/e2e/framework"
1316
"k8s.io/kubernetes/test/e2e/storage/framework"
1417
"k8s.io/kubernetes/test/e2e/storage/testsuites"
@@ -81,6 +84,33 @@ func getCSITestSuites() []func() framework.TestSuite {
8184
return suites
8285
}
8386

87+
// Wait for all Mountpoint pods in mount-s3 namespace to be cleaned up after tests complete.
88+
// This ensures the mount-s3 namespace is not stuck with stale pods when the driver is uninstalled,
89+
// which would cause the namespace to get stuck in Terminating state and block the next CI run's install.
90+
var _ = ginkgo.SynchronizedAfterSuite(func() {}, func() {
91+
cs, err := f.LoadClientset()
92+
f.ExpectNoError(err, "creating kubernetes client")
93+
94+
ctx := context.Background()
95+
f.Logf("Waiting for Mountpoint pods in mount-s3 namespace to be cleaned up...")
96+
gomega.Eventually(ctx, func(ctx context.Context) (int, error) {
97+
pods, err := cs.CoreV1().Pods("mount-s3").List(ctx, metav1.ListOptions{})
98+
if err != nil {
99+
return 0, err
100+
}
101+
if len(pods.Items) > 0 {
102+
names := make([]string, len(pods.Items))
103+
for i, pod := range pods.Items {
104+
names[i] = pod.Name
105+
}
106+
f.Logf("Still waiting for %d Mountpoint pod(s) to be cleaned up: %v", len(pods.Items), names)
107+
}
108+
return len(pods.Items), nil
109+
}).WithTimeout(5*time.Minute).WithPolling(10*time.Second).Should(gomega.Equal(0),
110+
"Mountpoint pods in mount-s3 namespace were not cleaned up in time")
111+
f.Logf("All Mountpoint pods cleaned up successfully")
112+
})
113+
84114
// This executes testSuites for csi volumes.
85115
var _ = utils.SIGDescribe("CSI Volumes", func() {
86116
var testSuites []func() framework.TestSuite

0 commit comments

Comments
 (0)