Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 31 additions & 13 deletions internal/cmd/controller/cleanup/controllers/cleanup/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ import (
rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

type handler struct {
apply apply.Apply
clusters fleetcontrollers.ClusterCache
namespaces corecontrollers.NamespaceClient
apply apply.Apply
clusters fleetcontrollers.ClusterCache
clustersClient fleetcontrollers.ClusterClient
namespaces corecontrollers.NamespaceClient
}

func Register(ctx context.Context, apply apply.Apply,
Expand All @@ -33,11 +35,13 @@ func Register(ctx context.Context, apply apply.Apply,
clusterRole rbaccontrollers.ClusterRoleController,
clusterRoleBinding rbaccontrollers.ClusterRoleBindingController,
namespaces corecontrollers.NamespaceController,
clusterCache fleetcontrollers.ClusterCache) {
clusterCache fleetcontrollers.ClusterCache,
clusterClient fleetcontrollers.ClusterClient) {
h := &handler{
apply: apply,
clusters: clusterCache,
namespaces: namespaces,
apply: apply,
clusters: clusterCache,
clustersClient: clusterClient,
namespaces: namespaces,
}

clusterRole.OnChange(ctx, "managed-cleanup", func(_ string, obj *rbacv1.ClusterRole) (*rbacv1.ClusterRole, error) {
Expand Down Expand Up @@ -90,15 +94,29 @@ func (h *handler) cleanupNamespace(key string, obj *corev1.Namespace) (*corev1.N
return obj, nil
}

// check if the cluster for this cluster namespace still exists, otherwise clean up the namespace
_, err := h.clusters.Get(obj.Annotations[fleet.ClusterNamespaceAnnotation], obj.Annotations[fleet.ClusterAnnotation])
if apierrors.IsNotFound(err) {
logrus.Infof("Cleaning up fleet-managed namespace %q, cluster not found", obj.Name)
clusterNS := obj.Annotations[fleet.ClusterNamespaceAnnotation]
clusterName := obj.Annotations[fleet.ClusterAnnotation]

err = h.namespaces.Delete(key, nil)
// check if the cluster for this cluster namespace still exists, otherwise clean up the namespace.
// First consult the informer cache; if the cache reports not-found, confirm with a live API call
// to avoid a race where the Cluster was just created and hasn't been reflected in the cache yet.
_, err := h.clusters.Get(clusterNS, clusterName)
if !apierrors.IsNotFound(err) {
return obj, err
}
return obj, err

// Cache said not-found — verify against the API server before deleting.
_, err = h.clustersClient.Get(clusterNS, clusterName, metav1.GetOptions{})
if err == nil {
// Cluster exists in the API server; the cache is stale. Do not delete the namespace.
return obj, nil
}
if !apierrors.IsNotFound(err) {
return obj, err
}

logrus.Infof("Cleaning up fleet-managed namespace %q, cluster not found", obj.Name)
return obj, h.namespaces.Delete(key, nil)
}

func (h *handler) cleanup(obj runtime.Object) error {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package cleanup

import (
"testing"

"github.com/rancher/wrangler/v3/pkg/generic/fake"
"go.uber.org/mock/gomock"

fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

func TestCleanup(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Cleanup Controller Suite")
}

var _ = Describe("cleanupNamespace", func() {
var (
mockCtrl *gomock.Controller
clusterCache *fake.MockCacheInterface[*fleet.Cluster]
clusterClient *fake.MockClientInterface[*fleet.Cluster, *fleet.ClusterList]
namespaceClient *fake.MockNonNamespacedClientInterface[*corev1.Namespace, *corev1.NamespaceList]
h *handler
ns *corev1.Namespace
notFound = apierrors.NewNotFound(schema.GroupResource{Group: "fleet.cattle.io", Resource: "clusters"}, "test-cluster")
)

BeforeEach(func() {
mockCtrl = gomock.NewController(GinkgoT())
clusterCache = fake.NewMockCacheInterface[*fleet.Cluster](mockCtrl)
clusterClient = fake.NewMockClientInterface[*fleet.Cluster, *fleet.ClusterList](mockCtrl)
namespaceClient = fake.NewMockNonNamespacedClientInterface[*corev1.Namespace, *corev1.NamespaceList](mockCtrl)

h = &handler{
clusters: clusterCache,
clustersClient: clusterClient,
namespaces: namespaceClient,
}

ns = &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: "cluster-fleet-default-test-cluster-abc123",
Labels: map[string]string{
fleet.ManagedLabel: "true",
},
Annotations: map[string]string{
fleet.ClusterAnnotation: "test-cluster",
fleet.ClusterNamespaceAnnotation: "fleet-default",
},
},
}
})

Context("when namespace has no managed label", func() {
It("does not delete the namespace", func() {
ns.Labels = map[string]string{}
result, err := h.cleanupNamespace(ns.Name, ns)
Expect(err).NotTo(HaveOccurred())
Expect(result).To(Equal(ns))
})
})

Context("when namespace is nil", func() {
It("returns nil without error", func() {
result, err := h.cleanupNamespace("some-key", nil)
Expect(err).NotTo(HaveOccurred())
Expect(result).To(BeNil())
})
})

Context("when cluster exists in cache", func() {
It("does not delete the namespace", func() {
clusterCache.EXPECT().
Get("fleet-default", "test-cluster").
Return(&fleet.Cluster{}, nil)

result, err := h.cleanupNamespace(ns.Name, ns)
Expect(err).NotTo(HaveOccurred())
Expect(result).To(Equal(ns))
})
})

Context("when cluster is missing from cache but exists in API server", func() {
// This tests the fix for the race condition described in
// https://github.com/rancher/fleet/issues/3830: the cleanup
// controller's informer cache may not yet reflect a newly created
// Cluster, so a not-found cache result is confirmed via a live
// API call before the namespace is deleted.
It("does not delete the namespace", func() {
// Cache says cluster not found (stale)
clusterCache.EXPECT().
Get("fleet-default", "test-cluster").
Return(nil, notFound)

// API server confirms cluster exists
clusterClient.EXPECT().
Get("fleet-default", "test-cluster", gomock.Any()).
Return(&fleet.Cluster{}, nil)

result, err := h.cleanupNamespace(ns.Name, ns)
Expect(err).NotTo(HaveOccurred())
Expect(result).To(Equal(ns))
})
})

Context("when cluster is missing from both cache and API server", func() {
It("deletes the namespace", func() {
clusterCache.EXPECT().
Get("fleet-default", "test-cluster").
Return(nil, notFound)

clusterClient.EXPECT().
Get("fleet-default", "test-cluster", gomock.Any()).
Return(nil, notFound)

namespaceClient.EXPECT().
Delete(ns.Name, gomock.Any()).
Return(nil)

result, err := h.cleanupNamespace(ns.Name, ns)
Expect(err).NotTo(HaveOccurred())
Expect(result).To(Equal(ns))
})
})

Context("when cache lookup returns a non-NotFound error", func() {
It("returns the error without deleting", func() {
apiErr := apierrors.NewServiceUnavailable("API server unavailable")
clusterCache.EXPECT().
Get("fleet-default", "test-cluster").
Return(nil, apiErr)

result, err := h.cleanupNamespace(ns.Name, ns)
Expect(err).To(Equal(apiErr))
Expect(result).To(Equal(ns))
})
})

Context("when API server lookup returns a non-NotFound error", func() {
It("returns the error without deleting", func() {
apiErr := apierrors.NewServiceUnavailable("API server unavailable")
clusterCache.EXPECT().
Get("fleet-default", "test-cluster").
Return(nil, notFound)

clusterClient.EXPECT().
Get("fleet-default", "test-cluster", gomock.Any()).
Return(nil, apiErr)

result, err := h.cleanupNamespace(ns.Name, ns)
Expect(err).To(Equal(apiErr))
Expect(result).To(Equal(ns))
})
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func Register(ctx context.Context, appCtx *AppContext) error {
appCtx.RBAC.ClusterRoleBinding(),
appCtx.Core.Namespace(),
appCtx.Cluster().Cache(),
appCtx.Cluster(),
)

if err := appCtx.Start(ctx); err != nil {
Expand Down