From 9c1563b80a78dbae531888807aed5fe995d7817b Mon Sep 17 00:00:00 2001 From: "jesse.mirza@nslogin.nl" Date: Mon, 16 Mar 2026 13:41:04 +0100 Subject: [PATCH] Fix doNotDeploy not removing off-schedule BundleDeployments cleanupOrphanedBundleDeployments preserved any BD with Spec.OffSchedule=true, even when that BD was no longer targeted (e.g. because doNotDeploy was set in fleet.yaml after the app was already deployed). This left the app installed on the downstream cluster indefinitely until the cluster re-entered its maintenance window. The OffSchedule flag is intended to prevent updates/deployments during a maintenance-window gap, not to block removal of BDs that are entirely untargeted. BDs that are still actively targeted are already preserved by the uidsToKeep check; the additional `|| bd.Spec.OffSchedule` guard was therefore both redundant for targeted BDs and harmful for untargeted ones. Remove the OffSchedule guard from cleanupOrphanedBundleDeployments so that a BD is always deleted when its cluster is no longer targeted, regardless of schedule state. Add an integration test that explicitly sets OffSchedule=true on the BD before enabling doNotDeploy, confirming the BD is cleaned up. Co-Authored-By: Claude Sonnet 4.6 --- .../controller/bundle/bundle_targets_test.go | 64 +++++++++++++++++++ .../reconciler/bundle_controller.go | 10 ++- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/integrationtests/controller/bundle/bundle_targets_test.go b/integrationtests/controller/bundle/bundle_targets_test.go index a7cae80ed0..41d7b17052 100644 --- a/integrationtests/controller/bundle/bundle_targets_test.go +++ b/integrationtests/controller/bundle/bundle_targets_test.go @@ -588,6 +588,70 @@ var _ = Describe("Bundle targets", Ordered, func() { } }) }) + + // Regression test: when doNotDeploy is set after deployment and the existing + // BundleDeployment has OffSchedule=true (cluster is in a maintenance window), + // cleanupOrphanedBundleDeployments must still delete the BD. Previously the + // OffSchedule guard in that function prevented deletion, leaving the app installed. + When("setting doNotDeploy after deployment when the existing BundleDeployment is off-schedule", func() { + BeforeEach(func() { + bundleName = "do-not-deploy-offschedule" + bdLabels = map[string]string{ + "fleet.cattle.io/bundle-name": bundleName, + "fleet.cattle.io/bundle-namespace": namespace, + } + expectedNumberOfBundleDeployments = 3 + targets = []v1alpha1.BundleTarget{ + { + ClusterGroup: "one", + }, + { + ClusterGroup: "two", + }, + } + targetsInGitRepo := []v1alpha1.BundleTarget{ + { + ClusterGroup: "all", + }, + } + targetRestrictions = make([]v1alpha1.BundleTarget, len(targetsInGitRepo)) + copy(targetRestrictions, targetsInGitRepo) + targets = append(targets, targetsInGitRepo...) + }) + + It("removes the BundleDeployment for cluster two even when it is marked off-schedule", func() { + _ = verifyBundlesDeploymentsAreCreated(expectedNumberOfBundleDeployments, bdLabels, bundleName) + + By("marking the BundleDeployment for cluster two as off-schedule") + bdList := &v1alpha1.BundleDeploymentList{} + err := k8sClient.List(ctx, bdList, client.MatchingLabels(bdLabels)) + Expect(err).ToNot(HaveOccurred()) + for _, bd := range bdList.Items { + if strings.Contains(bd.Namespace, "cluster-two") { + patch := client.MergeFrom(bd.DeepCopy()) + bd.Spec.OffSchedule = true + Expect(k8sClient.Patch(ctx, &bd, patch)).ToNot(HaveOccurred()) + } + } + + By("setting doNotDeploy for cluster two") + clusterBundle := &v1alpha1.Bundle{} + err = k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: bundleName}, clusterBundle) + Expect(err).ToNot(HaveOccurred()) + for i, t := range clusterBundle.Spec.Targets { + if t.ClusterGroup == "two" { + clusterBundle.Spec.Targets[i].DoNotDeploy = true + } + } + Expect(k8sClient.Update(ctx, clusterBundle)).ToNot(HaveOccurred()) + + By("checking that the off-schedule BundleDeployment for cluster two is deleted") + bdList = verifyBundlesDeploymentsAreCreated(2, bdLabels, bundleName) + for _, bd := range bdList.Items { + Expect(bd.Namespace).ToNot(ContainSubstring("cluster-two")) + } + }) + }) }) func verifyBundlesDeploymentsAreCreated(numBundleDeployments int, bdLabels map[string]string, bundleName string) *v1alpha1.BundleDeploymentList { diff --git a/internal/cmd/controller/reconciler/bundle_controller.go b/internal/cmd/controller/reconciler/bundle_controller.go index b495cc9691..bdea23c45e 100644 --- a/internal/cmd/controller/reconciler/bundle_controller.go +++ b/internal/cmd/controller/reconciler/bundle_controller.go @@ -891,9 +891,13 @@ func (r *BundleReconciler) cleanupOrphanedBundleDeployments(ctx context.Context, return err } toDelete := slices.DeleteFunc(list, func(bd fleet.BundleDeployment) bool { - // don't delete BundleDeployments that are not in schedule as - // that would uninstall the deployment in the agent - return uidsToKeep.Has(bd.UID) || bd.Spec.OffSchedule + // Only keep BDs that are still actively targeted. + // The OffSchedule flag prevents deploying/updating a BD during an off-schedule + // window, but it must not block removal of BDs for clusters that are no longer + // targeted at all (e.g. because doNotDeploy was set). Those BDs are absent from + // uidsToKeep and must be deleted unconditionally so the agent can uninstall the + // app from the downstream cluster. + return uidsToKeep.Has(bd.UID) }) return batchDeleteBundleDeployments(ctx, r.Client, toDelete) }