-
Notifications
You must be signed in to change notification settings - Fork 271
self-heal BD secret when having a hash mismatch #4963
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,233 @@ | ||
| package bundle | ||
|
|
||
| import ( | ||
| "time" | ||
|
|
||
| . "github.com/onsi/ginkgo/v2" | ||
| . "github.com/onsi/gomega" | ||
|
|
||
| "github.com/rancher/fleet/integrationtests/utils" | ||
| "github.com/rancher/fleet/internal/helmvalues" | ||
| fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" | ||
|
|
||
| corev1 "k8s.io/api/core/v1" | ||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
| "k8s.io/apimachinery/pkg/types" | ||
| "sigs.k8s.io/controller-runtime/pkg/client" | ||
| ) | ||
|
|
||
| var _ = Describe("BD secret / ValuesHash inconsistency deadlock", func() { | ||
| var ( | ||
| bundleNS string | ||
| clusterNS string | ||
| cluster *fleet.Cluster | ||
| bundle *fleet.Bundle | ||
| testID string | ||
| ) | ||
|
|
||
| BeforeEach(func() { | ||
| var err error | ||
| testID, err = utils.NewNamespaceName() | ||
| Expect(err).ToNot(HaveOccurred()) | ||
| if len(testID) > 8 { | ||
| testID = testID[:8] | ||
| } | ||
|
|
||
| bundleNS = "bdl-bns-" + testID | ||
| clusterNS = "bdl-cns-" + testID | ||
|
|
||
| Expect(k8sClient.Create(ctx, &corev1.Namespace{ | ||
| ObjectMeta: metav1.ObjectMeta{Name: bundleNS}, | ||
| })).ToNot(HaveOccurred()) | ||
| Expect(k8sClient.Create(ctx, &corev1.Namespace{ | ||
| ObjectMeta: metav1.ObjectMeta{Name: clusterNS}, | ||
| })).ToNot(HaveOccurred()) | ||
|
|
||
| cluster, err = utils.CreateCluster(ctx, k8sClient, "test-cluster-"+testID, bundleNS, | ||
| map[string]string{"bdl-env": testID}, clusterNS) | ||
| Expect(err).ToNot(HaveOccurred()) | ||
| }) | ||
|
|
||
| AfterEach(func() { | ||
| if bundle != nil { | ||
| Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, bundle))).To(Succeed()) | ||
| // Wait for BDs to be cleaned up before deleting namespaces. | ||
| Eventually(func(g Gomega) { | ||
| bdList := &fleet.BundleDeploymentList{} | ||
| g.Expect(k8sClient.List(ctx, bdList, client.InNamespace(clusterNS))).To(Succeed()) | ||
| g.Expect(bdList.Items).To(BeEmpty()) | ||
| }, 30*time.Second, time.Second).Should(Succeed()) | ||
| } | ||
| Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, cluster))).To(Succeed()) | ||
| Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: bundleNS}}))).To(Succeed()) | ||
| Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: clusterNS}}))).To(Succeed()) | ||
| }) | ||
|
|
||
| // createBundleWithValues builds a bundle that targets the test cluster with the given Helm values. | ||
| createBundleWithValues := func(name string, values map[string]interface{}) *fleet.Bundle { | ||
| b := &fleet.Bundle{ | ||
| ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: bundleNS}, | ||
| Spec: fleet.BundleSpec{ | ||
| Targets: []fleet.BundleTarget{ | ||
| { | ||
| ClusterSelector: &metav1.LabelSelector{ | ||
| MatchLabels: map[string]string{"bdl-env": testID}, | ||
| }, | ||
| BundleDeploymentOptions: fleet.BundleDeploymentOptions{ | ||
| Helm: &fleet.HelmOptions{ | ||
| Values: &fleet.GenericMap{Data: values}, | ||
| }, | ||
| }, | ||
| }, | ||
| }, | ||
| Resources: []fleet.BundleResource{{ | ||
| Name: "test.yaml", | ||
| Content: "apiVersion: v1\nkind: ConfigMap\nmetadata:\n name: test\ndata:\n key: value", | ||
| }}, | ||
| }, | ||
| } | ||
| Expect(k8sClient.Create(ctx, b)).ToNot(HaveOccurred()) | ||
| return b | ||
| } | ||
|
|
||
| // waitForBD waits until the BundleDeployment for bundleName has a non-empty ValuesHash, | ||
| // meaning the controller completed at least one successful reconcile and wrote the secret. | ||
| waitForBD := func(bundleName string) *fleet.BundleDeployment { | ||
| var bd *fleet.BundleDeployment | ||
| Eventually(func(g Gomega) { | ||
| bdList := &fleet.BundleDeploymentList{} | ||
| g.Expect(k8sClient.List(ctx, bdList, client.InNamespace(clusterNS))).To(Succeed()) | ||
| for i := range bdList.Items { | ||
| item := &bdList.Items[i] | ||
| if item.Labels[fleet.BundleLabel] == bundleName { | ||
| g.Expect(item.Spec.ValuesHash).ToNot(BeEmpty(), | ||
| "BD should have a ValuesHash set by manageOptionsSecret") | ||
| bd = item | ||
| return | ||
| } | ||
| } | ||
| g.Expect(false).To(BeTrue(), "BundleDeployment not found for bundle "+bundleName) | ||
| }).Should(Succeed()) | ||
| return bd | ||
| } | ||
|
|
||
| // corruptBDSecret overwrites the BD secret with data that hashes differently from the BD's | ||
| // current ValuesHash, simulating the state left when manageOptionsSecret succeeded but | ||
| // createBundleDeployment failed in a previous reconcile cycle. | ||
| corruptBDSecret := func(bd *fleet.BundleDeployment) { | ||
| staleValues := []byte(`replicas: 999`) | ||
| Eventually(func(g Gomega) { | ||
| secret := &corev1.Secret{} | ||
| g.Expect(k8sClient.Get(ctx, types.NamespacedName{ | ||
| Name: bd.Name, Namespace: bd.Namespace, | ||
| }, secret)).To(Succeed()) | ||
|
|
||
| // Sanity check: hashes must be consistent before we corrupt. | ||
| h := helmvalues.HashOptions(secret.Data[helmvalues.ValuesKey], secret.Data[helmvalues.StagedValuesKey]) | ||
| g.Expect(h).To(Equal(bd.Spec.ValuesHash), "pre-condition: secret and BD must be consistent") | ||
|
|
||
| secret.Data[helmvalues.ValuesKey] = staleValues | ||
| g.Expect(k8sClient.Update(ctx, secret)).To(Succeed()) | ||
| }).Should(Succeed()) | ||
| } | ||
|
|
||
| // getBundleReadyMessage returns the message from the bundle's Ready condition, or "" if absent. | ||
| getBundleReadyMessage := func(b *fleet.Bundle) string { | ||
| latest := &fleet.Bundle{} | ||
| if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(b), latest); err != nil { | ||
| return "" | ||
| } | ||
| for _, c := range latest.Status.Conditions { | ||
| if c.Type == "Ready" { | ||
| return c.Message | ||
| } | ||
| } | ||
| return "" | ||
| } | ||
|
|
||
| // When the BD secret and BD.Spec.ValuesHash are inconsistent, the bundle | ||
| // controller should self-heal: it must re-synchronise the secret and clear | ||
| // the targeting error without requiring a manual patch. | ||
| It("self-heals after the BD secret content diverges from ValuesHash", func() { | ||
| By("creating a bundle with Helm values so manageOptionsSecret writes a secret") | ||
| bundleName := "bdl-selfheal-" + testID | ||
| bundle = createBundleWithValues(bundleName, map[string]interface{}{"data": 8}) | ||
|
|
||
| By("waiting for the BD and its options secret to be created with a consistent hash") | ||
| bd := waitForBD(bundleName) | ||
|
|
||
| By("corrupting the BD secret (simulates: manageOptionsSecret ran but createBundleDeployment failed)") | ||
| corruptBDSecret(bd) | ||
|
|
||
| // Verify the mismatch is real before letting the controller see it. | ||
| secret := &corev1.Secret{} | ||
| Expect(k8sClient.Get(ctx, types.NamespacedName{Name: bd.Name, Namespace: bd.Namespace}, secret)).To(Succeed()) | ||
| actualHash := helmvalues.HashOptions(secret.Data[helmvalues.ValuesKey], secret.Data[helmvalues.StagedValuesKey]) | ||
| Expect(actualHash).ToNot(Equal(bd.Spec.ValuesHash), "pre-condition: hashes must differ after corruption") | ||
|
|
||
| By("triggering a bundle reconcile so the controller detects the mismatch") | ||
| Eventually(func(g Gomega) { | ||
| latest := &fleet.Bundle{} | ||
| g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bundle), latest)).To(Succeed()) | ||
| if latest.Annotations == nil { | ||
| latest.Annotations = map[string]string{} | ||
| } | ||
| latest.Annotations["test/trigger"] = "1" | ||
| g.Expect(k8sClient.Update(ctx, latest)).To(Succeed()) | ||
| }).Should(Succeed()) | ||
|
|
||
| By("confirming the hash-mismatch targeting error appears on the bundle") | ||
| Eventually(func(g Gomega) { | ||
| msg := getBundleReadyMessage(bundle) | ||
| g.Expect(msg).To(ContainSubstring("hash mismatch between secret and bundledeployment"), | ||
| "the hash-mismatch error should appear after the controller reconciles with the corrupted secret") | ||
| }).Should(Succeed()) | ||
|
|
||
| // The error should eventually clear without any further | ||
| // external intervention. | ||
| By("expecting the bundle to eventually clear the targeting error (self-heal)") | ||
| Eventually(func(g Gomega) { | ||
| msg := getBundleReadyMessage(bundle) | ||
| g.Expect(msg).ToNot(ContainSubstring("hash mismatch between secret and bundledeployment"), | ||
| "the hash-mismatch targeting error should clear once the controller self-heals") | ||
| }).Should(Succeed()) | ||
| }) | ||
|
|
||
| // After a secret/ValuesHash inconsistency, a subsequent bundle update (e.g. a | ||
| // new GitRepo commit with new values) should be applied to the BD. | ||
| It("applies new bundle values after recovering from a secret/ValuesHash inconsistency", func() { | ||
| By("creating a bundle with initial Helm values") | ||
| bundleName := "bdl-newvals-" + testID | ||
| bundle = createBundleWithValues(bundleName, map[string]interface{}{"data": 8}) | ||
|
|
||
| By("waiting for the BD and its options secret to be created consistently") | ||
| bd := waitForBD(bundleName) | ||
| originalHash := bd.Spec.ValuesHash | ||
|
|
||
| By("corrupting the BD secret to create the inconsistency") | ||
| corruptBDSecret(bd) | ||
|
|
||
| By("updating the bundle with new Helm values (simulates a new GitRepo commit)") | ||
| Eventually(func(g Gomega) { | ||
| latest := &fleet.Bundle{} | ||
| g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bundle), latest)).To(Succeed()) | ||
| latest.Spec.Targets[0].BundleDeploymentOptions.Helm.Values = &fleet.GenericMap{ | ||
| Data: map[string]interface{}{"data": 25}, | ||
| } | ||
| g.Expect(k8sClient.Update(ctx, latest)).To(Succeed()) | ||
| }).Should(Succeed()) | ||
|
|
||
| // After the controller reconciles the new values, the BD's | ||
| // ValuesHash should change from the original value to a new one reflecting the | ||
| // updated values. | ||
| By("expecting the BD's ValuesHash to change to reflect the new values") | ||
| Eventually(func(g Gomega) { | ||
| latestBD := &fleet.BundleDeployment{} | ||
| g.Expect(k8sClient.Get(ctx, types.NamespacedName{ | ||
| Name: bd.Name, Namespace: bd.Namespace, | ||
| }, latestBD)).To(Succeed()) | ||
| g.Expect(latestBD.Spec.ValuesHash).ToNot(Equal(originalHash), | ||
| "BD ValuesHash should be updated once the controller applies the new values") | ||
| }).Should(Succeed()) | ||
| }) | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -265,12 +265,28 @@ func (r *BundleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr | |
|
|
||
| matchedTargets, secretsMissing, err := r.Builder.Targets(ctx, bundle, manifestID) | ||
| if err != nil { | ||
| wrappedErr := fmt.Errorf("targeting error: %w", err) | ||
| if errors.Is(err, fleetutil.ErrHashMismatch) { | ||
| // The BD options secret and BD.Spec.ValuesHash are inconsistent. This | ||
| // happens when manageOptionsSecret wrote the secret but | ||
| // createBundleDeployment failed in a previous reconcile cycle. | ||
| // Make the error visible on the bundle status, then repair by clearing | ||
| // ValuesHash on affected BDs so the next reconcile can re-sync the secret. | ||
| SetCondition(string(fleet.Ready), &bundle.Status, wrappedErr) | ||
| if statusErr := r.updateStatus(ctx, bundleOrig, bundle); statusErr != nil { | ||
| return ctrl.Result{}, statusErr | ||
| } | ||
| if repairErr := r.repairHashMismatch(ctx, bundle); repairErr != nil { | ||
| logger.Error(repairErr, "failed to repair BD secret/ValuesHash inconsistency") | ||
| } | ||
| return ctrl.Result{RequeueAfter: durations.DefaultRequeueAfter}, nil | ||
| } | ||
| return ctrl.Result{}, | ||
| r.updateErrorStatus( | ||
| ctx, | ||
| bundleOrig, | ||
| bundle, | ||
| fmt.Errorf("targeting error: %w", err), | ||
| wrappedErr, | ||
| ) | ||
| } | ||
|
|
||
|
|
@@ -591,6 +607,48 @@ func (r *BundleReconciler) manageOptionsSecret( | |
| return hash, secret, nil | ||
| } | ||
|
|
||
| // repairHashMismatch finds all BundleDeployments for the given bundle whose options secret | ||
| // content does not match their ValuesHash, and clears their ValuesHash so the next reconcile | ||
| // can re-sync the secret via manageOptionsSecret. | ||
| func (r *BundleReconciler) repairHashMismatch(ctx context.Context, bundle *fleet.Bundle) error { | ||
| bdList := &fleet.BundleDeploymentList{} | ||
| if err := r.List(ctx, bdList, client.MatchingLabels{ | ||
| fleet.BundleLabel: bundle.Name, | ||
| fleet.BundleNamespaceLabel: bundle.Namespace, | ||
| }); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| for i := range bdList.Items { | ||
| bd := &bdList.Items[i] | ||
| if bd.Spec.ValuesHash == "" { | ||
| continue | ||
| } | ||
|
|
||
| secret := &corev1.Secret{} | ||
| if err := r.Get(ctx, client.ObjectKey{Namespace: bd.Namespace, Name: bd.Name}, secret); err != nil { | ||
| if apierrors.IsNotFound(err) { | ||
| continue | ||
| } | ||
| return err | ||
| } | ||
|
|
||
| h := helmvalues.HashOptions(secret.Data[helmvalues.ValuesKey], secret.Data[helmvalues.StagedValuesKey]) | ||
| if h == bd.Spec.ValuesHash { | ||
| continue | ||
| } | ||
|
|
||
| // Clear ValuesHash so the next reconcile re-syncs the secret via manageOptionsSecret. | ||
| patch := client.MergeFrom(bd.DeepCopy()) | ||
| bd.Spec.ValuesHash = "" | ||
| if err := r.Patch(ctx, bd, patch); err != nil { | ||
| return fmt.Errorf("failed to clear ValuesHash on bundledeployment %s/%s: %w", bd.Namespace, bd.Name, err) | ||
|
Comment on lines
+641
to
+645
|
||
| } | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // ensureOwnerReferences sets bd as the owner of s, and returns any error occurring in the process. | ||
| func (r *BundleReconciler) ensureOwnerReferences(ctx context.Context, bd *fleet.BundleDeployment, s *corev1.Secret) error { | ||
| if s == nil { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.