Skip to content

Commit 5b1d68e

Browse files
committed
Revert "Fix stale bundle error when cluster is offline after bad commit (#4780) (#4823)"
This reverts commit 9b1d80b.
1 parent 2bb4769 commit 5b1d68e

9 files changed

Lines changed: 8 additions & 294 deletions

File tree

e2e/assets/single-cluster/offline-bundle-stuck/Chart.yaml

Lines changed: 0 additions & 6 deletions
This file was deleted.

e2e/assets/single-cluster/offline-bundle-stuck/fleet.yaml

Lines changed: 0 additions & 2 deletions
This file was deleted.

e2e/assets/single-cluster/offline-bundle-stuck/templates/configmap.yaml

Lines changed: 0 additions & 6 deletions
This file was deleted.

e2e/single-cluster/status_test.go

Lines changed: 0 additions & 185 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"path"
1010
"strings"
1111

12-
gogit "github.com/go-git/go-git/v5"
1312
. "github.com/onsi/ginkgo/v2"
1413
. "github.com/onsi/gomega"
1514

@@ -263,190 +262,6 @@ var _ = Describe("Checks that template errors are shown in bundles and gitrepos"
263262
})
264263
})
265264

266-
// Checks that once a cluster goes offline after a failed deployment, the bundle
267-
// status does not permanently show the stale error after a fix commit is applied
268-
// (issue https://github.com/rancher/fleet/issues/594).
269-
var _ = Describe("Bundle status does not retain stale error for offline cluster after fix", Ordered, Label("infra-setup"), func() {
270-
const (
271-
localAgentNS = "cattle-fleet-local-system"
272-
fleetAgentDeploy = "fleet-agent"
273-
)
274-
275-
var (
276-
tmpDir string
277-
cloneDir string
278-
k kubectl.Command
279-
kAgent kubectl.Command
280-
gh *githelper.Git
281-
inClusterRepoURL string
282-
gitrepoName string
283-
clone *gogit.Repository
284-
targetNamespace string
285-
r = rand.New(rand.NewSource(GinkgoRandomSeed()))
286-
)
287-
288-
BeforeEach(func() {
289-
k = env.Kubectl.Namespace(env.Namespace)
290-
kAgent = env.Kubectl.Namespace(localAgentNS)
291-
})
292-
293-
JustBeforeEach(func() {
294-
host := githelper.BuildGitHostname()
295-
addr, err := githelper.GetExternalRepoAddr(env, port, "repo")
296-
Expect(err).ToNot(HaveOccurred())
297-
gh = githelper.NewHTTP(addr)
298-
299-
inClusterRepoURL = gh.GetInClusterURL(host, port, "repo")
300-
301-
tmpDir, _ = os.MkdirTemp("", "fleet-")
302-
cloneDir = path.Join(tmpDir, "repo")
303-
304-
gitrepoName = testenv.RandomFilename("offline-stuck", r)
305-
targetNamespace = testenv.NewNamespaceName("offline-stuck", r)
306-
307-
clone, err = gh.Create(cloneDir, testenv.AssetPath("single-cluster/offline-bundle-stuck"), "examples")
308-
Expect(err).ToNot(HaveOccurred())
309-
310-
err = testenv.ApplyTemplate(k, testenv.AssetPath("status/gitrepo.yaml"), struct {
311-
Name string
312-
Repo string
313-
Branch string
314-
TargetNamespace string
315-
}{
316-
gitrepoName,
317-
inClusterRepoURL,
318-
gh.Branch,
319-
targetNamespace,
320-
})
321-
Expect(err).ToNot(HaveOccurred())
322-
})
323-
324-
AfterAll(func() {
325-
// Ensure fleet-agent is always restored, even when the test fails midway.
326-
_, _ = kAgent.Run("scale", "deployment", fleetAgentDeploy, "--replicas=1", "--timeout=60s")
327-
328-
_ = os.RemoveAll(tmpDir)
329-
_, _ = k.Delete("gitrepo", gitrepoName)
330-
331-
Eventually(func(g Gomega) {
332-
out, _ := k.Get(
333-
"bundledeployments",
334-
"-A",
335-
"-l",
336-
fmt.Sprintf("fleet.cattle.io/repo-name=%s", gitrepoName),
337-
)
338-
g.Expect(out).To(ContainSubstring("No resources found"))
339-
}).Should(Succeed())
340-
341-
_, _ = k.Delete("ns", targetNamespace, "--wait=false")
342-
})
343-
344-
It("clears the stale error from an offline cluster once a fix commit is present", func() {
345-
bundleName := gitrepoName + "-examples"
346-
347-
By("waiting for the initial deployment to be Ready")
348-
Eventually(func(g Gomega) {
349-
status := getBundleStatus(g, k, bundleName)
350-
g.Expect(status.Summary.Ready).To(Equal(1))
351-
}).Should(Succeed())
352-
353-
By("pushing a commit that introduces a YAML parse error")
354-
badContent := `apiVersion: v1
355-
kind: ConfigMap
356-
metadata:
357-
name: offline-bundle-stuck-cm
358-
data:
359-
broken: {unclosed
360-
`
361-
err := os.WriteFile(path.Join(cloneDir, "examples", "templates", "configmap.yaml"), []byte(badContent), 0644)
362-
Expect(err).ToNot(HaveOccurred())
363-
_, err = gh.Update(clone)
364-
Expect(err).ToNot(HaveOccurred())
365-
366-
By("waiting for the bundle to reflect the YAML parse error")
367-
Eventually(func(g Gomega) {
368-
status := getBundleStatus(g, k, bundleName)
369-
g.Expect(status.Summary.Ready).To(Equal(0))
370-
found := false
371-
for _, cond := range status.Conditions {
372-
if cond.Type == string(fleet.Ready) && strings.Contains(cond.Message, "did not find expected") {
373-
found = true
374-
break
375-
}
376-
}
377-
g.Expect(found).To(BeTrue(), "expected YAML parse error in bundle conditions, got: %v", status.Conditions)
378-
}, testenv.MediumTimeout, testenv.PollingInterval).Should(Succeed())
379-
380-
By("scaling down the fleet-agent to simulate an offline cluster")
381-
out, err := kAgent.Run("scale", "deployment", fleetAgentDeploy, "--replicas=0", "--timeout=60s")
382-
Expect(err).ToNot(HaveOccurred(), out)
383-
384-
// Wait until the agent pod is gone so it cannot apply any further commits.
385-
Eventually(func(g Gomega) {
386-
out, _ := kAgent.Get("pods", "-l", "app=fleet-agent")
387-
g.Expect(out).To(ContainSubstring("No resources found"))
388-
}).Should(Succeed())
389-
390-
By("pushing an intermediate commit that does not fix the YAML error")
391-
intermediateContent := `apiVersion: v1
392-
kind: ConfigMap
393-
metadata:
394-
name: offline-bundle-stuck-cm
395-
labels:
396-
version: "2"
397-
data:
398-
broken: {unclosed
399-
`
400-
err = os.WriteFile(path.Join(cloneDir, "examples", "templates", "configmap.yaml"), []byte(intermediateContent), 0644)
401-
Expect(err).ToNot(HaveOccurred())
402-
_, err = gh.Update(clone)
403-
Expect(err).ToNot(HaveOccurred())
404-
405-
By("pushing a fix commit (valid YAML)")
406-
fixContent := `apiVersion: v1
407-
kind: ConfigMap
408-
metadata:
409-
name: offline-bundle-stuck-cm
410-
data:
411-
key: fixed
412-
`
413-
err = os.WriteFile(path.Join(cloneDir, "examples", "templates", "configmap.yaml"), []byte(fixContent), 0644)
414-
Expect(err).ToNot(HaveOccurred())
415-
_, err = gh.Update(clone)
416-
Expect(err).ToNot(HaveOccurred())
417-
418-
By("verifying the error message does not persist after the fix commit is pushed")
419-
// After the controller picks up the fix commit it updates the BD spec.
420-
// Even though the offline agent cannot apply the fix yet, the bundle
421-
// should no longer surface the stale error from the previous apply attempt.
422-
Eventually(func(g Gomega) {
423-
status := getBundleStatus(g, k, bundleName)
424-
found := false
425-
for _, cond := range status.Conditions {
426-
if cond.Type == string(fleet.Ready) {
427-
found = true
428-
g.Expect(cond.Message).NotTo(
429-
ContainSubstring("did not find expected"),
430-
"bundle Ready condition still shows stale YAML error after fix commit was pushed",
431-
)
432-
break
433-
}
434-
}
435-
g.Expect(found).To(BeTrue(), "expected Ready condition to be present, got: %v", status.Conditions)
436-
}, testenv.LongTimeout, testenv.PollingInterval).Should(Succeed())
437-
438-
By("scaling the fleet-agent back up")
439-
out, err = kAgent.Run("scale", "deployment", fleetAgentDeploy, "--replicas=1", "--timeout=60s")
440-
Expect(err).ToNot(HaveOccurred(), out)
441-
442-
By("waiting for the bundle to become Ready after the agent recovers")
443-
Eventually(func(g Gomega) {
444-
status := getBundleStatus(g, k, bundleName)
445-
g.Expect(status.Summary.Ready).To(Equal(1))
446-
}, testenv.LongTimeout, testenv.PollingInterval).Should(Succeed())
447-
})
448-
})
449-
450265
// getBundleStatus retrieves the status of the bundle with the provided name.
451266
func getBundleStatus(g Gomega, k kubectl.Command, name string) fleet.BundleStatus {
452267
gr, err := k.Get("bundle", name, "-o=json")

integrationtests/controller/bundle/status_test.go

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -186,25 +186,12 @@ var _ = Describe("Bundle Status Fields", func() {
186186
return k8sClient.Update(ctx, cluster)
187187
}).ShouldNot(HaveOccurred())
188188

189-
By("validating that the bundle deployment spec is updated")
190-
Eventually(func() bool {
191-
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: "name"}, bd)
192-
Expect(err).NotTo(HaveOccurred())
193-
return bd.Spec.DeploymentID != deplIDBefore
194-
}).Should(BeTrue())
195-
196-
By("simulating the agent applying the updated bundle deployment")
197-
Eventually(func() error {
189+
// Change in cluster state results in a bundle deployment update
190+
Eventually(func(g Gomega) {
198191
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: "name"}, bd)
199-
if err != nil {
200-
return err
201-
}
202-
bd.Status.AppliedDeploymentID = bd.Spec.DeploymentID
203-
return k8sClient.Status().Update(ctx, bd)
204-
}).ShouldNot(HaveOccurred())
192+
g.Expect(err).NotTo(HaveOccurred())
193+
g.Expect(bd.Spec.DeploymentID).ToNot(Equal(deplIDBefore))
205194

206-
By("verifying the bundle is ready again")
207-
Eventually(func(g Gomega) {
208195
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: "name"}, bundle)
209196
g.Expect(err).NotTo(HaveOccurred())
210197
g.Expect(bundle.Status.Summary.WaitApplied).To(Equal(0))

internal/cmd/agent/deployer/deployer.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,7 @@ func deployErrToStatus(err error, status fleet.BundleDeploymentStatus) (bool, fl
285285
"(chart requires kubeVersion)|" + // kubeVersion mismatch
286286
"(annotation validation error)|" + // annotations fail to pass validation
287287
"(failed, and has been rolled back due to atomic being set)|" + // atomic is set and a rollback occurs
288-
"(YAML parse error)|" + // YAML is broken in source files (Helm v3)
289-
"(MalformedYAMLError)|" + // YAML is broken in source files (Helm v4)
288+
"(YAML parse error)|" + // YAML is broken in source files
290289
"(Forbidden: updates to [0-9A-Za-z]+ spec for fields other than [0-9A-Za-z ']+ are forbidden)|" + // trying to update fields that cannot be updated
291290
"(Forbidden: spec is immutable after creation)|" + // trying to modify immutable spec
292291
"(chart requires kubeVersion: [0-9A-Za-z\\.\\-<>=]+ which is incompatible with Kubernetes)", // trying to deploy to incompatible Kubernetes

internal/cmd/controller/summary/summary.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,13 +137,7 @@ func MessageFromDeployment(deployment *fleet.BundleDeployment) string {
137137
}
138138
message := MessageFromCondition("Deployed", deployment.Status.Conditions)
139139
if message == "" {
140-
// Only surface the Installed error when it belongs to the current
141-
// deployment. If the deployment IDs differ, the Installed condition
142-
// was set by a previous (now superseded) apply attempt and the
143-
// message is stale.
144-
if deployment.Status.AppliedDeploymentID == deployment.Spec.DeploymentID {
145-
message = MessageFromCondition("Installed", deployment.Status.Conditions)
146-
}
140+
message = MessageFromCondition("Installed", deployment.Status.Conditions)
147141
}
148142
if message == "" {
149143
message = MessageFromCondition("Monitored", deployment.Status.Conditions)

internal/cmd/controller/summary/summary_test.go

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package summary_test
22

33
import (
4-
"errors"
54
"fmt"
65
"testing"
76

@@ -114,56 +113,3 @@ func TestSetReadyConditions_ReasonClearedWhenBecomingReady(t *testing.T) {
114113
c.GetReason(bundleStatus))
115114
}
116115
}
117-
118-
func setCondition(bd *fleet.BundleDeployment, condType, message string) {
119-
condition.Cond(condType).SetError(bd, "", errors.New(message))
120-
}
121-
122-
func TestMessageFromDeployment(t *testing.T) {
123-
t.Run("nil deployment returns empty string", func(t *testing.T) {
124-
if msg := summary.MessageFromDeployment(nil); msg != "" {
125-
t.Errorf("expected empty string, got %q", msg)
126-
}
127-
})
128-
129-
t.Run("Deployed condition takes priority over Installed", func(t *testing.T) {
130-
bd := &fleet.BundleDeployment{}
131-
bd.Spec.DeploymentID = "id1"
132-
bd.Status.AppliedDeploymentID = "id1"
133-
setCondition(bd, "Deployed", "deploy error")
134-
setCondition(bd, "Installed", "install error")
135-
if msg := summary.MessageFromDeployment(bd); msg != "deploy error" {
136-
t.Errorf("expected deploy error to take priority, got %q", msg)
137-
}
138-
})
139-
140-
t.Run("Installed shown when deployment IDs match", func(t *testing.T) {
141-
bd := &fleet.BundleDeployment{}
142-
bd.Spec.DeploymentID = "id1"
143-
bd.Status.AppliedDeploymentID = "id1"
144-
setCondition(bd, "Installed", "install error")
145-
if msg := summary.MessageFromDeployment(bd); msg != "install error" {
146-
t.Errorf("expected install error, got %q", msg)
147-
}
148-
})
149-
150-
t.Run("Installed suppressed when deployment IDs differ", func(t *testing.T) {
151-
bd := &fleet.BundleDeployment{}
152-
bd.Spec.DeploymentID = "id2"
153-
bd.Status.AppliedDeploymentID = "id1"
154-
setCondition(bd, "Installed", "stale install error")
155-
if msg := summary.MessageFromDeployment(bd); msg != "" {
156-
t.Errorf("expected stale Installed message to be suppressed, got %q", msg)
157-
}
158-
})
159-
160-
t.Run("Monitored used as fallback when Deployed and Installed are absent", func(t *testing.T) {
161-
bd := &fleet.BundleDeployment{}
162-
bd.Spec.DeploymentID = "id1"
163-
bd.Status.AppliedDeploymentID = "id1"
164-
setCondition(bd, "Monitored", "monitor error")
165-
if msg := summary.MessageFromDeployment(bd); msg != "monitor error" {
166-
t.Errorf("expected monitor error as fallback, got %q", msg)
167-
}
168-
})
169-
}

internal/cmd/controller/target/target.go

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -136,26 +136,13 @@ func (t *Target) state() fleet.BundleState {
136136
case nil:
137137
return fleet.Pending
138138
default:
139-
return summary.GetDeploymentState(t.effectiveDeployment())
139+
return summary.GetDeploymentState(t.Deployment)
140140
}
141141
}
142142

143143
// message returns a relevant message from the target (pure function)
144144
func (t *Target) message() string {
145-
return summary.MessageFromDeployment(t.effectiveDeployment())
146-
}
147-
148-
// effectiveDeployment returns t.Deployment with Spec.DeploymentID overridden
149-
// to t.DeploymentID, or t.Deployment unchanged when the IDs already match.
150-
// Status is computed (via state and message) before the reconciler writes the
151-
// new BD spec, so the cached Spec.DeploymentID lags at that point.
152-
func (t *Target) effectiveDeployment() *fleet.BundleDeployment {
153-
if t.Deployment == nil || t.DeploymentID == t.Deployment.Spec.DeploymentID {
154-
return t.Deployment
155-
}
156-
bd := *t.Deployment
157-
bd.Spec.DeploymentID = t.DeploymentID
158-
return &bd
145+
return summary.MessageFromDeployment(t.Deployment)
159146
}
160147

161148
// initialiseOptionsMaps initialises options and staged options maps of namespace labels and annotations on bd, if

0 commit comments

Comments
 (0)