|
9 | 9 | "path" |
10 | 10 | "strings" |
11 | 11 |
|
| 12 | + gogit "github.com/go-git/go-git/v5" |
12 | 13 | . "github.com/onsi/ginkgo/v2" |
13 | 14 | . "github.com/onsi/gomega" |
14 | 15 |
|
@@ -262,6 +263,190 @@ var _ = Describe("Checks that template errors are shown in bundles and gitrepos" |
262 | 263 | }) |
263 | 264 | }) |
264 | 265 |
|
| 266 | +// Checks that once a cluster goes offline after a failed deployment, the bundle |
| 267 | +// status does not permanently show the stale error after a fix commit is applied |
| 268 | +// (issue https://github.com/rancher/fleet/issues/594). |
| 269 | +var _ = Describe("Bundle status does not retain stale error for offline cluster after fix", Ordered, Label("infra-setup"), func() { |
| 270 | + const ( |
| 271 | + localAgentNS = "cattle-fleet-local-system" |
| 272 | + fleetAgentDeploy = "fleet-agent" |
| 273 | + ) |
| 274 | + |
| 275 | + var ( |
| 276 | + tmpDir string |
| 277 | + cloneDir string |
| 278 | + k kubectl.Command |
| 279 | + kAgent kubectl.Command |
| 280 | + gh *githelper.Git |
| 281 | + inClusterRepoURL string |
| 282 | + gitrepoName string |
| 283 | + clone *gogit.Repository |
| 284 | + targetNamespace string |
| 285 | + r = rand.New(rand.NewSource(GinkgoRandomSeed())) |
| 286 | + ) |
| 287 | + |
| 288 | + BeforeEach(func() { |
| 289 | + k = env.Kubectl.Namespace(env.Namespace) |
| 290 | + kAgent = env.Kubectl.Namespace(localAgentNS) |
| 291 | + }) |
| 292 | + |
| 293 | + JustBeforeEach(func() { |
| 294 | + host := githelper.BuildGitHostname() |
| 295 | + addr, err := githelper.GetExternalRepoAddr(env, port, "repo") |
| 296 | + Expect(err).ToNot(HaveOccurred()) |
| 297 | + gh = githelper.NewHTTP(addr) |
| 298 | + |
| 299 | + inClusterRepoURL = gh.GetInClusterURL(host, port, "repo") |
| 300 | + |
| 301 | + tmpDir, _ = os.MkdirTemp("", "fleet-") |
| 302 | + cloneDir = path.Join(tmpDir, "repo") |
| 303 | + |
| 304 | + gitrepoName = testenv.RandomFilename("offline-stuck", r) |
| 305 | + targetNamespace = testenv.NewNamespaceName("offline-stuck", r) |
| 306 | + |
| 307 | + clone, err = gh.Create(cloneDir, testenv.AssetPath("single-cluster/offline-bundle-stuck"), "examples") |
| 308 | + Expect(err).ToNot(HaveOccurred()) |
| 309 | + |
| 310 | + err = testenv.ApplyTemplate(k, testenv.AssetPath("status/gitrepo.yaml"), struct { |
| 311 | + Name string |
| 312 | + Repo string |
| 313 | + Branch string |
| 314 | + TargetNamespace string |
| 315 | + }{ |
| 316 | + gitrepoName, |
| 317 | + inClusterRepoURL, |
| 318 | + gh.Branch, |
| 319 | + targetNamespace, |
| 320 | + }) |
| 321 | + Expect(err).ToNot(HaveOccurred()) |
| 322 | + }) |
| 323 | + |
| 324 | + AfterAll(func() { |
| 325 | + // Ensure fleet-agent is always restored, even when the test fails midway. |
| 326 | + _, _ = kAgent.Run("scale", "deployment", fleetAgentDeploy, "--replicas=1", "--timeout=60s") |
| 327 | + |
| 328 | + _ = os.RemoveAll(tmpDir) |
| 329 | + _, _ = k.Delete("gitrepo", gitrepoName) |
| 330 | + |
| 331 | + Eventually(func(g Gomega) { |
| 332 | + out, _ := k.Get( |
| 333 | + "bundledeployments", |
| 334 | + "-A", |
| 335 | + "-l", |
| 336 | + fmt.Sprintf("fleet.cattle.io/repo-name=%s", gitrepoName), |
| 337 | + ) |
| 338 | + g.Expect(out).To(ContainSubstring("No resources found")) |
| 339 | + }).Should(Succeed()) |
| 340 | + |
| 341 | + _, _ = k.Delete("ns", targetNamespace, "--wait=false") |
| 342 | + }) |
| 343 | + |
| 344 | + It("clears the stale error from an offline cluster once a fix commit is present", func() { |
| 345 | + bundleName := gitrepoName + "-examples" |
| 346 | + |
| 347 | + By("waiting for the initial deployment to be Ready") |
| 348 | + Eventually(func(g Gomega) { |
| 349 | + status := getBundleStatus(g, k, bundleName) |
| 350 | + g.Expect(status.Summary.Ready).To(Equal(1)) |
| 351 | + }).Should(Succeed()) |
| 352 | + |
| 353 | + By("pushing a commit that introduces a YAML parse error") |
| 354 | + badContent := `apiVersion: v1 |
| 355 | +kind: ConfigMap |
| 356 | +metadata: |
| 357 | + name: offline-bundle-stuck-cm |
| 358 | +data: |
| 359 | + broken: {unclosed |
| 360 | +` |
| 361 | + err := os.WriteFile(path.Join(cloneDir, "examples", "templates", "configmap.yaml"), []byte(badContent), 0644) |
| 362 | + Expect(err).ToNot(HaveOccurred()) |
| 363 | + _, err = gh.Update(clone) |
| 364 | + Expect(err).ToNot(HaveOccurred()) |
| 365 | + |
| 366 | + By("waiting for the bundle to reflect the YAML parse error") |
| 367 | + Eventually(func(g Gomega) { |
| 368 | + status := getBundleStatus(g, k, bundleName) |
| 369 | + g.Expect(status.Summary.Ready).To(Equal(0)) |
| 370 | + found := false |
| 371 | + for _, cond := range status.Conditions { |
| 372 | + if cond.Type == string(fleet.Ready) && strings.Contains(cond.Message, "did not find expected") { |
| 373 | + found = true |
| 374 | + break |
| 375 | + } |
| 376 | + } |
| 377 | + g.Expect(found).To(BeTrue(), "expected YAML parse error in bundle conditions, got: %v", status.Conditions) |
| 378 | + }, testenv.MediumTimeout, testenv.PollingInterval).Should(Succeed()) |
| 379 | + |
| 380 | + By("scaling down the fleet-agent to simulate an offline cluster") |
| 381 | + out, err := kAgent.Run("scale", "deployment", fleetAgentDeploy, "--replicas=0", "--timeout=60s") |
| 382 | + Expect(err).ToNot(HaveOccurred(), out) |
| 383 | + |
| 384 | + // Wait until the agent pod is gone so it cannot apply any further commits. |
| 385 | + Eventually(func(g Gomega) { |
| 386 | + out, _ := kAgent.Get("pods", "-l", "app=fleet-agent") |
| 387 | + g.Expect(out).To(ContainSubstring("No resources found")) |
| 388 | + }).Should(Succeed()) |
| 389 | + |
| 390 | + By("pushing an intermediate commit that does not fix the YAML error") |
| 391 | + intermediateContent := `apiVersion: v1 |
| 392 | +kind: ConfigMap |
| 393 | +metadata: |
| 394 | + name: offline-bundle-stuck-cm |
| 395 | + labels: |
| 396 | + version: "2" |
| 397 | +data: |
| 398 | + broken: {unclosed |
| 399 | +` |
| 400 | + err = os.WriteFile(path.Join(cloneDir, "examples", "templates", "configmap.yaml"), []byte(intermediateContent), 0644) |
| 401 | + Expect(err).ToNot(HaveOccurred()) |
| 402 | + _, err = gh.Update(clone) |
| 403 | + Expect(err).ToNot(HaveOccurred()) |
| 404 | + |
| 405 | + By("pushing a fix commit (valid YAML)") |
| 406 | + fixContent := `apiVersion: v1 |
| 407 | +kind: ConfigMap |
| 408 | +metadata: |
| 409 | + name: offline-bundle-stuck-cm |
| 410 | +data: |
| 411 | + key: fixed |
| 412 | +` |
| 413 | + err = os.WriteFile(path.Join(cloneDir, "examples", "templates", "configmap.yaml"), []byte(fixContent), 0644) |
| 414 | + Expect(err).ToNot(HaveOccurred()) |
| 415 | + _, err = gh.Update(clone) |
| 416 | + Expect(err).ToNot(HaveOccurred()) |
| 417 | + |
| 418 | + By("verifying the error message does not persist after the fix commit is pushed") |
| 419 | + // After the controller picks up the fix commit it updates the BD spec. |
| 420 | + // Even though the offline agent cannot apply the fix yet, the bundle |
| 421 | + // should no longer surface the stale error from the previous apply attempt. |
| 422 | + Eventually(func(g Gomega) { |
| 423 | + status := getBundleStatus(g, k, bundleName) |
| 424 | + found := false |
| 425 | + for _, cond := range status.Conditions { |
| 426 | + if cond.Type == string(fleet.Ready) { |
| 427 | + found = true |
| 428 | + g.Expect(cond.Message).NotTo( |
| 429 | + ContainSubstring("did not find expected"), |
| 430 | + "bundle Ready condition still shows stale YAML error after fix commit was pushed", |
| 431 | + ) |
| 432 | + break |
| 433 | + } |
| 434 | + } |
| 435 | + g.Expect(found).To(BeTrue(), "expected Ready condition to be present, got: %v", status.Conditions) |
| 436 | + }, testenv.LongTimeout, testenv.PollingInterval).Should(Succeed()) |
| 437 | + |
| 438 | + By("scaling the fleet-agent back up") |
| 439 | + out, err = kAgent.Run("scale", "deployment", fleetAgentDeploy, "--replicas=1", "--timeout=60s") |
| 440 | + Expect(err).ToNot(HaveOccurred(), out) |
| 441 | + |
| 442 | + By("waiting for the bundle to become Ready after the agent recovers") |
| 443 | + Eventually(func(g Gomega) { |
| 444 | + status := getBundleStatus(g, k, bundleName) |
| 445 | + g.Expect(status.Summary.Ready).To(Equal(1)) |
| 446 | + }, testenv.LongTimeout, testenv.PollingInterval).Should(Succeed()) |
| 447 | + }) |
| 448 | +}) |
| 449 | + |
265 | 450 | // getBundleStatus retrieves the status of the bundle with the provided name. |
266 | 451 | func getBundleStatus(g Gomega, k kubectl.Command, name string) fleet.BundleStatus { |
267 | 452 | gr, err := k.Get("bundle", name, "-o=json") |
|
0 commit comments