Skip to content

Commit a5ec30b

Browse files
test: fix flaky operator-upgrade keeper writes (#233)
1 parent 124b588 commit a5ec30b

2 files changed

Lines changed: 21 additions & 12 deletions

File tree

test/deploy/deploy_test.go

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -426,13 +426,16 @@ var _ = Describe("Operator upgrade", Ordered, Label("upgrade"), func() {
426426
"--for=condition=Ready", "clickhousecluster/"+chName)).To(Succeed())
427427

428428
By("writing test data", func() {
429-
keeperClient, err := testutil.NewKeeperClient(ctx, dialer, &keeperCR)
430-
Expect(err).NotTo(HaveOccurred())
429+
// A freshly formed keeper ensemble may re-elect its leader several times before it settles, so retry through transient leader loss.
430+
Eventually(func(g Gomega) {
431+
keeperClient, err := testutil.NewKeeperClient(ctx, dialer, &keeperCR)
432+
g.Expect(err).NotTo(HaveOccurred())
431433

432-
defer keeperClient.Close()
434+
defer keeperClient.Close()
433435

434-
Expect(keeperClient.CheckWrite(0)).To(Succeed())
435-
Expect(keeperClient.CheckRead(0)).To(Succeed())
436+
g.Expect(keeperClient.CheckWrite(0)).To(Succeed())
437+
g.Expect(keeperClient.CheckRead(0)).To(Succeed())
438+
}, "2m", "5s").Should(Succeed())
436439

437440
chClient, err := testutil.NewClickHouseClient(ctx, dialer, &chCR)
438441
Expect(err).NotTo(HaveOccurred())
@@ -467,14 +470,17 @@ var _ = Describe("Operator upgrade", Ordered, Label("upgrade"), func() {
467470
g.Expect(meta.IsStatusConditionTrue(cluster.Status.Conditions, v1.ConditionTypeReady)).To(BeTrue())
468471
}, "10m", "5s").Should(Succeed())
469472

470-
keeperClient, err := testutil.NewKeeperClient(ctx, dialer, &keeperCR)
471-
Expect(err).NotTo(HaveOccurred())
473+
// Keeper re-elects after the rolling restart, so retry through transient leader loss.
474+
Eventually(func(g Gomega) {
475+
keeperClient, err := testutil.NewKeeperClient(ctx, dialer, &keeperCR)
476+
g.Expect(err).NotTo(HaveOccurred())
472477

473-
defer keeperClient.Close()
478+
defer keeperClient.Close()
474479

475-
Expect(keeperClient.CheckRead(0)).To(Succeed())
476-
Expect(keeperClient.CheckWrite(1)).To(Succeed())
477-
Expect(keeperClient.CheckRead(1)).To(Succeed())
480+
g.Expect(keeperClient.CheckRead(0)).To(Succeed())
481+
g.Expect(keeperClient.CheckWrite(1)).To(Succeed())
482+
g.Expect(keeperClient.CheckRead(1)).To(Succeed())
483+
}, "2m", "5s").Should(Succeed())
478484
})
479485

480486
By("updating clickhouse and verifying its health", func() {

test/testutil/keeper_client.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package testutil
33
import (
44
"context"
55
"crypto/tls"
6+
"errors"
67
"fmt"
78
"net"
89
"strconv"
@@ -134,7 +135,9 @@ func (c *KeeperClient) Close() {
134135
func (c *KeeperClient) CheckWrite(order int) error {
135136
for i := range 10 {
136137
path := fmt.Sprintf(keeperTestDataKey, order, i)
137-
if _, err := c.client.Create(path, []byte(fmt.Sprintf(keeperTestDataVal, i)), 0, nil); err != nil {
138+
139+
_, err := c.client.Create(path, []byte(fmt.Sprintf(keeperTestDataVal, i)), 0, nil)
140+
if err != nil && !errors.Is(err, zk.ErrNodeExists) {
138141
return fmt.Errorf("creating test data failed: %w", err)
139142
}
140143

0 commit comments

Comments
 (0)