Skip to content

Commit 121fa72

Browse files
author
Chun-Hung Tseng
committed
Add e2e downgrade automatic cancellation test
Verify that the downgrade is cancelled automatically once the downgrade has completed (using the `no inflight downgrade job` error as the indicator). Please see: #19365 (comment). Reference: #17976. Signed-off-by: Chun-Hung Tseng <[email protected]>
1 parent 14cf669 commit 121fa72

File tree

2 files changed

+109
-10
lines changed

2 files changed

+109
-10
lines changed

tests/e2e/cluster_downgrade_test.go

+85-8
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
122122
})
123123
}
124124
cc := epc.Etcdctl()
125-
t.Logf("Cluster created")
125+
t.Log("Cluster created")
126126
if len(epc.Procs) > 1 {
127127
t.Log("Waiting health interval to required to make membership changes")
128128
time.Sleep(etcdserver.HealthInterval)
@@ -132,7 +132,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
132132
resp, err := cc.MemberAddAsLearner(context.Background(), "fake1", []string{"http://127.0.0.1:1001"})
133133
require.NoError(t, err)
134134
if triggerSnapshot {
135-
t.Logf("Generating snapshot")
135+
t.Log("Generating snapshot")
136136
generateSnapshot(t, snapshotCount, cc)
137137
verifySnapshot(t, epc)
138138
}
@@ -142,7 +142,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
142142
beforeMembers, beforeKV := getMembersAndKeys(t, cc)
143143

144144
if triggerCancellation == cancelRightBeforeEnable {
145-
t.Logf("Cancelling downgrade before enabling")
145+
t.Log("Cancelling downgrade before enabling")
146146
e2e.DowngradeCancel(t, epc)
147147
t.Log("Downgrade cancelled, validating if cluster is in the right state")
148148
e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, currentVersion))
@@ -151,7 +151,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
151151
}
152152
e2e.DowngradeEnable(t, epc, lastVersion)
153153
if triggerCancellation == cancelRightAfterEnable {
154-
t.Logf("Cancelling downgrade right after enabling (no node is downgraded yet)")
154+
t.Log("Cancelling downgrade right after enabling (no node is downgraded yet)")
155155
e2e.DowngradeCancel(t, epc)
156156
t.Log("Downgrade cancelled, validating if cluster is in the right state")
157157
e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, currentVersion))
@@ -188,7 +188,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
188188
resp, err = cc.MemberAddAsLearner(context.Background(), "fake2", []string{"http://127.0.0.1:1002"})
189189
require.NoError(t, err)
190190
if triggerSnapshot {
191-
t.Logf("Generating snapshot")
191+
t.Log("Generating snapshot")
192192
generateSnapshot(t, snapshotCount, cc)
193193
verifySnapshot(t, epc)
194194
}
@@ -207,6 +207,77 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
207207
assert.Equal(t, beforeMembers.Members, afterMembers.Members)
208208
}
209209

210+
func TestAutomaticDowngradeCancellationAfterCompletingDowngradingInClusterOf3(t *testing.T) {
211+
clusterSize := 3
212+
213+
currentEtcdBinary := e2e.BinPath.Etcd
214+
lastReleaseBinary := e2e.BinPath.EtcdLastRelease
215+
if !fileutil.Exist(lastReleaseBinary) {
216+
t.Skipf("%q does not exist", lastReleaseBinary)
217+
}
218+
219+
currentVersion, err := e2e.GetVersionFromBinary(currentEtcdBinary)
220+
require.NoError(t, err)
221+
// wipe any pre-release suffix like -alpha.0 we see commonly in builds
222+
currentVersion.PreRelease = ""
223+
224+
lastVersion, err := e2e.GetVersionFromBinary(lastReleaseBinary)
225+
require.NoError(t, err)
226+
227+
require.Equalf(t, lastVersion.Minor, currentVersion.Minor-1, "unexpected minor version difference")
228+
currentVersionStr := currentVersion.String()
229+
lastVersionStr := lastVersion.String()
230+
231+
lastClusterVersion := semver.New(lastVersionStr)
232+
lastClusterVersion.Patch = 0
233+
234+
e2e.BeforeTest(t)
235+
236+
t.Logf("Create cluster with version %s", currentVersionStr)
237+
var snapshotCount uint64 = 10
238+
epc := newCluster(t, clusterSize, snapshotCount)
239+
for i := 0; i < len(epc.Procs); i++ {
240+
e2e.ValidateVersion(t, epc.Cfg, epc.Procs[i], version.Versions{
241+
Cluster: currentVersionStr,
242+
Server: version.Version,
243+
Storage: currentVersionStr,
244+
})
245+
}
246+
cc := epc.Etcdctl()
247+
t.Log("Cluster created")
248+
if len(epc.Procs) > 1 {
249+
t.Log("Waiting health interval to required to make membership changes")
250+
time.Sleep(etcdserver.HealthInterval)
251+
}
252+
253+
t.Log("Adding member to test membership, but a learner avoid breaking quorum")
254+
resp, err := cc.MemberAddAsLearner(context.Background(), "fake1", []string{"http://127.0.0.1:1001"})
255+
require.NoError(t, err)
256+
t.Log("Removing learner to test membership")
257+
_, err = cc.MemberRemove(context.Background(), resp.Member.ID)
258+
require.NoError(t, err)
259+
beforeMembers, beforeKV := getMembersAndKeys(t, cc)
260+
261+
e2e.DowngradeEnable(t, epc, lastVersion)
262+
263+
t.Logf("Starting downgrade process for all nodes to %q", lastVersionStr)
264+
err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, []int{0, 1, 2}, currentVersion, lastClusterVersion)
265+
require.NoError(t, err)
266+
267+
afterMembers, afterKV := getMembersAndKeys(t, cc)
268+
assert.Equal(t, beforeKV.Kvs, afterKV.Kvs)
269+
assert.Equal(t, beforeMembers.Members, afterMembers.Members)
270+
271+
if len(epc.Procs) > 1 {
272+
t.Log("Waiting health interval to required to make membership changes")
273+
time.Sleep(etcdserver.HealthInterval)
274+
}
275+
276+
e2e.DowngradeAutoCancelCheck(t, epc)
277+
t.Log("Downgrade cancellation is automatically cancelled since the cluster has been downgraded, validating if cluster is in the right state")
278+
e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, lastClusterVersion))
279+
}
280+
210281
func newCluster(t *testing.T, clusterSize int, snapshotCount uint64) *e2e.EtcdProcessCluster {
211282
epc, err := e2e.NewEtcdProcessCluster(context.TODO(), t,
212283
e2e.WithClusterSize(clusterSize),
@@ -250,7 +321,7 @@ func generateSnapshot(t *testing.T, snapshotCount uint64, cc *e2e.EtcdctlV3) {
250321
defer cancel()
251322

252323
var i uint64
253-
t.Logf("Adding keys")
324+
t.Log("Adding keys")
254325
for i = 0; i < snapshotCount*3; i++ {
255326
err := cc.Put(ctx, fmt.Sprintf("%d", i), "1", config.PutOptions{})
256327
assert.NoError(t, err)
@@ -264,7 +335,7 @@ func verifySnapshot(t *testing.T, epc *e2e.EtcdProcessCluster) {
264335
_, err := ss.Load()
265336
require.NoError(t, err)
266337
}
267-
t.Logf("All members have a valid snapshot")
338+
t.Log("All members have a valid snapshot")
268339
}
269340

270341
func verifySnapshotMembers(t *testing.T, epc *e2e.EtcdProcessCluster, expectedMembers *clientv3.MemberListResponse) {
@@ -301,11 +372,17 @@ func getMembersAndKeys(t *testing.T, cc *e2e.EtcdctlV3) (*clientv3.MemberListRes
301372
func generateIdenticalVersions(clusterSize int, ver *semver.Version) []*version.Versions {
302373
ret := make([]*version.Versions, clusterSize)
303374

375+
// storage version string is non-empty starting from 3.6.0
376+
storageStr := ver.String()
377+
if ver.LessThan(version.V3_6) {
378+
storageStr = ""
379+
}
380+
304381
for i := range clusterSize {
305382
ret[i] = &version.Versions{
306383
Cluster: ver.String(),
307384
Server: ver.String(),
308-
Storage: ver.String(),
385+
Storage: storageStr,
309386
}
310387
}
311388

tests/framework/e2e/downgrade.go

+24-2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"go.uber.org/zap"
2929

3030
"go.etcd.io/etcd/api/v3/version"
31+
"go.etcd.io/etcd/pkg/v3/expect"
3132
"go.etcd.io/etcd/tests/v3/framework/testutils"
3233
)
3334

@@ -58,7 +59,7 @@ func DowngradeCancel(t *testing.T, epc *EtcdProcessCluster) {
5859
var err error
5960
testutils.ExecuteWithTimeout(t, 1*time.Minute, func() {
6061
for {
61-
t.Logf("etcdctl downgrade cancel")
62+
t.Log("etcdctl downgrade cancel")
6263
err = c.DowngradeCancel(context.TODO())
6364
if err != nil {
6465
if strings.Contains(err.Error(), "no inflight downgrade job") {
@@ -72,7 +73,7 @@ func DowngradeCancel(t *testing.T, epc *EtcdProcessCluster) {
7273
continue
7374
}
7475

75-
t.Logf("etcdctl downgrade cancel executed successfully")
76+
t.Log("etcdctl downgrade cancel executed successfully")
7677
break
7778
}
7879
})
@@ -82,6 +83,19 @@ func DowngradeCancel(t *testing.T, epc *EtcdProcessCluster) {
8283
t.Log("Cluster downgrade cancellation is completed")
8384
}
8485

86+
func DowngradeAutoCancelCheck(t *testing.T, epc *EtcdProcessCluster) {
87+
c := epc.Etcdctl()
88+
89+
var err error
90+
testutils.ExecuteWithTimeout(t, 1*time.Minute, func() {
91+
t.Log("etcdctl downgrade cancel")
92+
err = c.DowngradeCancel(context.TODO())
93+
require.Errorf(t, err, "no inflight downgrade job")
94+
})
95+
96+
t.Log("Cluster downgrade is completed")
97+
}
98+
8599
func DowngradeUpgradeMembers(t *testing.T, lg *zap.Logger, clus *EtcdProcessCluster, numberOfMembersToChange int, currentVersion, targetVersion *semver.Version) error {
86100
membersToChange := rand.Perm(len(clus.Procs))[:numberOfMembersToChange]
87101
t.Logf("Elect members for operations on members: %v", membersToChange)
@@ -117,6 +131,14 @@ func DowngradeUpgradeMembersByID(t *testing.T, lg *zap.Logger, clus *EtcdProcess
117131
return err
118132
}
119133
}
134+
135+
if opString == "downgrading" && len(membersToChange) == len(clus.Procs) {
136+
lg.Info("Waiting for downgrade completion log line")
137+
leader := clus.WaitLeader(t)
138+
_, err := clus.Procs[leader].Logs().ExpectWithContext(context.TODO(), expect.ExpectedResponse{Value: "the cluster has been downgraded"})
139+
require.NoError(t, err)
140+
}
141+
120142
lg.Info("Validating versions")
121143
for _, memberID := range membersToChange {
122144
member := clus.Procs[memberID]

0 commit comments

Comments
 (0)