Skip to content

Commit 15134ba

Browse files
committed
Add e2e test to verify etcd is able to automatically fix the issue
caused by #19557

Signed-off-by: Benjamin Wang <[email protected]>
1 parent 3c65dfa commit 15134ba

File tree

2 files changed

+172
-17
lines changed

2 files changed

+172
-17
lines changed

tests/e2e/ctl_v3_member_test.go

+171-16
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@ import (
2424

2525
"github.com/stretchr/testify/require"
2626

27+
"go.etcd.io/bbolt"
2728
"go.etcd.io/etcd/api/v3/etcdserverpb"
29+
"go.etcd.io/etcd/client/pkg/v3/types"
30+
"go.etcd.io/etcd/server/v3/datadir"
31+
"go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32+
"go.etcd.io/etcd/server/v3/mvcc/buckets"
2833
"go.etcd.io/etcd/tests/v3/framework/e2e"
2934
)
3035

@@ -230,31 +235,181 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230235
return e2e.SpawnWithExpectWithEnv(cmdArgs, cx.envMap, " updated in cluster ")
231236
}
232237

238+
// TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239+
// issue caused by https://github.com/etcd-io/etcd/issues/19557.
233240
func TestCtlV3PromotingLearner(t *testing.T) {
234-
e2e.BeforeTest(t)
241+
// Each case chooses whether keys are written after the promotion to trigger
// a snapshot (snapshotCount != 0) and whether the promoted member's backend
// record is left untouched (promotionSuccess) or manually reverted to a learner.
testCases := []struct {
242+
name string
243+
snapshotCount int
244+
promotionSuccess bool
245+
}{
246+
{
247+
name: "create snapshot after learner promotion which is not saved to v3store",
248+
snapshotCount: 10,
249+
},
250+
{
251+
name: "not create snapshot and learner promotion is not saved to v3store",
252+
snapshotCount: 0,
253+
},
254+
{
255+
name: "not create snapshot and learner promotion is saved to v3store",
256+
snapshotCount: 0,
257+
promotionSuccess: true,
258+
},
259+
}
260+
261+
for _, tc := range testCases {
262+
t.Run(tc.name, func(t *testing.T) {
263+
t.Log("Create a single node etcd cluster")
264+
cfg := e2e.NewConfigNoTLS()
265+
cfg.BasePeerScheme = "unix"
266+
cfg.ClusterSize = 1
267+
// A zero snapshotCount leaves the config's existing SnapshotCount untouched.
if tc.snapshotCount != 0 {
268+
cfg.SnapshotCount = tc.snapshotCount
269+
}
270+
271+
epc, err := e2e.NewEtcdProcessCluster(t, cfg)
272+
require.NoError(t, err, "failed to start etcd cluster: %v", err)
273+
defer func() {
274+
derr := epc.Close()
275+
require.NoError(t, derr, "failed to close etcd cluster: %v", derr)
276+
}()
277+
278+
t.Log("Add and start a learner")
279+
learnerID, err := epc.StartNewProc(nil, true, t)
280+
require.NoError(t, err)
281+
282+
t.Log("Write a key to ensure the cluster is healthy so far")
283+
etcdctl := epc.Procs[0].Etcdctl(e2e.ClientNonTLS, false, false)
284+
err = etcdctl.Put("foo", "bar")
285+
require.NoError(t, err)
286+
287+
t.Logf("Promoting the learner %x", learnerID)
288+
resp, err := etcdctl.MemberPromote(learnerID)
289+
require.NoError(t, err)
290+
291+
// Pick the promoted member out of the promote response; it is logged here
// and, unless promotionSuccess is set, written back into the backend below.
var promotedMember *etcdserverpb.Member
292+
for _, m := range resp.Members {
293+
if m.ID == learnerID {
294+
promotedMember = m
295+
break
296+
}
297+
}
298+
require.NotNil(t, promotedMember)
299+
t.Logf("The promoted member: %+v", promotedMember)
300+
301+
t.Log("Ensure all members are voting members")
302+
ensureAllMembersAreVotingMembers(t, etcdctl)
235303

236-
t.Log("Create a single node etcd cluster")
237-
cfg := e2e.NewConfigNoTLS()
238-
cfg.BasePeerScheme = "unix"
239-
cfg.ClusterSize = 1
304+
if tc.snapshotCount != 0 {
305+
t.Logf("Write %d keys to trigger a snapshot", tc.snapshotCount)
306+
for i := 0; i < tc.snapshotCount; i++ {
307+
err = etcdctl.Put(fmt.Sprintf("key_%d", i), fmt.Sprintf("value_%d", i))
308+
require.NoError(t, err)
309+
}
310+
}
240311

241-
epc, err := e2e.NewEtcdProcessCluster(t, cfg)
242-
require.NoError(t, err, "failed to start etcd cluster: %v", err)
312+
if tc.promotionSuccess {
313+
t.Log("Skip manually changing the already promoted learner to a learner")
314+
} else {
315+
// Reproduce the "promotion not saved to v3store" state: with the member
// stopped, overwrite its backend record so it is marked as a learner again.
t.Logf("Stopping the already promoted member")
316+
require.NoError(t, epc.Procs[1].Stop())
317+
318+
t.Log("Manually changing the already promoted member to a learner again")
319+
promotedMember.IsLearner = true
320+
mustSaveMemberIntoBbolt(t, epc.Procs[1].Config().DataDirPath, promotedMember)
321+
322+
t.Log("Starting the member again")
323+
require.NoError(t, epc.Procs[1].Start())
324+
}
325+
326+
t.Log("Checking all members are ready to serve client requests")
327+
for i := 0; i < len(epc.Procs); i++ {
328+
e2e.AssertProcessLogs(t, epc.Procs[i], e2e.EtcdServerReadyLines[0])
329+
}
330+
331+
// Wait for the learner published attribute to be applied by all members in the cluster
332+
t.Log("Write a key to ensure the the learner published attribute has been applied by all members")
333+
err = etcdctl.Put("foo", "bar")
334+
require.NoError(t, err)
335+
336+
t.Log("Ensure all members are voting members again")
337+
// Each member is stopped before its backend file is inspected and restarted
// afterwards.
// NOTE(review): presumably the stop is required so the backend file can be
// opened by the test process — confirm.
for i := 0; i < len(epc.Procs); i++ {
338+
t.Logf("Stopping the member: %d", i)
339+
require.NoError(t, epc.Procs[i].Stop())
340+
341+
t.Logf("Checking all members in member's backend store: %d", i)
342+
ensureAllMembersFromV3StoreAreVotingMembers(t, epc.Procs[i].Config().DataDirPath)
343+
344+
t.Logf("Starting the member again: %d", i)
345+
require.NoError(t, epc.Procs[i].Start())
346+
}
347+
})
348+
}
349+
}
350+
351+
// mustSaveMemberIntoBbolt converts protoMember into a membership.Member,
// JSON-encodes it, and stores it in the members bucket of the bbolt backend
// file located under dataDir, keyed by the member ID's string form. Any
// failure along the way fails the test through require.
func mustSaveMemberIntoBbolt(t *testing.T, dataDir string, protoMember *etcdserverpb.Member) {
352+
dbPath := datadir.ToBackendFileName(dataDir)
353+
db, err := bbolt.Open(dbPath, 0666, nil)
354+
require.NoError(t, err)
243355
defer func() {
244-
derr := epc.Close()
245-
require.NoError(t, derr, "failed to close etcd cluster: %v", derr)
356+
require.NoError(t, db.Close())
246357
}()
247358

248-
t.Log("Add and start a learner")
249-
learnerID, err := epc.StartNewProc(nil, true, t)
359+
// Copy the fields of the protobuf member into the membership.Member layout
// that is serialized into the bucket.
m := &membership.Member{
360+
ID: types.ID(protoMember.ID),
361+
RaftAttributes: membership.RaftAttributes{
362+
PeerURLs: protoMember.PeerURLs,
363+
IsLearner: protoMember.IsLearner,
364+
},
365+
Attributes: membership.Attributes{
366+
Name: protoMember.Name,
367+
ClientURLs: protoMember.ClientURLs,
368+
},
369+
}
370+
371+
err = db.Update(func(tx *bbolt.Tx) error {
372+
// NOTE(review): b is used without a nil check; this assumes the members
// bucket already exists in the backend file — confirm.
b := tx.Bucket(buckets.Members.Name())
373+
374+
mkey := []byte(m.ID.String())
375+
mvalue, err := json.Marshal(m)
376+
require.NoError(t, err)
377+
378+
return b.Put(mkey, mvalue)
379+
})
250380
require.NoError(t, err)
381+
}
251382

252-
t.Log("Write a key to ensure the cluster is healthy so far")
253-
etcdctl := epc.Procs[0].Etcdctl(e2e.ClientNonTLS, false, false)
254-
err = etcdctl.Put("foo", "bar")
383+
func ensureAllMembersAreVotingMembers(t *testing.T, etcdctl *e2e.Etcdctl) {
384+
memberListResp, err := etcdctl.MemberList()
255385
require.NoError(t, err)
386+
for _, m := range memberListResp.Members {
387+
require.False(t, m.IsLearner)
388+
}
389+
}
256390

257-
t.Logf("Promoting the learner %x", learnerID)
258-
_, err = etcdctl.MemberPromote(learnerID)
391+
func ensureAllMembersFromV3StoreAreVotingMembers(t *testing.T, dataDir string) {
392+
dbPath := datadir.ToBackendFileName(dataDir)
393+
db, err := bbolt.Open(dbPath, 0400, &bbolt.Options{ReadOnly: true})
259394
require.NoError(t, err)
395+
defer func() {
396+
require.NoError(t, db.Close())
397+
}()
398+
399+
var members []membership.Member
400+
_ = db.View(func(tx *bbolt.Tx) error {
401+
b := tx.Bucket(buckets.Members.Name())
402+
_ = b.ForEach(func(k, v []byte) error {
403+
m := membership.Member{}
404+
err := json.Unmarshal(v, &m)
405+
require.NoError(t, err)
406+
members = append(members, m)
407+
return nil
408+
})
409+
return nil
410+
})
411+
412+
for _, m := range members {
413+
require.Falsef(t, m.IsLearner, "member is still learner: %+v", m)
414+
}
260415
}

tests/go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ require (
3030
github.com/spf13/cobra v1.1.3
3131
github.com/spf13/pflag v1.0.5
3232
github.com/stretchr/testify v1.9.0
33+
go.etcd.io/bbolt v1.3.11
3334
go.etcd.io/etcd/api/v3 v3.5.19
3435
go.etcd.io/etcd/client/pkg/v3 v3.5.19
3536
go.etcd.io/etcd/client/v2 v2.305.19
@@ -78,7 +79,6 @@ require (
7879
github.com/sirupsen/logrus v1.9.3 // indirect
7980
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
8081
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
81-
go.etcd.io/bbolt v1.3.11 // indirect
8282
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 // indirect
8383
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.20.0 // indirect
8484
go.opentelemetry.io/otel/metric v1.20.0 // indirect

0 commit comments

Comments
 (0)