Skip to content

Commit f1aa95b

Browse files
committed
Add e2e test to verify etcd is able to automatically fix the issue
caused by #19557.

Signed-off-by: Benjamin Wang <[email protected]>
1 parent 3c65dfa commit f1aa95b

File tree

2 files changed

+132
-18
lines changed

2 files changed

+132
-18
lines changed

tests/e2e/ctl_v3_member_test.go

+131 -17
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,12 @@ import (
2424

2525
"github.com/stretchr/testify/require"
2626

27+
"go.etcd.io/bbolt"
2728
"go.etcd.io/etcd/api/v3/etcdserverpb"
29+
"go.etcd.io/etcd/client/pkg/v3/types"
30+
"go.etcd.io/etcd/server/v3/datadir"
31+
"go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32+
"go.etcd.io/etcd/server/v3/mvcc/buckets"
2833
"go.etcd.io/etcd/tests/v3/framework/e2e"
2934
)
3035

@@ -230,31 +235,140 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230235
return e2e.SpawnWithExpectWithEnv(cmdArgs, cx.envMap, " updated in cluster ")
231236
}
232237

238+
// TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239+
// issue caused by https://github.com/etcd-io/etcd/issues/19557.
233240
func TestCtlV3PromotingLearner(t *testing.T) {
234-
e2e.BeforeTest(t)
241+
testCases := []struct {
242+
name string
243+
snapshotCount int
244+
promotionSuccess bool
245+
}{
246+
{
247+
name: "create snapshot after learner promotion and not saved to v3store",
248+
snapshotCount: 10,
249+
},
250+
{
251+
name: "no snapshot and learner promotion not saved to v3store",
252+
snapshotCount: 0,
253+
},
254+
{
255+
name: "no snapshot and learner promotion saved to v3store",
256+
snapshotCount: 0,
257+
promotionSuccess: true,
258+
},
259+
}
260+
261+
for _, tc := range testCases {
262+
t.Run(tc.name, func(t *testing.T) {
263+
264+
t.Log("Create a single node etcd cluster")
265+
cfg := e2e.NewConfigNoTLS()
266+
cfg.BasePeerScheme = "unix"
267+
cfg.ClusterSize = 1
268+
if tc.snapshotCount != 0 {
269+
cfg.SnapshotCount = tc.snapshotCount
270+
}
271+
272+
epc, err := e2e.NewEtcdProcessCluster(t, cfg)
273+
require.NoError(t, err, "failed to start etcd cluster: %v", err)
274+
defer func() {
275+
derr := epc.Close()
276+
require.NoError(t, derr, "failed to close etcd cluster: %v", derr)
277+
}()
278+
279+
t.Log("Add and start a learner")
280+
learnerID, err := epc.StartNewProc(nil, true, t)
281+
require.NoError(t, err)
282+
283+
t.Log("Write a key to ensure the cluster is healthy so far")
284+
etcdctl := epc.Procs[0].Etcdctl(e2e.ClientNonTLS, false, false)
285+
err = etcdctl.Put("foo", "bar")
286+
require.NoError(t, err)
287+
288+
t.Logf("Promoting the learner %x", learnerID)
289+
resp, err := etcdctl.MemberPromote(learnerID)
290+
require.NoError(t, err)
291+
292+
var promotedMember *etcdserverpb.Member
293+
for _, m := range resp.Members {
294+
if m.ID == learnerID {
295+
promotedMember = m
296+
break
297+
}
298+
}
299+
require.NotNil(t, promotedMember)
300+
t.Logf("The promoted member: %+v", promotedMember)
301+
302+
t.Log("Ensure all members are voting members")
303+
ensureAllMembersAreVotingMembers(t, etcdctl)
304+
305+
if tc.snapshotCount != 0 {
306+
t.Logf("Write %d keys to trigger a snapshot", tc.snapshotCount)
307+
for i := 0; i < tc.snapshotCount; i++ {
308+
err = etcdctl.Put(fmt.Sprintf("key_%d", i), fmt.Sprintf("value_%d", i))
309+
require.NoError(t, err)
310+
}
311+
}
312+
313+
t.Logf("Stopping the first member")
314+
require.NoError(t, epc.Procs[0].Stop())
315+
316+
if tc.promotionSuccess {
317+
t.Log("Skip manually changing the already promoted learner to a member")
318+
} else {
319+
t.Log("Manually changing the already promoted learner to a member again")
320+
promotedMember.IsLearner = true
321+
mustSaveMemberIntoBbolt(t, epc.Procs[0].Config().DataDirPath, promotedMember)
322+
}
235323

236-
t.Log("Create a single node etcd cluster")
237-
cfg := e2e.NewConfigNoTLS()
238-
cfg.BasePeerScheme = "unix"
239-
cfg.ClusterSize = 1
324+
t.Log("Starting the first member again")
325+
require.NoError(t, epc.Procs[0].Start())
240326

241-
epc, err := e2e.NewEtcdProcessCluster(t, cfg)
242-
require.NoError(t, err, "failed to start etcd cluster: %v", err)
327+
t.Log("Checking the auto-sync learner log message")
328+
e2e.AssertProcessLogs(t, epc.Procs[0], e2e.EtcdServerReadyLines[0])
329+
330+
t.Log("Ensure all members are voting members again")
331+
ensureAllMembersAreVotingMembers(t, etcdctl)
332+
})
333+
}
334+
}
335+
336+
func mustSaveMemberIntoBbolt(t *testing.T, dataDir string, protoMember *etcdserverpb.Member) {
337+
dbPath := datadir.ToBackendFileName(dataDir)
338+
db, err := bbolt.Open(dbPath, 0666, nil)
339+
require.NoError(t, err)
243340
defer func() {
244-
derr := epc.Close()
245-
require.NoError(t, derr, "failed to close etcd cluster: %v", derr)
341+
require.NoError(t, db.Close())
246342
}()
247343

248-
t.Log("Add and start a learner")
249-
learnerID, err := epc.StartNewProc(nil, true, t)
250-
require.NoError(t, err)
344+
m := &membership.Member{
345+
ID: types.ID(protoMember.ID),
346+
RaftAttributes: membership.RaftAttributes{
347+
PeerURLs: protoMember.PeerURLs,
348+
IsLearner: protoMember.IsLearner,
349+
},
350+
Attributes: membership.Attributes{
351+
Name: protoMember.Name,
352+
ClientURLs: protoMember.ClientURLs,
353+
},
354+
}
355+
356+
err = db.Update(func(tx *bbolt.Tx) error {
357+
b := tx.Bucket(buckets.Members.Name())
358+
359+
mkey := []byte(m.ID.String())
360+
mvalue, err := json.Marshal(m)
361+
require.NoError(t, err)
251362

252-
t.Log("Write a key to ensure the cluster is healthy so far")
253-
etcdctl := epc.Procs[0].Etcdctl(e2e.ClientNonTLS, false, false)
254-
err = etcdctl.Put("foo", "bar")
363+
return b.Put(mkey, mvalue)
364+
})
255365
require.NoError(t, err)
366+
}
256367

257-
t.Logf("Promoting the learner %x", learnerID)
258-
_, err = etcdctl.MemberPromote(learnerID)
368+
func ensureAllMembersAreVotingMembers(t *testing.T, etcdctl *e2e.Etcdctl) {
369+
memberListResp, err := etcdctl.MemberList()
259370
require.NoError(t, err)
371+
for _, m := range memberListResp.Members {
372+
require.False(t, m.IsLearner)
373+
}
260374
}

tests/go.mod

+1 -1
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ require (
3030
github.com/spf13/cobra v1.1.3
3131
github.com/spf13/pflag v1.0.5
3232
github.com/stretchr/testify v1.9.0
33+
go.etcd.io/bbolt v1.3.11
3334
go.etcd.io/etcd/api/v3 v3.5.19
3435
go.etcd.io/etcd/client/pkg/v3 v3.5.19
3536
go.etcd.io/etcd/client/v2 v2.305.19
@@ -78,7 +79,6 @@ require (
7879
github.com/sirupsen/logrus v1.9.3 // indirect
7980
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
8081
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
81-
go.etcd.io/bbolt v1.3.11 // indirect
8282
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 // indirect
8383
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.20.0 // indirect
8484
go.opentelemetry.io/otel/metric v1.20.0 // indirect

0 commit comments

Comments
 (0)