@@ -24,7 +24,12 @@ import (
24
24
25
25
"github.com/stretchr/testify/require"
26
26
27
+ "go.etcd.io/bbolt"
27
28
"go.etcd.io/etcd/api/v3/etcdserverpb"
29
+ "go.etcd.io/etcd/client/pkg/v3/types"
30
+ "go.etcd.io/etcd/server/v3/datadir"
31
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32
+ "go.etcd.io/etcd/server/v3/mvcc/buckets"
28
33
"go.etcd.io/etcd/tests/v3/framework/e2e"
29
34
)
30
35
@@ -230,31 +235,181 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230
235
return e2e .SpawnWithExpectWithEnv (cmdArgs , cx .envMap , " updated in cluster " )
231
236
}
232
237
238
+ // TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239
+ // issue caused by https://github.com/etcd-io/etcd/issues/19557.
233
240
func TestCtlV3PromotingLearner (t * testing.T ) {
234
- e2e .BeforeTest (t )
241
+ testCases := []struct {
242
+ name string
243
+ snapshotCount int
244
+ promotionSuccess bool
245
+ }{
246
+ {
247
+ name : "create snapshot after learner promotion which is not saved to v3store" ,
248
+ snapshotCount : 10 ,
249
+ },
250
+ {
251
+ name : "not create snapshot and learner promotion is not saved to v3store" ,
252
+ snapshotCount : 0 ,
253
+ },
254
+ {
255
+ name : "not create snapshot and learner promotion is saved to v3store" ,
256
+ snapshotCount : 0 ,
257
+ promotionSuccess : true ,
258
+ },
259
+ }
260
+
261
+ for _ , tc := range testCases {
262
+ t .Run (tc .name , func (t * testing.T ) {
263
+ t .Log ("Create a single node etcd cluster" )
264
+ cfg := e2e .NewConfigNoTLS ()
265
+ cfg .BasePeerScheme = "unix"
266
+ cfg .ClusterSize = 1
267
+ if tc .snapshotCount != 0 {
268
+ cfg .SnapshotCount = tc .snapshotCount
269
+ }
270
+
271
+ epc , err := e2e .NewEtcdProcessCluster (t , cfg )
272
+ require .NoError (t , err , "failed to start etcd cluster: %v" , err )
273
+ defer func () {
274
+ derr := epc .Close ()
275
+ require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
276
+ }()
277
+
278
+ t .Log ("Add and start a learner" )
279
+ learnerID , err := epc .StartNewProc (nil , true , t )
280
+ require .NoError (t , err )
281
+
282
+ t .Log ("Write a key to ensure the cluster is healthy so far" )
283
+ etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
284
+ err = etcdctl .Put ("foo" , "bar" )
285
+ require .NoError (t , err )
286
+
287
+ t .Logf ("Promoting the learner %x" , learnerID )
288
+ resp , err := etcdctl .MemberPromote (learnerID )
289
+ require .NoError (t , err )
290
+
291
+ var promotedMember * etcdserverpb.Member
292
+ for _ , m := range resp .Members {
293
+ if m .ID == learnerID {
294
+ promotedMember = m
295
+ break
296
+ }
297
+ }
298
+ require .NotNil (t , promotedMember )
299
+ t .Logf ("The promoted member: %+v" , promotedMember )
300
+
301
+ t .Log ("Ensure all members are voting members" )
302
+ ensureAllMembersAreVotingMembers (t , etcdctl )
235
303
236
- t .Log ("Create a single node etcd cluster" )
237
- cfg := e2e .NewConfigNoTLS ()
238
- cfg .BasePeerScheme = "unix"
239
- cfg .ClusterSize = 1
304
+ if tc .snapshotCount != 0 {
305
+ t .Logf ("Write %d keys to trigger a snapshot" , tc .snapshotCount )
306
+ for i := 0 ; i < tc .snapshotCount ; i ++ {
307
+ err = etcdctl .Put (fmt .Sprintf ("key_%d" , i ), fmt .Sprintf ("value_%d" , i ))
308
+ require .NoError (t , err )
309
+ }
310
+ }
240
311
241
- epc , err := e2e .NewEtcdProcessCluster (t , cfg )
242
- require .NoError (t , err , "failed to start etcd cluster: %v" , err )
312
+ if tc .promotionSuccess {
313
+ t .Log ("Skip manually changing the already promoted learner to a learner" )
314
+ } else {
315
+ t .Logf ("Stopping the already promoted member" )
316
+ require .NoError (t , epc .Procs [1 ].Stop ())
317
+
318
+ t .Log ("Manually changing the already promoted member to a learner again" )
319
+ promotedMember .IsLearner = true
320
+ mustSaveMemberIntoBbolt (t , epc .Procs [1 ].Config ().DataDirPath , promotedMember )
321
+
322
+ t .Log ("Starting the member again" )
323
+ require .NoError (t , epc .Procs [1 ].Start ())
324
+ }
325
+
326
+ t .Log ("Checking all members are ready to serve client requests" )
327
+ for i := 0 ; i < len (epc .Procs ); i ++ {
328
+ e2e .AssertProcessLogs (t , epc .Procs [i ], e2e .EtcdServerReadyLines [0 ])
329
+ }
330
+
331
+ // Wait for the learner published attribute to be applied by all members in the cluster
332
+ t .Log ("Write a key to ensure the the learner published attribute has been applied by all members" )
333
+ err = etcdctl .Put ("foo" , "bar" )
334
+ require .NoError (t , err )
335
+
336
+ t .Log ("Ensure all members are voting members again" )
337
+ for i := 0 ; i < len (epc .Procs ); i ++ {
338
+ t .Logf ("Stopping the member: %d" , i )
339
+ require .NoError (t , epc .Procs [i ].Stop ())
340
+
341
+ t .Logf ("Checking all members in member's backend store: %d" , i )
342
+ ensureAllMembersFromV3StoreAreVotingMembers (t , epc .Procs [i ].Config ().DataDirPath )
343
+
344
+ t .Logf ("Starting the member again: %d" , i )
345
+ require .NoError (t , epc .Procs [i ].Start ())
346
+ }
347
+ })
348
+ }
349
+ }
350
+
351
+ func mustSaveMemberIntoBbolt (t * testing.T , dataDir string , protoMember * etcdserverpb.Member ) {
352
+ dbPath := datadir .ToBackendFileName (dataDir )
353
+ db , err := bbolt .Open (dbPath , 0666 , nil )
354
+ require .NoError (t , err )
243
355
defer func () {
244
- derr := epc .Close ()
245
- require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
356
+ require .NoError (t , db .Close ())
246
357
}()
247
358
248
- t .Log ("Add and start a learner" )
249
- learnerID , err := epc .StartNewProc (nil , true , t )
359
+ m := & membership.Member {
360
+ ID : types .ID (protoMember .ID ),
361
+ RaftAttributes : membership.RaftAttributes {
362
+ PeerURLs : protoMember .PeerURLs ,
363
+ IsLearner : protoMember .IsLearner ,
364
+ },
365
+ Attributes : membership.Attributes {
366
+ Name : protoMember .Name ,
367
+ ClientURLs : protoMember .ClientURLs ,
368
+ },
369
+ }
370
+
371
+ err = db .Update (func (tx * bbolt.Tx ) error {
372
+ b := tx .Bucket (buckets .Members .Name ())
373
+
374
+ mkey := []byte (m .ID .String ())
375
+ mvalue , err := json .Marshal (m )
376
+ require .NoError (t , err )
377
+
378
+ return b .Put (mkey , mvalue )
379
+ })
250
380
require .NoError (t , err )
381
+ }
251
382
252
- t .Log ("Write a key to ensure the cluster is healthy so far" )
253
- etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
254
- err = etcdctl .Put ("foo" , "bar" )
383
+ func ensureAllMembersAreVotingMembers (t * testing.T , etcdctl * e2e.Etcdctl ) {
384
+ memberListResp , err := etcdctl .MemberList ()
255
385
require .NoError (t , err )
386
+ for _ , m := range memberListResp .Members {
387
+ require .False (t , m .IsLearner )
388
+ }
389
+ }
256
390
257
- t .Logf ("Promoting the learner %x" , learnerID )
258
- _ , err = etcdctl .MemberPromote (learnerID )
391
+ func ensureAllMembersFromV3StoreAreVotingMembers (t * testing.T , dataDir string ) {
392
+ dbPath := datadir .ToBackendFileName (dataDir )
393
+ db , err := bbolt .Open (dbPath , 0400 , & bbolt.Options {ReadOnly : true })
259
394
require .NoError (t , err )
395
+ defer func () {
396
+ require .NoError (t , db .Close ())
397
+ }()
398
+
399
+ var members []membership.Member
400
+ _ = db .View (func (tx * bbolt.Tx ) error {
401
+ b := tx .Bucket (buckets .Members .Name ())
402
+ _ = b .ForEach (func (k , v []byte ) error {
403
+ m := membership.Member {}
404
+ err := json .Unmarshal (v , & m )
405
+ require .NoError (t , err )
406
+ members = append (members , m )
407
+ return nil
408
+ })
409
+ return nil
410
+ })
411
+
412
+ for _ , m := range members {
413
+ require .Falsef (t , m .IsLearner , "member is still learner: %+v" , m )
414
+ }
260
415
}
0 commit comments