45
45
import java .util .concurrent .TimeUnit ;
46
46
import java .util .concurrent .TimeoutException ;
47
47
import java .util .concurrent .atomic .AtomicInteger ;
48
+ import java .util .concurrent .atomic .AtomicReference ;
48
49
import java .util .stream .Collectors ;
49
50
import lombok .Cleanup ;
50
51
import org .apache .bookkeeper .bookie .BookieImpl ;
@@ -409,7 +410,16 @@ public void testInnerDelayedAuditOfLostBookies() throws Exception {
409
410
urLedgerMgr .setLostBookieRecoveryDelay (5 );
410
411
411
412
// shutdown a non auditor bookie; choosing non-auditor to avoid another election
412
- String shutdownBookie = shutDownNonAuditorBookie ();
413
+ AtomicReference <String > shutdownBookieRef = new AtomicReference <>();
414
+ CountDownLatch shutdownLatch = new CountDownLatch (1 );
415
+ new Thread (() -> {
416
+ try {
417
+ String shutdownBookie = shutDownNonAuditorBookie ();
418
+ shutdownBookieRef .set (shutdownBookie );
419
+ shutdownLatch .countDown ();
420
+ } catch (Exception ignore ) {
421
+ }
422
+ }).start ();
413
423
414
424
if (LOG .isDebugEnabled ()) {
415
425
LOG .debug ("Waiting for ledgers to be marked as under replicated" );
@@ -425,9 +435,10 @@ public void testInnerDelayedAuditOfLostBookies() throws Exception {
425
435
urLedgerList .contains (ledgerId ));
426
436
Map <Long , String > urLedgerData = getUrLedgerData (urLedgerList );
427
437
String data = urLedgerData .get (ledgerId );
428
- assertTrue ("Bookie " + shutdownBookie
438
+ shutdownLatch .await ();
439
+ assertTrue ("Bookie " + shutdownBookieRef .get ()
429
440
+ "is not listed in the ledger as missing replica :" + data ,
430
- data .contains (shutdownBookie ));
441
+ data .contains (shutdownBookieRef . get () ));
431
442
}
432
443
433
444
/**
@@ -486,7 +497,16 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartImmediately() throws
486
497
urLedgerMgr .setLostBookieRecoveryDelay (50 );
487
498
488
499
// shutdown a non auditor bookie; choosing non-auditor to avoid another election
489
- String shutdownBookie = shutDownNonAuditorBookie ();
500
+ AtomicReference <String > shutdownBookieRef = new AtomicReference <>();
501
+ CountDownLatch shutdownLatch = new CountDownLatch (1 );
502
+ new Thread (() -> {
503
+ try {
504
+ String shutdownBookie = shutDownNonAuditorBookie ();
505
+ shutdownBookieRef .set (shutdownBookie );
506
+ shutdownLatch .countDown ();
507
+ } catch (Exception ignore ) {
508
+ }
509
+ }).start ();
490
510
491
511
if (LOG .isDebugEnabled ()) {
492
512
LOG .debug ("Waiting for ledgers to be marked as under replicated" );
@@ -505,9 +525,10 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartImmediately() throws
505
525
urLedgerList .contains (ledgerId ));
506
526
Map <Long , String > urLedgerData = getUrLedgerData (urLedgerList );
507
527
String data = urLedgerData .get (ledgerId );
508
- assertTrue ("Bookie " + shutdownBookie
528
+ shutdownLatch .await ();
529
+ assertTrue ("Bookie " + shutdownBookieRef .get ()
509
530
+ "is not listed in the ledger as missing replica :" + data ,
510
- data .contains (shutdownBookie ));
531
+ data .contains (shutdownBookieRef . get () ));
511
532
}
512
533
513
534
@ Test
@@ -530,7 +551,16 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartLater() throws Excep
530
551
urLedgerMgr .setLostBookieRecoveryDelay (3 );
531
552
532
553
// shutdown a non auditor bookie; choosing non-auditor to avoid another election
533
- String shutdownBookie = shutDownNonAuditorBookie ();
554
+ AtomicReference <String > shutdownBookieRef = new AtomicReference <>();
555
+ CountDownLatch shutdownLatch = new CountDownLatch (1 );
556
+ new Thread (() -> {
557
+ try {
558
+ String shutdownBookie = shutDownNonAuditorBookie ();
559
+ shutdownBookieRef .set (shutdownBookie );
560
+ shutdownLatch .countDown ();
561
+ } catch (Exception ignore ) {
562
+ }
563
+ }).start ();
534
564
535
565
if (LOG .isDebugEnabled ()) {
536
566
LOG .debug ("Waiting for ledgers to be marked as under replicated" );
@@ -556,9 +586,10 @@ public void testRescheduleOfDelayedAuditOfLostBookiesToStartLater() throws Excep
556
586
urLedgerList .contains (ledgerId ));
557
587
Map <Long , String > urLedgerData = getUrLedgerData (urLedgerList );
558
588
String data = urLedgerData .get (ledgerId );
559
- assertTrue ("Bookie " + shutdownBookie
589
+ shutdownLatch .await ();
590
+ assertTrue ("Bookie " + shutdownBookieRef .get ()
560
591
+ "is not listed in the ledger as missing replica :" + data ,
561
- data .contains (shutdownBookie ));
592
+ data .contains (shutdownBookieRef . get () ));
562
593
}
563
594
564
595
@ Test
@@ -647,7 +678,12 @@ public void testTriggerAuditorWithPendingAuditTask() throws Exception {
647
678
urLedgerMgr .setLostBookieRecoveryDelay (lostBookieRecoveryDelay );
648
679
649
680
// shutdown a non auditor bookie; choosing non-auditor to avoid another election
650
- String shutdownBookie = shutDownNonAuditorBookie ();
681
+ new Thread (() -> {
682
+ try {
683
+ shutDownNonAuditorBookie ();
684
+ } catch (Exception ignore ) {
685
+ }
686
+ }).start ();
651
687
652
688
if (LOG .isDebugEnabled ()) {
653
689
LOG .debug ("Waiting for ledgers to be marked as under replicated" );
@@ -698,7 +734,12 @@ public void testTriggerAuditorBySettingDelayToZeroWithPendingAuditTask() throws
698
734
urLedgerMgr .setLostBookieRecoveryDelay (lostBookieRecoveryDelay );
699
735
700
736
// shutdown a non auditor bookie; choosing non-auditor to avoid another election
701
- String shutdownBookie = shutDownNonAuditorBookie ();
737
+ new Thread (() -> {
738
+ try {
739
+ shutDownNonAuditorBookie ();
740
+ } catch (Exception ignore ) {
741
+ }
742
+ }).start ();
702
743
703
744
if (LOG .isDebugEnabled ()) {
704
745
LOG .debug ("Waiting for ledgers to be marked as under replicated" );
@@ -750,8 +791,17 @@ public void testDelayedAuditWithMultipleBookieFailures() throws Exception {
750
791
// wait for 10 seconds before starting the recovery work when a bookie fails
751
792
urLedgerMgr .setLostBookieRecoveryDelay (10 );
752
793
753
- // shutdown a non auditor bookie to avoid an election
754
- String shutdownBookie1 = shutDownNonAuditorBookie ();
794
+ // shutdown a non auditor bookie; choosing non-auditor to avoid another election
795
+ AtomicReference <String > shutdownBookieRef1 = new AtomicReference <>();
796
+ CountDownLatch shutdownLatch1 = new CountDownLatch (1 );
797
+ new Thread (() -> {
798
+ try {
799
+ String shutdownBookie1 = shutDownNonAuditorBookie ();
800
+ shutdownBookieRef1 .set (shutdownBookie1 );
801
+ shutdownLatch1 .countDown ();
802
+ } catch (Exception ignore ) {
803
+ }
804
+ }).start ();
755
805
756
806
// wait for 3 seconds and there shouldn't be any under replicated ledgers
757
807
// because we have delayed the start of audit by 10 seconds
@@ -763,7 +813,16 @@ public void testDelayedAuditWithMultipleBookieFailures() throws Exception {
763
813
// the history about having delayed recovery remains. Hence we make sure
764
814
// we bring down a non auditor bookie. This should cause the audit to take
765
815
// place immediately and not wait for the remaining 7 seconds to elapse
766
- String shutdownBookie2 = shutDownNonAuditorBookie ();
816
+ AtomicReference <String > shutdownBookieRef2 = new AtomicReference <>();
817
+ CountDownLatch shutdownLatch2 = new CountDownLatch (1 );
818
+ new Thread (() -> {
819
+ try {
820
+ String shutdownBookie2 = shutDownNonAuditorBookie ();
821
+ shutdownBookieRef2 .set (shutdownBookie2 );
822
+ shutdownLatch2 .countDown ();
823
+ } catch (Exception ignore ) {
824
+ }
825
+ }).start ();
767
826
768
827
// 2 second grace period for the ledgers to get reported as under replicated
769
828
Thread .sleep (2000 );
@@ -776,9 +835,11 @@ public void testDelayedAuditWithMultipleBookieFailures() throws Exception {
776
835
urLedgerList .contains (ledgerId ));
777
836
Map <Long , String > urLedgerData = getUrLedgerData (urLedgerList );
778
837
String data = urLedgerData .get (ledgerId );
779
- assertTrue ("Bookie " + shutdownBookie1 + shutdownBookie2
838
+ shutdownLatch1 .await ();
839
+ shutdownLatch2 .await ();
840
+ assertTrue ("Bookie " + shutdownBookieRef1 .get () + shutdownBookieRef2 .get ()
780
841
+ " are not listed in the ledger as missing replicas :" + data ,
781
- data .contains (shutdownBookie1 ) && data .contains (shutdownBookie2 ));
842
+ data .contains (shutdownBookieRef1 . get ()) && data .contains (shutdownBookieRef2 . get () ));
782
843
}
783
844
784
845
/**
@@ -808,7 +869,17 @@ public void testDelayedAuditWithRollingUpgrade() throws Exception {
808
869
// shutdown a non auditor bookie to avoid an election
809
870
int idx1 = getShutDownNonAuditorBookieIdx ("" );
810
871
ServerConfiguration conf1 = confByIndex (idx1 );
811
- String shutdownBookie1 = shutdownBookie (idx1 );
872
+
873
+ AtomicReference <String > shutdownBookieRef1 = new AtomicReference <>();
874
+ CountDownLatch shutdownLatch1 = new CountDownLatch (1 );
875
+ new Thread (() -> {
876
+ try {
877
+ String shutdownBookie1 = shutdownBookie (idx1 );
878
+ shutdownBookieRef1 .set (shutdownBookie1 );
879
+ shutdownLatch1 .countDown ();
880
+ } catch (Exception ignore ) {
881
+ }
882
+ }).start ();
812
883
813
884
// wait for 2 seconds and there shouldn't be any under replicated ledgers
814
885
// because we have delayed the start of audit by 5 seconds
@@ -821,8 +892,17 @@ public void testDelayedAuditWithRollingUpgrade() throws Exception {
821
892
822
893
// Now to simulate the rolling upgrade, bring down a bookie different from
823
894
// the one we brought down/up above.
824
- String shutdownBookie2 = shutDownNonAuditorBookie (shutdownBookie1 );
825
-
895
+ // shutdown a non auditor bookie; choosing non-auditor to avoid another election
896
+ AtomicReference <String > shutdownBookieRef2 = new AtomicReference <>();
897
+ CountDownLatch shutdownLatch2 = new CountDownLatch (1 );
898
+ new Thread (() -> {
899
+ try {
900
+ String shutdownBookie2 = shutDownNonAuditorBookie ();
901
+ shutdownBookieRef2 .set (shutdownBookie2 );
902
+ shutdownLatch2 .countDown ();
903
+ } catch (Exception ignore ) {
904
+ }
905
+ }).start ();
826
906
// since the first bookie that was brought down/up has come up, there is only
827
907
// one bookie down at this time. Hence the lost bookie check shouldn't start
828
908
// immediately; it will start 5 seconds after the second bookie went down
@@ -839,11 +919,13 @@ public void testDelayedAuditWithRollingUpgrade() throws Exception {
839
919
urLedgerList .contains (ledgerId ));
840
920
Map <Long , String > urLedgerData = getUrLedgerData (urLedgerList );
841
921
String data = urLedgerData .get (ledgerId );
842
- assertTrue ("Bookie " + shutdownBookie1 + "wrongly listed as missing the ledger: " + data ,
843
- !data .contains (shutdownBookie1 ));
844
- assertTrue ("Bookie " + shutdownBookie2
922
+ shutdownLatch1 .await ();
923
+ shutdownLatch2 .await ();
924
+ assertTrue ("Bookie " + shutdownBookieRef1 .get () + "wrongly listed as missing the ledger: " + data ,
925
+ !data .contains (shutdownBookieRef1 .get ()));
926
+ assertTrue ("Bookie " + shutdownBookieRef2 .get ()
845
927
+ " is not listed in the ledger as missing replicas :" + data ,
846
- data .contains (shutdownBookie2 ));
928
+ data .contains (shutdownBookieRef2 . get () ));
847
929
LOG .info ("*****************Test Complete" );
848
930
}
849
931
@@ -1008,7 +1090,7 @@ private Auditor getAuditorBookiesAuditor() throws Exception {
1008
1090
return auditorElectors .get (bookieAddr ).auditor ;
1009
1091
}
1010
1092
1011
- private String shutDownNonAuditorBookie () throws Exception {
1093
+ private String shutDownNonAuditorBookie () throws Exception {
1012
1094
// shutdown bookie which is not an auditor
1013
1095
int indexOf = indexOfServer (getAuditorBookie ());
1014
1096
int bkIndexDownBookie ;
0 commit comments