@@ -52,6 +52,7 @@ $(cyan Commands):
52
52
$( green stop) Stops Accumulo cluster services
53
53
$( green restart) Restarts Accumulo cluster services
54
54
$( green kill) Kills Accumulo cluster services
55
+ $( green prune) Removes zookeeper locks of extra processes
55
56
56
57
$( cyan Examples) :
57
58
$( purple ' accumulo-cluster start' ) $( blue ' # start all servers' )
@@ -62,6 +63,8 @@ $(cyan Examples):
62
63
$( purple ' accumulo-cluster start --sservers=group1' ) $( blue ' # start all group1 sservers' )
63
64
$( purple ' accumulo-cluster start --sservers="group1 group2"' ) $( blue ' # start all group1 and group2 sservers' )
64
65
$( purple ' accumulo-cluster start --local --manager --tservers' ) $( blue ' # Start the local manager and local tservers' )
66
+ $( purple ' accumulo-cluster prune --compactors' ) $( blue ' # prune all extra compactors across all groups' )
67
+ $( purple ' accumulo-cluster prune --compactors="group1"' ) $( blue ' # prune extra compactors running in group1' )
65
68
66
69
EOF
67
70
}
@@ -287,8 +290,14 @@ function parse_config() {
287
290
exit 1
288
291
fi
289
292
290
- trap ' rm -f "$CONFIG_FILE"' EXIT
291
- CONFIG_FILE=$( mktemp --tmpdir " ClusterConfigParser-XXXXXXXX.out" ) || exit 1
293
+ AC_TMP_DIR=$( mktemp -t -d " accumulo-cluster-XXXXXXXX" ) || exit 1
294
+ if isDebug; then
295
+ echo " $( blue DEBUG) : Temporary files for this run are in $AC_TMP_DIR "
296
+ else
297
+ trap ' rm -rf -- "$AC_TMP_DIR"' EXIT
298
+ fi
299
+
300
+ CONFIG_FILE=" $AC_TMP_DIR /ClusterConfigParser.out"
292
301
" $accumulo_cmd " org.apache.accumulo.core.conf.cluster.ClusterConfigParser " $conf /cluster.yaml" " $CONFIG_FILE " || parse_fail
293
302
# shellcheck source=/dev/null
294
303
. " $CONFIG_FILE "
@@ -590,9 +599,183 @@ function control_services() {
590
599
591
600
if [[ $ARG_LOCAL == 0 && $ARG_ALL == 1 && ($operation == " stop" || $operation == " kill" ) ]]; then
592
601
debug " Cleaning all server entries in ZooKeeper"
593
- debugOrRun " $accumulo_cmd " org.apache.accumulo.server.util.ZooZap -verbose -manager -tservers -compactors -sservers
602
+ debugOrRun " $accumulo_cmd " org.apache.accumulo.server.util.ZooZap -verbose -manager -tservers -compactors -sservers --gc --monitor
603
+ fi
604
+
605
+ }
606
+
607
# Removes the ZooKeeper locks of surplus processes for one service type within
# one resource group, while protecting the processes this script knows about.
#
# Arguments:
#   $1 - service type: manager, compaction-coordinator, compactor, tserver,
#        sserver, gc or monitor
#   $2 - resource group name
#   $3 - expected number of processes per host; at most this many entries of
#        each host's process listing are protected
#   $4 - whitespace separated list of hosts to query
# Globals:
#   AC_TMP_DIR   (read) directory that receives the exclude file
#   SSH, bin     (read) used to run accumulo-service on each host
#   accumulo_cmd (read) command used to launch ZooZap
# Exits:
#   1 when the service type is unsupported or AC_TMP_DIR is unset or empty
function prune_group() {
  local service_type=$1
  local group=$2
  local expectedCount=$3
  local -a hosts
  read -r -a hosts <<< "$4"
  local host

  if isDebug; then
    echo "$(blue DEBUG) starting prune for service:$service_type group:$group expected:$expectedCount"
  fi

  # Validate the service type before touching the file system or any remote
  # host so an unsupported type fails fast without side effects.
  local lockTypeOpt
  case $service_type in
    manager)
      lockTypeOpt="-manager"
      ;;
    compaction-coordinator)
      lockTypeOpt="-compaction-coordinators"
      ;;
    compactor)
      lockTypeOpt="-compactors"
      ;;
    tserver)
      lockTypeOpt="-tservers"
      ;;
    sserver)
      lockTypeOpt="-sservers"
      ;;
    gc)
      lockTypeOpt="--gc"
      ;;
    monitor)
      lockTypeOpt="--monitor"
      ;;
    *)
      echo "Prune does not support $service_type"
      exit 1
      ;;
  esac

  # An empty value is rejected as well, otherwise the exclude file would be
  # created directly under the root directory.
  if [[ -z ${AC_TMP_DIR:-} ]]; then
    echo "$(red ERROR): AC_TMP_DIR is not set"
    exit 1
  fi
  local exclude_file="$AC_TMP_DIR/accumulo-zoozap-exclude-$service_type-$group.txt"
  touch "$exclude_file"

  # Determine the host:ports known by the accumulo cluster script, these should be kept.
  # The third column of each listing line is treated as a comma separated port
  # list; one "host:port" entry is written per port.
  # NOTE(review): a failed SSH call is not detected here, which leaves that
  # host's processes unprotected - confirm whether this should abort instead.
  for host in "${hosts[@]}"; do
    "${SSH[@]}" "$host" bash -c "'$bin/accumulo-service $service_type list'" \
      | grep -E "^[a-zA-Z0-9]+_${group}_[0-9]+" \
      | head -n "$expectedCount" \
      | awk -v host="$host" '{
          count = split($3, ports, ",")
          for (i = 1; i <= count; i++) {
            if (ports[i] != "") print host ":" ports[i]
          }
        }' >> "$exclude_file"
  done

  # Build the command once; debug mode only reports what would be removed.
  local -a zoozap_cmd=("$accumulo_cmd" org.apache.accumulo.server.util.ZooZap "$lockTypeOpt" -verbose --include-groups "$group" --exclude-host-ports "$exclude_file")
  if isDebug; then
    zoozap_cmd+=(--dry-run)
  fi
  "${zoozap_cmd[@]}"
}
665
+
666
+ # Kills extra server processes that are not needed according to the
667
+ # cluster.yaml file. Conceptually this code is trying to reconcile the
668
+ # following three sets of servers.
669
+ #
670
+ # 1. The notional goal set of servers specified by cluster.yaml
671
+ # 2. The set of server processes seen in zookeeper
672
+ # 3. The set of server processes known to the accumulo-cluster script. This
673
+ # is derived from pid files on hosts in set 1.
674
+ #
675
+ # This function attempts to find extra servers in set 2 that are not specified
676
+ # by set 1. When it does find extra servers it removes their zookeeper locks
677
+ # avoiding removing locks of servers in set 3. The following are different
678
+ # situations the code will see and handle.
679
+ #
680
+ # * When a host is not in cluster.yaml but has some processes listed in
681
+ # zookeeper. For this case all of the processes on that host can be killed.
682
+ # * When a resource group is not in cluster.yaml but has some processes listed
683
+ # in zookeeper. For this case all of the processes with that resource group
684
+ # can be killed.
685
+ # * When a host is in cluster.yaml with a target of 3 processes but has 6
686
+ # processes listed in zookeeper. For this case we want to kill 3 processes that
687
+ # do not have pid files on the host.
688
+ #
689
function prune() {
  if [[ $ARG_LOCAL == 1 ]]; then
    # Currently the code is structured to remove all extra servers in a single resource group. Finer granularity is not supported.
    echo "$(red ERROR): Prune does not support running locally"
    exit 1
  fi

  # jq is needed to read the resource groups out of the service status JSON.
  if ! command -v jq > /dev/null 2>&1; then
    echo "$(red ERROR:) Missing $(green jq). Unable to continue."
    exit 1
  fi

  if [[ -z ${AC_TMP_DIR:-} ]]; then
    echo "$(red ERROR): AC_TMP_DIR is not set"
    exit 1
  fi

  # Snapshot the service status once; it is consulted for every service type
  # whose groups were not given on the command line.
  local service_json="$AC_TMP_DIR/accumulo-service.json"
  if ! "$accumulo_cmd" admin serviceStatus --json > "$service_json" 2> /dev/null; then
    echo "$(red ERROR): Unable to obtain the service status from Accumulo"
    exit 1
  fi

  # These services are pruned with the "default" group and an expected count of 1.
  if [[ $ARG_ALL == 1 || $ARG_MANAGER == 1 ]]; then
    prune_group "manager" "default" "1" "$MANAGER_HOSTS"
  fi

  if [[ $ARG_ALL == 1 || $ARG_GC == 1 ]]; then
    prune_group "gc" "default" "1" "$GC_HOSTS"
  fi

  if [[ $ARG_ALL == 1 || $ARG_MONITOR == 1 ]]; then
    prune_group "monitor" "default" "1" "$MONITOR_HOSTS"
  fi

  # Services that are pruned once per resource group.
  if [[ $ARG_ALL == 1 || $ARG_TSERVER == 1 ]]; then
    prune_service_groups "tserver" "T_SERVER" "TSERVERS_PER_HOST_" "TSERVER_HOSTS_" "$ARG_TSERVER_GROUP" "$service_json"
  fi

  if [[ $ARG_ALL == 1 || $ARG_SSERVER == 1 ]]; then
    prune_service_groups "sserver" "S_SERVER" "SSERVERS_PER_HOST_" "SSERVER_HOSTS_" "$ARG_SSERVER_GROUP" "$service_json"
  fi

  if [[ $ARG_ALL == 1 || $ARG_COMPACTOR == 1 ]]; then
    prune_service_groups "compactor" "COMPACTOR" "COMPACTORS_PER_HOST_" "COMPACTOR_HOSTS_" "$ARG_COMPACTOR_GROUP" "$service_json"
  fi
}

# Helper for prune: runs prune_group for every resource group of one service type.
#
# Arguments:
#   $1 - service type handed to prune_group (tserver, sserver or compactor)
#   $2 - key of the service under .summaries in the service status JSON
#   $3 - prefix of the variables holding the per-host process count of a group
#   $4 - prefix of the variables holding the host list of a group
#   $5 - whitespace separated groups requested by the user, may be empty
#   $6 - path of the service status JSON file
function prune_service_groups() {
  local service_type=$1
  local summary_key=$2
  local count_prefix=$3
  local hosts_prefix=$4
  local requested_groups=$5
  local service_json=$6
  local -a groups=()
  local group count_var hosts_var

  if [[ -n $requested_groups ]]; then
    read -r -a groups <<< "$requested_groups"
  else
    # find all groups known in zookeeper, this will allow pruning entire groups that do not even exist in cluster.yaml
    # A service type missing from the JSON yields no groups instead of a jq error.
    readarray -t groups < <(jq -r --arg key "$summary_key" '(.summaries[$key].resourceGroups // []) | .[]' "$service_json")
  fi

  for group in "${groups[@]}"; do
    count_var="${count_prefix}${group}"
    hosts_var="${hosts_prefix}${group}"
    # A group without a per-host count variable has an expected count of 0.
    prune_group "$service_type" "$group" "${!count_var:-0}" "${!hosts_var}"
  done
}
597
780
598
781
function main() {
674
857
parse_config
675
858
control_services kill
676
859
;;
860
+ prune)
861
+ parse_config
862
+ prune
863
+ ;;
677
864
* )
678
865
invalid_args " '$ARG_CMD ' is an invalid <command>"
679
866
;;
0 commit comments