Skip to content

Commit 1c9a28c

Browse files
authored
Zookeeper migration patches (#10)
* Make zookeeper ids start from an offset * Bootstrap zookeeper cluster from an external SEED_NODE * Use public ip for internal quorum election * Delay zookeeper start to allow for LB registration * Fix a bug when a node is torn down * Implement review
1 parent 9909736 commit 1c9a28c

File tree

4 files changed

+63
-9
lines changed

4 files changed

+63
-9
lines changed

docker/bin/zookeeperFunctions.sh

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,12 @@
1313
set -ex
1414

1515
function zkConfig() {
16-
echo "$HOST.$DOMAIN:$QUORUM_PORT:$LEADER_PORT:$ROLE;$CLIENT_PORT"
16+
if [ -n "$1" ]; then
17+
FQDN="$1"
18+
else
19+
FQDN="$HOST.$DOMAIN"
20+
fi
21+
echo "$FQDN:$QUORUM_PORT:$LEADER_PORT:$ROLE;$CLIENT_PORT"
1722
}
1823

1924
function zkConnectionString() {
@@ -27,4 +32,4 @@ function zkConnectionString() {
2732
set -e
2833
echo "${CLIENT_HOST}:${CLIENT_PORT}"
2934
fi
30-
}
35+
}

docker/bin/zookeeperReady.sh

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ MYID_FILE=$DATA_DIR/myid
2020
LOG4J_CONF=/conf/log4j-quiet.properties
2121
STATIC_CONFIG=/data/conf/zoo.cfg
2222

23+
# used when zkid starts from a value greater than 1, default 1
24+
OFFSET=${OFFSET:-1}
25+
26+
# use SEED_NODE to bootstrap the current zookeeper cluster, else default to local cluster
27+
# CLIENT_HOST is used in zkConnectionString function already to create zkURL
28+
CLIENT_HOST=${SEED_NODE:-$CLIENT_HOST}
29+
30+
2331
OK=$(echo ruok | nc 127.0.0.1 $CLIENT_PORT)
2432

2533
# Check to see if zookeeper service answers
@@ -44,8 +52,16 @@ if [[ "$OK" == "imok" ]]; then
4452
echo Failed to parse name and ordinal of Pod
4553
exit 1
4654
fi
47-
MYID=$((ORD+1))
55+
MYID=$(($ORD+$OFFSET))
4856
ONDISK_CONFIG=false
57+
58+
# use FQDN_TEMPLATE to create an OUTSIDE_NAME that is going to be used to establish quorum election
59+
# this should be used along with the quorumListenOnAllIPs set to true
60+
# % from the FQDN_TEMPLATE will be replaced with pod index+1
61+
if [ -n "$FQDN_TEMPLATE" ]; then
62+
OUTSIDE_NAME=$(echo ${FQDN_TEMPLATE} | sed "s/%/$(($ORD+1))/g")
63+
fi
64+
4965
if [ -f $MYID_FILE ]; then
5066
EXISTING_ID="`cat $DATA_DIR/myid`"
5167
if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then
@@ -74,7 +90,7 @@ if [[ "$OK" == "imok" ]]; then
7490
echo "Zookeeper service is ready to be upgraded from observer to participant."
7591
ROLE=participant
7692
ZKURL=$(zkConnectionString)
77-
ZKCONFIG=$(zkConfig)
93+
ZKCONFIG=$(zkConfig $OUTSIDE_NAME)
7894
java -Dlog4j.configuration=file:"$LOG4J_CONF" -jar /root/zu.jar remove $ZKURL $MYID
7995
sleep 1
8096
java -Dlog4j.configuration=file:"$LOG4J_CONF" -jar /root/zu.jar add $ZKURL $MYID $ZKCONFIG

docker/bin/zookeeperStart.sh

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,19 @@ set -ex
1414
source /conf/env.sh
1515
source /usr/local/bin/zookeeperFunctions.sh
1616

17+
START_DELAY=${START_DELAY:-0}
18+
19+
1720
HOST=`hostname -s`
1821
DATA_DIR=/data
1922
MYID_FILE=$DATA_DIR/myid
2023
LOG4J_CONF=/conf/log4j-quiet.properties
2124
DYNCONFIG=$DATA_DIR/zoo.cfg.dynamic
2225
STATIC_CONFIG=/data/conf/zoo.cfg
2326

27+
# used when zkid starts from a value greater than 1, default 1
28+
OFFSET=${OFFSET:-1}
29+
2430
# Extract resource name and this members ordinal value from pod hostname
2531
if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then
2632
NAME=${BASH_REMATCH[1]}
@@ -30,7 +36,27 @@ else
3036
exit 1
3137
fi
3238

33-
MYID=$((ORD+1))
39+
MYID=$(($ORD+$OFFSET))
40+
41+
# use SEED_NODE to bootstrap the current zookeeper cluster, else default to local cluster
42+
# CLIENT_HOST is used in zkConnectionString function already to create zkURL
43+
CLIENT_HOST=${SEED_NODE:-$CLIENT_HOST}
44+
45+
46+
# use FQDN_TEMPLATE to create an OUTSIDE_NAME that is going to be used to establish quorum election
47+
# this should be used along with the quorumListenOnAllIPs set to true
48+
# % from the FQDN_TEMPLATE will be replaced with pod index+1
49+
if [ -n "$FQDN_TEMPLATE" ]; then
50+
OUTSIDE_NAME=$(echo ${FQDN_TEMPLATE} | sed "s/%/$(($ORD+1))/g")
51+
fi
52+
53+
# DOMAIN is replaced by SEED_NODE when SEED_NODE is set (NOTE(review): this comment originally referred to OUTSIDE_NAME - confirm which is intended)
54+
DOMAIN=${SEED_NODE:-$DOMAIN}
55+
56+
# wait for loadbalancer registration and skip the first one
57+
if [ $MYID -gt $OFFSET ]; then
58+
sleep $START_DELAY
59+
fi
3460

3561
# Values for first startup
3662
WRITE_CONFIGURATION=true
@@ -114,10 +140,10 @@ fi
114140
if [[ "$WRITE_CONFIGURATION" == true ]]; then
115141
echo "Writing myid: $MYID to: $MYID_FILE."
116142
echo $MYID > $MYID_FILE
117-
if [[ $MYID -eq 1 ]]; then
143+
if [[ $MYID -eq $OFFSET && -z "$SEED_NODE" ]]; then
118144
ROLE=participant
119145
echo Initial initialization of ordinal 0 pod, creating new config.
120-
ZKCONFIG=$(zkConfig)
146+
ZKCONFIG=$(zkConfig $OUTSIDE_NAME)
121147
echo Writing bootstrap configuration with the following config:
122148
echo $ZKCONFIG
123149
echo $MYID > $MYID_FILE
@@ -128,7 +154,7 @@ fi
128154
if [[ "$REGISTER_NODE" == true ]]; then
129155
ROLE=observer
130156
ZKURL=$(zkConnectionString)
131-
ZKCONFIG=$(zkConfig)
157+
ZKCONFIG=$(zkConfig $OUTSIDE_NAME)
132158
set -e
133159
echo Registering node and writing local configuration to disk.
134160
java -Dlog4j.configuration=file:"$LOG4J_CONF" -jar /root/zu.jar add $ZKURL $MYID $ZKCONFIG $DYNCONFIG

docker/bin/zookeeperTeardown.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ DATA_DIR=/data
1818
MYID_FILE=$DATA_DIR/myid
1919
LOG4J_CONF=/conf/log4j-quiet.properties
2020

21+
# use SEED_NODE to bootstrap the current zookeeper cluster, else default to local cluster
22+
# CLIENT_HOST is used in zkConnectionString function already to create zkURL
23+
CLIENT_HOST=${SEED_NODE:-$CLIENT_HOST}
24+
25+
# used when zkid starts from a value greater than 1, default 1
26+
OFFSET=${OFFSET:-1}
27+
2128
# Wait for client connections to drain. Kubernetes will wait until the configured
2229
# "terminationGracePeriodSeconds" before forcibly killing the container
2330
CONN_COUNT=`echo cons | nc localhost 2181 | grep -v "^$" |grep -v "/127.0.0.1:" | wc -l`
@@ -40,7 +47,7 @@ MYID=`cat $MYID_FILE`
4047
ZNODE_PATH="/zookeeper-operator/$CLUSTER_NAME"
4148
CLUSTERSIZE=`java -Dlog4j.configuration=file:"$LOG4J_CONF" -jar /root/zu.jar sync $ZKURL $ZNODE_PATH`
4249
echo "CLUSTER_SIZE=$CLUSTERSIZE, MyId=$MYID"
43-
if [[ -n "$CLUSTERSIZE" && "$CLUSTERSIZE" -lt "$MYID" ]]; then
50+
if [[ -n "$CLUSTERSIZE" && "$(($CLUSTERSIZE+$OFFSET-1))" -lt "$MYID" ]]; then
4451
# If ClusterSize < MyId, this server is being permanently removed.
4552
java -Dlog4j.configuration=file:"$LOG4J_CONF" -jar /root/zu.jar remove $ZKURL $MYID
4653
echo $?

0 commit comments

Comments
 (0)