Skip to content

Commit 54ceac8

Browse files
author
Kerkesni
committed
ensure all metadata components are started properly
Metadata components need to be started in the order repd -> bucketd -> cloudserver, as they cannot recover on their own when a component they depend on is unavailable at startup. Issue: ZENKO-4414
1 parent 6f896a9 commit 54ceac8

File tree

2 files changed

+65
-2
lines changed

2 files changed

+65
-2
lines changed

.github/scripts/end2end/common.sh

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,55 @@ get_token() {
99
jq -cr '.id_token'
1010
}
1111

12+
# Wait until TCP endpoint $host:$port accepts connections.
# The probe runs from inside the cluster via a short-lived busybox pod,
# because pod IPs are generally not reachable from the CI runner itself.
# Arguments:
#   $1 - host (IP or DNS name reachable from within the cluster)
#   $2 - port
#   $3 - timeout in seconds
# Returns: 0 once the endpoint is reachable; non-zero on timeout or if the
#          probe pod itself fails to run.
wait_for_endpoint() {
    local host=$1
    local port=$2
    local timeout_s=$3

    kubectl run wait-for-port \
        --image=busybox \
        --attach=True \
        --rm \
        --restart=Never \
        --pod-running-timeout=5m \
        --image-pull-policy=IfNotPresent \
        --env="HOST=${host}" \
        --env="PORT=${port}" \
        --env="TIMEOUT_S=${timeout_s}" \
        -- sh -c '
            wait_for_endpoint() {
                local count=0
                echo "waiting for $HOST:$PORT to be available"
                # nc -z: connect-scan only; -w 1: one-second connect timeout,
                # so each failed probe costs ~1s plus the explicit sleep.
                while ! nc -z -w 1 "$HOST" "$PORT"; do
                    count=$((count + 1))
                    if [ "$count" -ge "$TIMEOUT_S" ]; then
                        echo "Error: timed out waiting for $HOST:$PORT after $TIMEOUT_S seconds"
                        return 1
                    fi
                    sleep 1
                done
                echo "$HOST:$PORT is now available."
            }
            # Last command of sh -c: its status becomes the container exit
            # code, which kubectl --attach propagates back to the caller.
            wait_for_endpoint
        '
}
41+
42+
# Wait until every non-terminating pod backing a service is serving on all
# of its container ports that match a glob pattern.
# Arguments:
#   $1 - service name (matched against the pods' "app" label)
#   $2 - namespace
#   $3 - port glob pattern (e.g. "91*", or a literal port such as 9000)
#   $4 - per-endpoint timeout in seconds (passed to wait_for_endpoint)
# Returns: non-zero as soon as any matching endpoint fails to come up.
wait_for_all_pods_behind_services() {
    local service=$1
    local namespace=$2
    local port_regex=$3
    local timeout_s=$4
    local deletion_timestamp ip ports port

    # Feed the loop via process substitution instead of a pipeline so the
    # loop runs in the current shell and a failing wait_for_endpoint can set
    # this function's exit status. IFS=: splits the jsonpath output
    # "deletionTimestamp:podIP:port1 port2 ..." in one read, replacing the
    # previous per-field echo|cut subshells.
    while IFS=: read -r deletion_timestamp ip ports; do
        # skip pods that are terminating or not yet assigned an IP/ports
        if [ -n "$deletion_timestamp" ] || [ -z "$ip" ] || [ -z "$ports" ]; then
            continue
        fi
        # wait for every port matching the pattern: a pod may run multiple
        # containers, each exposing its own port ($ports is space-separated,
        # so it is intentionally left unquoted here to word-split)
        for port in $ports; do
            # unquoted RHS so port_regex acts as a glob pattern
            if [[ $port == $port_regex ]]; then
                wait_for_endpoint "$ip" "$port" "$timeout_s" || return 1
            fi
        done
    done < <(kubectl get pods -n "$namespace" -l "app=$service" \
        -o jsonpath='{range .items[*]}{.metadata.deletionTimestamp}:{.status.podIP}:{.spec.containers[*].ports[*].containerPort}{"\n"}{end}')
}

.github/scripts/end2end/deploy-metadata.sh

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
set -exu
44

5+
. "$(dirname $0)/common.sh"
6+
57
# create a separate namespace for metadata
68
kubectl create namespace metadata
79

@@ -19,18 +21,27 @@ helm install -n metadata \
1921
--set metadata.sproxyd.persistentVolume.storageClass='' \
2022
s3c cloudserver/
2123

22-
# wait for the repds to be ready
24+
# wait for the repds to be created
2325
kubectl -n metadata rollout status --watch --timeout=300s statefulset/s3c-metadata-repd
26+
# wait for all repd pods to start serving admin API ports
27+
wait_for_all_pods_behind_services metadata-repd metadata "91*" 60
2428

2529
# current chart uses an old version of bucketd that has issues reconnecting to the repd
2630
# when bucketd is started first. Restarting bucketd after repd is ready.
2731
kubectl -n metadata rollout restart deployment/s3c-metadata-bucketd
32+
# wait for the bucketd pods to be created
2833
kubectl -n metadata rollout status --watch --timeout=300s deploy/s3c-metadata-bucketd
34+
# wait for all bucketd pods to start serving port 9000
35+
wait_for_all_pods_behind_services metadata-bucketd metadata 9000 60
2936

30-
# manually add "s3c.local" to the rest endpoint list as it is not configurable in the chart
37+
# manually add "s3c.local" to the rest endpoints list as it's not configurable in the chart
3138
current_config=$(kubectl get configmap/s3c-cloudserver-config-json -n metadata -o jsonpath='{.data.config\.json}')
3239
updated_config=$(echo "$current_config" | jq '.restEndpoints["s3c.local"] = "us-east-1"')
3340
kubectl patch configmap/s3c-cloudserver-config-json -n metadata --type='merge' -p="$(jq -n --arg v "$updated_config" '{"data": {"config.json": $v}}')"
3441

3542
# restarting cloudserver to take the new configmap changes into account
3643
kubectl -n metadata rollout restart deployment/s3c-cloudserver
44+
# wait for the cloudserver pods to be created
45+
kubectl -n metadata rollout status --watch --timeout=300s deployment/s3c-cloudserver
46+
# wait for the cloudserver pods to start serving port 8000
47+
wait_for_all_pods_behind_services cloudserver metadata 8000 60

0 commit comments

Comments
 (0)