Skip to content

Commit 211a932

Browse files
fix(local): configure MetalLB before speaker wait (#36)
NVBug: 6237988
Signed-off-by: Frank Spitulski <fspitulski@nvidia.com>
1 parent f09dc86 commit 211a932

1 file changed

Lines changed: 124 additions & 51 deletions

File tree

local/infra/scripts/setup-metallb.sh

Lines changed: 124 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ command -v kubectl >/dev/null 2>&1 || { echo "ERROR: kubectl is required" >&2; e
1212
command -v helm >/dev/null 2>&1 || { echo "ERROR: helm is required" >&2; exit 1; }
1313
command -v docker >/dev/null 2>&1 || { echo "ERROR: docker is required" >&2; exit 1; }
1414
command -v jq >/dev/null 2>&1 || { echo "ERROR: jq is required" >&2; exit 1; }
15+
command -v kind >/dev/null 2>&1 || { echo "ERROR: kind is required" >&2; exit 1; }
1516

1617
# Detect Docker network CIDR for Kind (IPv4 only)
1718
echo "Detecting Docker network configuration..."
@@ -29,43 +30,11 @@ if ! helm repo list | grep -q "metallb"; then
2930
helm repo update >/dev/null
3031
fi
3132

32-
deploy_to_cluster() {
33+
apply_metallb_resources() {
3334
local cluster_name=$1
3435
local cluster_num=$2
35-
local context="kind-${cluster_name}"
36-
37-
echo "Deploying MetalLB to ${cluster_name}..."
38-
39-
helm upgrade --install metallb metallb/metallb \
40-
--version 0.15.2 \
41-
--namespace metallb-system \
42-
--create-namespace \
43-
--kube-context "${context}" \
44-
--wait \
45-
--timeout 5m
46-
47-
kubectl wait --for=condition=ready pod \
48-
-l app.kubernetes.io/name=metallb \
49-
-n metallb-system \
50-
--timeout=5m \
51-
--context "${context}"
36+
local context=$3
5237

53-
# Wait for CRDs to be ready before creating resources
54-
echo "Waiting for MetalLB CRDs to be ready..."
55-
for i in {1..30}; do
56-
if kubectl get crd ipaddresspools.metallb.io --context "${context}" >/dev/null 2>&1 && \
57-
kubectl get crd l2advertisements.metallb.io --context "${context}" >/dev/null 2>&1; then
58-
break
59-
fi
60-
if [ $i -eq 30 ]; then
61-
echo "ERROR: MetalLB CRDs not ready after 30 seconds" >&2
62-
exit 1
63-
fi
64-
sleep 1
65-
done
66-
67-
# Create MetalLB configuration dynamically based on Docker network
68-
echo "Configuring MetalLB IP pool for ${cluster_name}..."
6938
cat <<EOF | kubectl apply -f - --context "${context}"
7039
---
7140
apiVersion: metallb.io/v1beta1
@@ -90,39 +59,143 @@ spec:
9059
interfaces:
9160
- eth0
9261
EOF
62+
}
63+
64+
wait_metallb_webhook() {
65+
local context=$1
66+
local webhook_addresses
67+
68+
echo "Waiting for MetalLB webhook endpoint..."
69+
for i in {1..60}; do
70+
webhook_addresses=$(kubectl get endpoints metallb-webhook-service \
71+
-n metallb-system \
72+
--context "${context}" \
73+
-o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)
74+
if [ -n "${webhook_addresses}" ]; then
75+
return 0
76+
fi
77+
if [ "${i}" -eq 60 ]; then
78+
echo "ERROR: MetalLB webhook endpoint not ready after 60 seconds" >&2
79+
return 1
80+
fi
81+
sleep 1
82+
done
83+
}
84+
85+
apply_metallb_config() {
86+
local cluster_name=$1
87+
local cluster_num=$2
88+
local context=$3
89+
90+
# Wait for CRDs to be ready before creating resources
91+
echo "Waiting for MetalLB CRDs to be ready..."
92+
for i in {1..30}; do
93+
if kubectl get crd ipaddresspools.metallb.io --context "${context}" >/dev/null 2>&1 && \
94+
kubectl get crd l2advertisements.metallb.io --context "${context}" >/dev/null 2>&1; then
95+
kubectl wait --for=condition=Established \
96+
crd/ipaddresspools.metallb.io \
97+
crd/l2advertisements.metallb.io \
98+
--timeout=30s \
99+
--context "${context}"
100+
break
101+
fi
102+
if [ $i -eq 30 ]; then
103+
echo "ERROR: MetalLB CRDs not ready after 30 seconds" >&2
104+
return 1
105+
fi
106+
sleep 1
107+
done
93108

94-
# Verify pool was created successfully
95-
if kubectl get ipaddresspool "${cluster_name}-pool" -n metallb-system --context "${context}" >/dev/null 2>&1; then
96-
echo "✓ IPAddressPool ${cluster_name}-pool created successfully"
109+
wait_metallb_webhook "${context}"
110+
111+
# Create MetalLB configuration dynamically based on Docker network
112+
echo "Configuring MetalLB IP pool for ${cluster_name}..."
113+
if ! apply_metallb_resources "${cluster_name}" "${cluster_num}" "${context}"; then
114+
echo "ERROR: Failed to apply MetalLB address resources for ${cluster_name}" >&2
115+
return 1
116+
fi
117+
118+
# Verify address resources were created successfully
119+
if kubectl get ipaddresspool "${cluster_name}-pool" -n metallb-system --context "${context}" >/dev/null 2>&1 && \
120+
kubectl get l2advertisement "${cluster_name}-l2-advert" -n metallb-system --context "${context}" >/dev/null 2>&1; then
121+
echo "✓ MetalLB address resources for ${cluster_name} created successfully"
97122
else
98-
echo "ERROR: Failed to create IPAddressPool for ${cluster_name}" >&2
99-
exit 1
123+
echo "ERROR: Failed to create MetalLB address resources for ${cluster_name}" >&2
124+
return 1
100125
fi
101126
}
102127

128+
deploy_to_cluster() {
129+
local cluster_name=$1
130+
local cluster_num=$2
131+
local context="kind-${cluster_name}"
132+
133+
echo "Deploying MetalLB to ${cluster_name}..."
134+
135+
helm upgrade --install metallb metallb/metallb \
136+
--version 0.15.2 \
137+
--namespace metallb-system \
138+
--create-namespace \
139+
--kube-context "${context}" \
140+
--timeout 5m
141+
142+
kubectl rollout status deployment/metallb-controller \
143+
-n metallb-system \
144+
--timeout=5m \
145+
--context "${context}"
146+
147+
apply_metallb_config "${cluster_name}" "${cluster_num}" "${context}"
148+
149+
kubectl rollout status daemonset/metallb-speaker \
150+
-n metallb-system \
151+
--timeout=5m \
152+
--context "${context}"
153+
}
154+
103155
# Deploy to all clusters in parallel with assigned IP pool numbers
104156
echo "Deploying MetalLB to all clusters in parallel..."
105-
pids=()
157+
clusters=("csc:200" "cpc-1:201" "cpc-2:202")
158+
kind_clusters=$(kind get clusters) || { echo "ERROR: Failed to list Kind clusters" >&2; exit 1; }
159+
missing_clusters=()
160+
161+
for cluster in "${clusters[@]}"; do
162+
cluster_name=${cluster%%:*}
163+
if ! grep -qx "${cluster_name}" <<< "${kind_clusters}"; then
164+
missing_clusters+=("${cluster_name}")
165+
fi
166+
done
106167

107-
if kind get clusters | grep -q "^csc$"; then
108-
deploy_to_cluster "csc" "200" &
109-
pids+=("$!")
168+
if [ "${#missing_clusters[@]}" -ne 0 ]; then
169+
echo "ERROR: Missing Kind cluster(s): ${missing_clusters[*]}" >&2
170+
exit 1
110171
fi
111172

112-
if kind get clusters | grep -q "^cpc-1$"; then
113-
deploy_to_cluster "cpc-1" "201" &
114-
pids+=("$!")
115-
fi
173+
pids=()
116174

117-
if kind get clusters | grep -q "^cpc-2$"; then
118-
deploy_to_cluster "cpc-2" "202" &
119-
pids+=("$!")
175+
for cluster in "${clusters[@]}"; do
176+
cluster_name=${cluster%%:*}
177+
cluster_num=${cluster##*:}
178+
deploy_to_cluster "${cluster_name}" "${cluster_num}" &
179+
pids+=("$!")
180+
done
181+
182+
if [ "${#pids[@]}" -eq 0 ]; then
183+
echo "ERROR: No MetalLB deployments were started" >&2
184+
exit 1
120185
fi
121186

122187
# Wait for all deployments to complete
188+
status=0
123189
for pid in "${pids[@]}"; do
124-
wait "${pid}"
190+
if ! wait "${pid}"; then
191+
status=1
192+
fi
125193
done
126194

195+
if [ "${status}" -ne 0 ]; then
196+
echo "ERROR: MetalLB deployment failed for one or more clusters" >&2
197+
exit "${status}"
198+
fi
199+
127200
echo "MetalLB deployed successfully"
128201
echo "IP Pools: ${DOCKER_BASE}.200.x, ${DOCKER_BASE}.201.x, ${DOCKER_BASE}.202.x"

0 commit comments

Comments (0)