Skip to content

Commit 2281599

Browse files
naiming-zededaeriknordmark
authored and committed
Join Bootstrap node and get Cluster-ID for verification
- In the case where multiple edge-node clusters are provisioned on the same network and use the same IP prefixes, the clustering operations break. This situation is very hard to debug. This patch adds acquiring the 'Cluster-ID' on top of just the 'cluster' string from the bootstrap node - While waiting for the transition to multi-node cluster mode, the configuration can be changed back to single-node mode. The patch adds the change to break out of the loop - Changed the wait for monitoring k3s cluster coming up after conversion from 10 minutes to 5 minutes Signed-off-by: naiming-zededa <naiming@zededa.com>
1 parent cc2eae3 commit 2281599

File tree

3 files changed

+165
-31
lines changed

3 files changed

+165
-31
lines changed

pkg/kube/cluster-init.sh

Lines changed: 136 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ All_PODS_READY=true
2323
install_kubevirt=1
2424
TRANSITION_PIPE="/tmp/cluster_transition_pipe$$"
2525
TRANSITION_FLAG_FILE="/tmp/cluster_transition_flag"
26+
RebootReasonFile="/persist/reboot-reason"
27+
BootReasonFile="/persist/boot-reason"
28+
BootReasonKubeTransition="BootReasonKubeTransition" # Must match string in types package
2629

2730
# shellcheck source=pkg/kube/pubsub.sh
2831
. /usr/bin/pubsub.sh
@@ -404,6 +407,32 @@ are_all_pods_ready() {
404407
return 0
405408
}
406409

410+
# Reboot the system with a recorded reason
411+
# Usage: reboot_with_reason "reason string"
412+
# The "BootReasonKubeTransition" will be written to /persist/boot-reason and
413+
# the reason will be written to /persist/reboot-reason before rebooting
414+
reboot_with_reason() {
415+
local reason="$1"
416+
local timestamp
417+
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
418+
419+
if [ -z "$reason" ]; then
420+
reason="kube cluster conversion reboot"
421+
fi
422+
423+
logmsg "Rebooting system: $reason"
424+
if [ ! -f "$BootReasonFile" ]; then
425+
echo "$BootReasonKubeTransition" > "$BootReasonFile"
426+
fi
427+
echo " [$timestamp]: $BootReasonKubeTransition, $reason" >> "$RebootReasonFile"
428+
429+
# Sync to ensure the file is written to disk
430+
sync
431+
sleep 1 # Give sync a moment to complete
432+
# Perform the reboot
433+
reboot
434+
}
435+
407436
# run virtctl vnc
408437
check_and_run_vnc() {
409438
pid=$(pgrep -f "/usr/bin/virtctl vnc" )
@@ -452,18 +481,27 @@ check_and_run_vnc() {
452481

453482
# get the EdgeNodeClusterStatus
454483
enc_status_file="/run/zedkube/EdgeNodeClusterStatus/global.json"
484+
# If the node is part of a cluster, even in the case of only one node in the cluster,
485+
# the cluster_intf, is_bootstrap, join_serverIP, cluster_token, cluster_node_ip
486+
# cluster_uuid are all obtained from the enc_status_file published by zedkube;
487+
# When the kubernetes node is in 'single node' mode, these variables are empty
455488
cluster_intf=""
456489
is_bootstrap=""
457490
join_serverIP=""
458491
cluster_token=""
459492
cluster_node_ip=""
493+
cluster_uuid=""
460494
convert_to_single_node=false
461495

462496
# get the EdgeNodeClusterStatus from zedkube publication
497+
# Return values:
498+
# 0 - Success: file exists and all validations passed
499+
# 1 - File exists but validation failed (incomplete/invalid data)
500+
# 2 - File does not exist
463501
get_enc_status() {
464502
# Read the JSON data from the file, return 0 if successful, 1 if not
465503
if [ ! -f "$enc_status_file" ]; then
466-
return 1
504+
return 2
467505
fi
468506

469507
enc_data=$(cat "$enc_status_file")
@@ -473,8 +511,10 @@ get_enc_status() {
473511
cluster_token=$(echo "$enc_data" | jq -r '.EncryptedClusterToken')
474512
cluster_node_ip=$(echo "$enc_data" | jq -r '.ClusterIPPrefix.IP')
475513
cluster_node_ip_is_ready=$(echo "$enc_data" | jq -r '.ClusterIPIsReady')
514+
cluster_uuid=$(echo "$enc_data" | jq -r '.ClusterID.UUID')
476515
if [ -n "$cluster_intf" ] && [ -n "$join_serverIP" ] && [ -n "$cluster_token" ] &&\
477516
[ -n "$cluster_node_ip" ] && [ "$cluster_node_ip_is_ready" = "true" ] &&\
517+
[ -n "$cluster_uuid" ] && [ "$cluster_uuid" != "null" ] &&\
478518
{ [ "$is_bootstrap" = "true" ] || [ "$is_bootstrap" = "false" ]; }; then
479519
return 0
480520
else
@@ -544,15 +584,55 @@ change_to_new_token() {
544584

545585
# monitor function to check if the cluster mode has changed, either from single node to cluster
546586
# or from cluster to single node
587+
#
588+
# Return values:
589+
# 0 - No action needed or transition initiated successfully
590+
#
591+
# Operational Cases:
592+
#
593+
# 1. NOT INITIALIZED: Skip checks until /var/lib/all_components_initialized exists
594+
#
595+
# 2. CLUSTER-TO-SINGLE TRANSITION (enc_status=2, enc_status_file missing):
596+
# - If not in cluster mode: no action
597+
# - Otherwise: cleanup registration, mark for single-node conversion, REBOOT
598+
#
599+
# 3. SINGLE-TO-CLUSTER TRANSITION (enc_status=0, no edge-node-cluster-mode flag):
600+
# - EdgeNodeClusterStatus valid AND node was in single mode
601+
# - Wait loop until valid enc_status received
602+
# - Mark node as cluster mode before config changes
603+
# - If zks registration exists: uninstall cluster components (kubevirt, longhorn) first
604+
# - Bootstrap node case: rotate k3s token to controller-provided token
605+
# - Remove old multus config, reassign with cluster node IP
606+
# - Remove node labels for reapplication
607+
# - Create transition pipe/flag for k3s restart coordination
608+
# - Terminate k3s process
609+
# - Non-bootstrap node join case: remove TLS certs, mark debuguser for reinit
610+
# - Provision cluster config (bootstrap or join mode)
611+
# - If enc_status_file disappears during wait for joining cluster: revert back to single-node, REBOOT
612+
# - Non-bootstrap: create transition tracking file with timestamp, if joining cluster fails repeatedly, may REBOOT
613+
# - Bootstrap: wait for k3s to start
614+
# - Signal k3s restart via pipe, cleanup transition flag
615+
#
616+
# 4. ALREADY IN DESIRED MODE: No action taken
617+
#
618+
# 5. POST-CONVERSION REGISTRATION: If base-k3s-mode flag exists, uninstall kubevirt, longhorn, apply registration
619+
#
620+
# REBOOT SCENARIOS:
621+
# - Cluster-to-single: Always reboots after cleanup
622+
# - Single-to-cluster: Only non-bootstrap nodes may reboot if join fails (see check_cluster_transition_done) repeatedly
623+
# - Interrupted transition for non-bootstrap nodes: Reboots to single-node if enc_status_file disappears
547624
check_cluster_config_change() {
548625

549626
# only check the cluster change when it's fully initialized
550627
if [ ! -f /var/lib/all_components_initialized ]; then
551628
return 0
552629
fi
553630

554-
if [ ! -f "$enc_status_file" ]; then
555-
#logmsg "EdgeNodeClusterStatus file not found"
631+
get_enc_status
632+
enc_status=$?
633+
634+
if [ $enc_status -eq 2 ]; then
635+
# the EdgeNodeClusterStatus file does not exist
556636
if [ ! -f /var/lib/edge-node-cluster-mode ]; then
557637
return 0
558638
else
@@ -566,15 +646,16 @@ check_cluster_config_change() {
566646
rm /var/lib/base-k3s-mode
567647
touch /var/lib/convert-to-single-node
568648
# We're transitioning from cluster mode to single node, so reboot is still needed
569-
reboot
649+
reboot_with_reason "Transition from cluster mode to single node"
570650
fi
571-
else
651+
elif [ -n "$cluster_token" ] && [ "$cluster_node_ip_is_ready" = "true" ]; then
572652
# record we have seen this ENC status file
573653
if [ ! -f /var/lib/edge-node-cluster-mode ]; then
574654
logmsg "EdgeNodeClusterStatus file found, but the node does not have edge-node-cluster-mode"
575655
logmsg "*** check_cluster_config_change, before while loop. cluster_node_ip: $cluster_node_ip" # XXX
576656
while true; do
577657
if get_enc_status; then
658+
# got the enc_status successfully, start single node to cluster transition
578659
logmsg "got the EdgeNodeClusterStatus successfully"
579660
# mark it cluster mode before changing the config file
580661
touch /var/lib/edge-node-cluster-mode
@@ -615,6 +696,17 @@ check_cluster_config_change() {
615696

616697
logmsg "provision config file for node to cluster mode"
617698
provision_cluster_config_file true
699+
provision_status=$?
700+
701+
# If, while waiting for the bootstrap node to be ready, the node is converted back to single node,
702+
# we need to get out of this loop and go back to single node mode
703+
if [ $provision_status -eq 1 ]; then
704+
logmsg "EdgeNodeClusterStatus file disappeared, reset the status and back to single node and reboot"
705+
rm /var/lib/base-k3s-mode
706+
touch /var/lib/convert-to-single-node
707+
reboot_with_reason "EdgeNodeClusterStatus file disappeared during cluster join, revert to single node"
708+
fi
709+
618710
if [ "$is_bootstrap" = "false" ]; then
619711
# we got here because we know the bootstrap node is already running
620712
# For a non-bootstrap node, create transition file and record timestamp
@@ -634,10 +726,16 @@ check_cluster_config_change() {
634726
logmsg "WARNING: changing the node to cluster mode, k3s can restart"
635727
break
636728
else
729+
# In the case where get_enc_status fails and the EdgeNodeClusterStatus file has been removed,
730+
# we need to exit the loop and try again
731+
if [ ! -f "$enc_status_file" ]; then
732+
logmsg "EdgeNodeClusterStatus file disappeared, exit the loop and try again"
733+
return 0
734+
fi
637735
sleep 10
638736
fi
639-
done
640-
else
737+
done # end of while true
738+
else # enc_status exists but not in all valid states
641739
return 0
642740
fi
643741
fi
@@ -673,7 +771,7 @@ check_cluster_transition_done() {
673771
fi
674772
fi
675773

676-
# Check if we've been waiting too long (10 minutes)
774+
# Check if we've been waiting too long (5 minutes)
677775
# File format is "timestamp reboot_count"
678776
# Maximum reboot attempts is 3
679777
file_content=$(cat /var/lib/transition-to-cluster)
@@ -683,7 +781,7 @@ check_cluster_transition_done() {
683781
current_timestamp=$(date +%s)
684782
elapsed_time=$((current_timestamp - transition_timestamp))
685783

686-
if [ "$elapsed_time" -ge 600 ]; then # 10 minutes in seconds
784+
if [ "$elapsed_time" -ge 300 ]; then # 5 minutes in seconds
687785
logmsg "Cluster transition timeout: Been waiting for ${elapsed_time} seconds"
688786

689787
# Increment reboot counter
@@ -693,7 +791,7 @@ check_cluster_transition_done() {
693791
# Update timestamp and reboot count in the same file
694792
echo "$(date +%s) $reboot_count" > /var/lib/transition-to-cluster
695793
logmsg "Rebooting system to retry cluster transition (attempt $reboot_count of 3)..."
696-
reboot
794+
reboot_with_reason "Reboot after retry cluster transition attempt $reboot_count"
697795
else
698796
logmsg "Maximum reboot attempts (3) reached. We will not reboot again."
699797
# We could consider adding some recovery action here
@@ -763,6 +861,9 @@ uninstall_components() {
763861
}
764862

765863
# provision the config.yaml and bootstrap-config.yaml for cluster node, passing $1 as k3s needs initializing
864+
# Return values:
865+
# 0 - Success: configuration completed successfully
866+
# 1 - enc_status_file file disappeared during bootstrap wait
766867
provision_cluster_config_file() {
767868
# prepare the config.yaml and bootstrap-config.yaml on node
768869
bootstrapContent=$(cat <<- EOF
@@ -823,17 +924,31 @@ EOF
823924
if curl --insecure --max-time 2 "https://$join_serverIP:6443" >/dev/null 2>&1; then
824925
#logmsg "curl to Endpoint https://$join_serverIP:6443 ready, check cluster status"
825926
# if we are here, check the bootstrap server is single or cluster mode
927+
# cluster status is reported via http://<join_serverIP>:8080/status API and the result if successful is
928+
# cluster:<cluster-uuid>, we need to verify the cluster-uuid matches our cluster_uuid in case we are joining
929+
# a wrong cluster in duplicate cluster IP address
826930
if ! status=$(curl --max-time 2 -s "http://$join_serverIP:$clusterStatusPort/status"); then
827931
if [ $((counter % 30)) -eq 1 ]; then
828932
logmsg "Attempt $counter: Failed to connect to the server. Waiting for 10 seconds..."
829933
fi
830-
elif [ "$status" = "cluster" ]; then
831-
logmsg "Server is in 'cluster' status. done"
832-
rm "$CLUSTER_WAIT_FILE"
833-
break
934+
elif echo "$status" | grep -q "^cluster:"; then
935+
# Extract the reported cluster UUID from the status
936+
reported_uuid=$(echo "$status" | cut -d':' -f2)
937+
938+
# Validate the cluster UUID matches
939+
if [ "$reported_uuid" = "$cluster_uuid" ]; then
940+
logmsg "Server is in 'cluster' status with matching UUID: $cluster_uuid. Done"
941+
rm "$CLUSTER_WAIT_FILE"
942+
break
943+
else
944+
if [ $((counter % 30)) -eq 1 ]; then
945+
logmsg "WARNING: Cluster UUID mismatch, may have duplicate Cluster IP address! Our UUID: $cluster_uuid, Reported UUID: $reported_uuid"
946+
logmsg "Attempt $counter: Cluster UUID does not match. Waiting for 10 seconds..."
947+
fi
948+
fi
834949
else
835950
if [ $((counter % 30)) -eq 1 ]; then
836-
logmsg "Attempt $counter: Server is not in 'cluster' status. Waiting for 10 seconds..."
951+
logmsg "Attempt $counter: Server is not in 'cluster' status (got: $status). Waiting for 10 seconds..."
837952
fi
838953
fi
839954
else
@@ -849,12 +964,18 @@ EOF
849964
logmsg "Attempt $counter: curl to Endpoint https://$join_serverIP:6443 failed (ping $join_serverIP: $ping_result, success=$ping_success_count, fail=$ping_fail_count). Waiting for 10 seconds..."
850965
fi
851966
fi
967+
if [ ! -f "$enc_status_file" ]; then
968+
logmsg "EdgeNodeClusterStatus file disappeared, exit the loop query bootstrap status"
969+
rm "$CLUSTER_WAIT_FILE"
970+
return 1
971+
fi
852972
sleep 10
853973
done
854974
else
855975
logmsg "restart case with k3s already installed, no need to wait"
856976
fi
857977
fi
978+
return 0
858979
}
859980

860981
DATESTR=$(date)

pkg/pillar/cmd/zedkube/clusterstatus.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,10 @@ func (z *zedkube) stopClusterStatusServer() {
271271
log.Noticef("Cluster status server goroutine has stopped")
272272
}
273273

274+
// clusterStatusHTTPHandler handles HTTP requests for the cluster status
275+
// If the node is a master and etcd node, it returns the cluster status in the format:
276+
// cluster:<cluster-uuid>
277+
// Otherwise, it returns an empty response.
274278
func (z *zedkube) clusterStatusHTTPHandler(w http.ResponseWriter, r *http.Request) {
275279
// Check if the request method is GET
276280
if r.Method != http.MethodGet {
@@ -304,7 +308,9 @@ func (z *zedkube) clusterStatusHTTPHandler(w http.ResponseWriter, r *http.Reques
304308
}
305309

306310
if isMaster && useEtcd {
307-
fmt.Fprint(w, "cluster")
311+
// Return cluster status with cluster UUID: cluster:<cluster-uuid>
312+
clusterUUID := z.clusterConfig.ClusterID.UUID.String()
313+
fmt.Fprintf(w, "cluster:%s", clusterUUID)
308314
return
309315
}
310316
log.Functionf("clusterStatusHTTPHandler: not master or etcd")

pkg/pillar/types/zedagenttypes.go

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -250,21 +250,22 @@ type BootReason uint8
250250
const (
251251
BootReasonNone BootReason = iota
252252

253-
BootReasonFirst // Normal - was not yet onboarded
254-
BootReasonRebootCmd // Normal - result of a reboot command in the API
255-
BootReasonUpdate // Normal - from an EVE image update in the API
256-
BootReasonFallback // Fallback from a failed EVE image update
257-
BootReasonDisconnect // Disconnected from controller for too long
258-
BootReasonFatal // Fatal error causing log.Fatal
259-
BootReasonOOM // OOM causing process to be killed
260-
BootReasonWatchdogHung // Software watchdog due stuck agent
261-
BootReasonWatchdogPid // Software watchdog due to e.g., golang panic
262-
BootReasonKernel // Set by dump-capture kernel, see docs/KERNEL-DUMPS.md and pkg/kdump/kdump.sh for details
263-
BootReasonPowerFail // Known power failure e.g., from disk controller S.M.A.R.T counter increase
264-
BootReasonUnknown // Could be power failure, kernel panic, or hardware watchdog
265-
BootReasonVaultFailure // Vault was not ready within the expected time
266-
BootReasonPoweroffCmd // Start after Local Profile Server poweroff
267-
BootReasonParseFail = 255 // BootReasonFromString didn't find match
253+
BootReasonFirst // Normal - was not yet onboarded
254+
BootReasonRebootCmd // Normal - result of a reboot command in the API
255+
BootReasonUpdate // Normal - from an EVE image update in the API
256+
BootReasonFallback // Fallback from a failed EVE image update
257+
BootReasonDisconnect // Disconnected from controller for too long
258+
BootReasonFatal // Fatal error causing log.Fatal
259+
BootReasonOOM // OOM causing process to be killed
260+
BootReasonWatchdogHung // Software watchdog due stuck agent
261+
BootReasonWatchdogPid // Software watchdog due to e.g., golang panic
262+
BootReasonKernel // Set by dump-capture kernel, see docs/KERNEL-DUMPS.md and pkg/kdump/kdump.sh for details
263+
BootReasonPowerFail // Known power failure e.g., from disk controller S.M.A.R.T counter increase
264+
BootReasonUnknown // Could be power failure, kernel panic, or hardware watchdog
265+
BootReasonVaultFailure // Vault was not ready within the expected time
266+
BootReasonPoweroffCmd // Start after Local Profile Server poweroff
267+
BootReasonKubeTransition // Transition to/from kubernetes single/cluster modes
268+
BootReasonParseFail = 255 // BootReasonFromString didn't find match
268269
)
269270

270271
// String returns the string name
@@ -300,6 +301,8 @@ func (br BootReason) String() string {
300301
return "BootReasonVaultFailure"
301302
case BootReasonPoweroffCmd:
302303
return "BootReasonPoweroffCmd"
304+
case BootReasonKubeTransition:
305+
return "BootReasonKubeTransition"
303306
default:
304307
return fmt.Sprintf("Unknown BootReason %d", br)
305308
}
@@ -340,6 +343,8 @@ func (br BootReason) StartWithSavedConfig() bool {
340343
return false
341344
case BootReasonPoweroffCmd:
342345
return true
346+
case BootReasonKubeTransition:
347+
return true
343348
default:
344349
return false
345350
}
@@ -381,6 +386,8 @@ func BootReasonFromString(str string) BootReason {
381386
return BootReasonVaultFailure
382387
case "BootReasonPoweroffCmd":
383388
return BootReasonPoweroffCmd
389+
case "BootReasonKubeTransition":
390+
return BootReasonKubeTransition
384391
default:
385392
return BootReasonParseFail
386393
}

0 commit comments

Comments
 (0)