# Patch summary. These '#' lines sit ahead of the first "diff --git" header and
# are not part of any hunk.
#
# Purpose: add a manual k3s stop/start/status control for debugging.
#   pkg/kube/Dockerfile       - copies k3s-control.sh to /usr/bin/k3s-control, makes it
#                               executable and symlinks k3s-stop, k3s-start and
#                               k3s-status to it.
#   pkg/kube/cluster-init.sh  - setup_prereqs creates /run/kube. check_start_k3s returns 1
#                               while $K3S_STOP_FLAG exists; when the k3s server is found
#                               not running and $K3S_MANUAL_START_FLAG exists, the flag is
#                               removed and RESTART_COUNT / current_wait_time are reset.
#   pkg/kube/cluster-utils.sh - defines K3S_STOP_FLAG (/var/lib/k3s-stop) and
#                               K3S_MANUAL_START_FLAG (/run/kube/k3s-start).
#   pkg/kube/k3s-control.sh   - new script. The action is taken from $1 or, when $1 is
#                               empty, from the invoked name with "k3s-" stripped.
#                               stop:   touch the stop flag, then call terminate_k3s.
#                               start:  remove the stop flag, touch the manual-start flag.
#                               status: print pgrep result and stop-flag presence.
#
# Review change: the "start" action now creates the directory of $K3S_MANUAL_START_FLAG
# before touching it, mirroring what "stop" already does for $K3S_STOP_FLAG. The new-file
# hunk length is 71 accordingly.
# NOTE(review): the post-image blob id on the k3s-control.sh "index" line was not
# recomputed after this change.
diff --git a/pkg/kube/Dockerfile b/pkg/kube/Dockerfile index d7c3035c86..4628ce8437 100644 --- a/pkg/kube/Dockerfile +++ b/pkg/kube/Dockerfile @@ -100,6 +100,12 @@ ARG TARGETARCH # Actual k3s install and config happens when this container starts during EVE bootup, look at cluster-init.sh ### NOTE: the version of virtctl should match the version of kubevirt in cluster_init.sh, else PVC creation might fail due to incompatibility +COPY k3s-control.sh /usr/bin/k3s-control +RUN chmod +x /usr/bin/k3s-control && \ + ln -s /usr/bin/k3s-control /usr/bin/k3s-stop && \ + ln -s /usr/bin/k3s-control /usr/bin/k3s-start && \ + ln -s /usr/bin/k3s-control /usr/bin/k3s-status + ENV VIRTCTL_VERSION v1.6.0 ADD https://github.com/kubevirt/kubevirt/releases/download/${VIRTCTL_VERSION}/virtctl-${VIRTCTL_VERSION}-linux-${TARGETARCH} . diff --git a/pkg/kube/cluster-init.sh b/pkg/kube/cluster-init.sh index 30c2f47d38..4588705f28 100755 --- a/pkg/kube/cluster-init.sh +++ b/pkg/kube/cluster-init.sh @@ -202,6 +202,7 @@ setup_prereqs () { modprobe iscsi_tcp #Needed for iscsi tools mkdir -p /run/lock + mkdir -p /run/kube rm -rf /var/log ln -s "$K3S_LOG_DIR" /var/log mkdir -p "$K3S_CONFIG_DIR" @@ -251,6 +252,9 @@ config_cluster_roles() { } check_start_k3s() { + if [ -f "$K3S_STOP_FLAG" ]; then + return 1 + fi # If cluster is in transition, wait until transition is complete if [ -f "$TRANSITION_FLAG_FILE" ]; then logmsg "Cluster transition in progress, waiting before starting k3s" @@ -274,6 +278,13 @@ check_start_k3s() { pgrep -f "$K3S_SERVER_CMD" > /dev/null 2>&1 if [ $? 
-eq 1 ]; then + # Reset backoff if a manual start was requested + if [ -f "$K3S_MANUAL_START_FLAG" ]; then + logmsg "Manual start requested, resetting restart backoff" + rm -f "$K3S_MANUAL_START_FLAG" + RESTART_COUNT=0 + current_wait_time=$INITIAL_WAIT_TIME + fi # do exponential backoff for k3s restart, but not more than MAX_WAIT_TIME RESTART_COUNT=$((RESTART_COUNT+1)) logmsg "k3s server not running, restart wait time $current_wait_time, restart count: $RESTART_COUNT" diff --git a/pkg/kube/cluster-utils.sh b/pkg/kube/cluster-utils.sh index d6b55b8109..13f5f7be40 100755 --- a/pkg/kube/cluster-utils.sh +++ b/pkg/kube/cluster-utils.sh @@ -10,6 +10,10 @@ LOG_SIZE=$((5*1024*1024)) K3s_LOG_FILE="k3s.log" SAVE_KUBE_VAR_LIB_DIR="/persist/kube-save-var-lib" K3S_SERVER_CMD="k3s server" +# shellcheck disable=SC2034 +K3S_STOP_FLAG="/var/lib/k3s-stop" +# shellcheck disable=SC2034 +K3S_MANUAL_START_FLAG="/run/kube/k3s-start" TIE_BREAKER_NODE_LABEL="tie-breaker-node" TIE_BREAKER_NODE_LABEL_SET_VALUE="true" TIE_BREAKER_NODE_LABEL_UNSET_VALUE="false" diff --git a/pkg/kube/k3s-control.sh b/pkg/kube/k3s-control.sh new file mode 100755 index 0000000000..c7011a3c17 --- /dev/null +++ b/pkg/kube/k3s-control.sh @@ -0,0 +1,71 @@ +#!/bin/sh +# +# Copyright (c) 2026 Zededa, Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Script to manually stop/start k3s for debugging purposes +# + +K3S_LOG_DIR="/persist/kubelog" +INSTALL_LOG="${K3S_LOG_DIR}/k3s-install.log" + +# shellcheck source=pkg/kube/cluster-utils.sh +. /usr/bin/cluster-utils.sh + +# Wait for /var/lib to be ready (it might be a mount point) +# But don't wait forever +MAX_WAIT=30 +WAITED=0 +while [ ! 
-d /var/lib ] && [ $WAITED -lt $MAX_WAIT ]; do + sleep 1 + WAITED=$((WAITED + 1)) +done + +ACTION="$1" +if [ -z "$ACTION" ]; then + # Detect action from filename (e.g., k3s-stop -> stop) + ACTION=$(basename "$0" | sed 's/k3s-//') +fi + +case "$ACTION" in + stop) + logmsg "Manual k3s stop requested" + mkdir -p "$(dirname "$K3S_STOP_FLAG")" + touch "$K3S_STOP_FLAG" + if terminate_k3s; then + logmsg "Manual k3s stop completed" + echo "k3s stopped" + else + logmsg "Manual k3s stop failed" + echo "Failed to stop k3s" + exit 1 + fi + ;; + start) + logmsg "Manual k3s start requested" + rm -f "$K3S_STOP_FLAG" + # The flag directory is otherwise only created by setup_prereqs in cluster-init.sh; create it here so the touch cannot fail on a missing directory + mkdir -p "$(dirname "$K3S_MANUAL_START_FLAG")" + touch "$K3S_MANUAL_START_FLAG" + logmsg "Removed stop flag, k3s should restart shortly" + echo "k3s start requested (monitor status with k3s-status)" + ;; + status) + pids=$(pgrep -f "$K3S_SERVER_CMD") + if [ -n "$pids" ]; then + echo "Status: Running (PIDs: $pids)" + else + echo "Status: Stopped" + fi + + if [ -f "$K3S_STOP_FLAG" ]; then + echo "Stop Flag: Present ($K3S_STOP_FLAG)" + else + echo "Stop Flag: Absent" + fi + ;; + *) + echo "Usage: $(basename "$0") {stop|start|status}" + exit 1 + ;; +esac