|
#!/bin/bash
#
# Renews the etcd and control plane certificates of a cluster by running the
# renewal commands on each node over SSH.
#
# Usage: <script> <cluster-name> <ssh-user> <path-to-ssh-key>

# -E (errtrace) lets functions, command substitutions and subshells inherit the
# ERR trap below; without it the trap never fires for code that runs inside
# main() and the functions it calls.
set -Eeuo pipefail
# The message goes to stderr so it can never be captured as data by a command
# substitution that inherits this trap.
trap 'echo "❌ Script failed at line $LINENO: $BASH_COMMAND" >&2; exit 1' ERR

# Constants
readonly ETCD_CERT_DIR="/etc/etcd"
# Assigned separately from `readonly` so a failing `date` is not masked.
BACKUP_DATE=$(date '+%Y%m%d_%H%M%S')
readonly BACKUP_DATE
readonly BACKUP_DIR="${ETCD_CERT_DIR}/pki.bak_${BACKUP_DATE}"
readonly CONTROL_PLANE_CERT_DIR="/etc/kubernetes/pki"
readonly TEMP_LOCAL_ETCD_CERTS_DIR="etcd-client-certs"
readonly CONTROL_PLANE_MANIFESTS_DIR="/etc/kubernetes/manifests"
# Kept as a single space-separated string: callers expand it unquoted.
readonly SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
readonly BACKUP_FOLDER="./certificate_backup_${BACKUP_DATE}"
readonly KUBEADM_CONFIG_BACKUP="${BACKUP_FOLDER}/kubeadm-config.yaml"

# Global variables for cluster access (populated by validate_inputs)
declare cluster_name
declare ssh_user
declare ssh_key
| 22 | +# Input validation |
# Validates the command-line arguments and stores them in the globals
# cluster_name, ssh_user and ssh_key.
# Arguments: $1 - cluster name, $2 - SSH user, $3 - path to the SSH private key
# Exits with status 1 (message on stderr) when the argument count is wrong,
# when the cluster name or SSH user is empty, or when the key file is not a
# readable regular file.
function validate_inputs() {
  if [[ $# -ne 3 ]]; then
    echo "Usage: $0 <cluster-name> <ssh-user> <path-to-ssh-key>" >&2
    exit 1
  fi

  if [[ -z "$1" || -z "$2" ]]; then
    echo "❌ Error: cluster name and SSH user must not be empty." >&2
    exit 1
  fi

  # Fail before any node is touched rather than on the first ssh call.
  if [[ ! -f "$3" || ! -r "$3" ]]; then
    echo "❌ Error: SSH key '$3' does not exist or is not readable." >&2
    exit 1
  fi

  # Set global variables after validation
  cluster_name="$1"
  ssh_user="$2"
  ssh_key="$3"
}
| 34 | + |
# Verifies that sudo can be used without prompting for a password.
# Exits with status 1 (message on stderr) when `sudo -n true` fails.
function check_sudo_access() {
  # -n keeps sudo non-interactive: it fails instead of asking for a password.
  sudo -n true 2>/dev/null && return 0

  echo "❌ Error: This script requires sudo access. Please run with a user that has sudo privileges." >&2
  exit 1
}
| 41 | + |
| 42 | + |
| 43 | +# Node retrieval functions |
# Queries the etcd machines of ${cluster_name} and stores their ExternalIP
# addresses in the global array ETCD_NODES (empty when none are returned).
# Globals:  cluster_name (read), ETCD_NODES (written)
# Exits with status 1 (message on stderr) when the kubectl query fails.
function get_etcd_nodes() {
  echo "Retrieving etcd node IPs for cluster: ${cluster_name}..."

  local selector="cluster.x-k8s.io/cluster-name=${cluster_name},cluster.x-k8s.io/etcd-cluster=${cluster_name}-etcd"
  local addresses

  if ! addresses=$(kubectl -n eksa-system get machines \
    --selector "${selector}" \
    -ojsonpath='{.items[*].status.addresses[?(@.type=="ExternalIP")].address}'); then
    echo "❌ Error: failed to retrieve etcd machines for cluster: ${cluster_name}." >&2
    exit 1
  fi

  ETCD_NODES=()
  # Split on whitespace with read -a instead of an unquoted $(...) so the
  # output is never subject to pathname expansion.
  if [[ -n "${addresses//[[:space:]]/}" ]]; then
    read -r -a ETCD_NODES <<<"${addresses//$'\n'/ }"
  fi
}
| 51 | + |
# Queries the control plane machines of ${cluster_name} and stores the first
# ExternalIP address of each machine in the global array CONTROL_PLANE_NODES.
# Machines without an ExternalIP (or without any address list) are skipped
# instead of contributing the literal string "null" as a host.
# Globals:  cluster_name (read), CONTROL_PLANE_NODES (written)
# Exits with status 1 (message on stderr) when the kubectl/jq pipeline fails
# (kubectl failures are only detected when pipefail is enabled).
function get_control_plane_nodes() {
  echo "Retrieving control plane node IPs for cluster: ${cluster_name}..."

  local selector="cluster.x-k8s.io/cluster-name=${cluster_name},cluster.x-k8s.io/control-plane"
  local addresses

  # `// []` tolerates a missing address list; `// empty` drops the null that
  # jq would otherwise print when a machine has no ExternalIP entry.
  if ! addresses=$(kubectl -n eksa-system get machines \
    --selector "${selector}" \
    -o json \
    | jq -r '.items[] | (.status.addresses // []) | map(select(.type=="ExternalIP"))[0].address // empty'); then
    echo "❌ Error: failed to retrieve control plane machines for cluster: ${cluster_name}." >&2
    exit 1
  fi

  CONTROL_PLANE_NODES=()
  # jq prints one address per line; fold the lines and split with read -a so
  # the values are never subject to pathname expansion.
  if [[ -n "${addresses//[[:space:]]/}" ]]; then
    read -r -a CONTROL_PLANE_NODES <<<"${addresses//$'\n'/ }"
  fi
}
| 60 | + |
| 61 | +# Certificate management functions |
# Prints (does not execute) the shell commands that copy the pki directory
# under ${ETCD_CERT_DIR} to pki.bak_${BACKUP_DATE}, empty pki, and copy the
# ca.* files back from the backup.
# Globals: ETCD_CERT_DIR (read), BACKUP_DATE (read)
# Outputs: the command lines on stdout, one per line
function backup_etcd_certs() {
  local backup_name="pki.bak_${BACKUP_DATE}"

  printf '%s\n' \
    "# Backup certificates" \
    "cd ${ETCD_CERT_DIR}" \
    "sudo cp -r pki ${backup_name}" \
    "sudo rm -rf pki/*" \
    "sudo cp ${backup_name}/ca.* pki/"
}
| 71 | + |
# Prints (does not execute) the etcdadm command used to renew the etcd
# certificates.
# Outputs: the command lines on stdout, one per line
function renew_etcd_certs() {
  printf '%s\n' \
    "# Renew certificates" \
    "sudo etcdadm join phase certificates http://eks-a-etcd-dumb-url"
}
| 78 | + |
# Prints (does not execute) the etcdctl "endpoint health" command that uses
# the CA and etcdctl client certificate/key under ${ETCD_CERT_DIR}/pki.
# Globals: ETCD_CERT_DIR (read)
# Outputs: a comment line followed by the command on a single line
function validate_etcd_renewal() {
  local pki_dir="${ETCD_CERT_DIR}/pki"
  local -a health_cmd=(
    sudo etcdctl
    "--cacert=${pki_dir}/ca.crt"
    "--cert=${pki_dir}/etcdctl-etcd-client.crt"
    "--key=${pki_dir}/etcdctl-etcd-client.key"
    endpoint health
  )

  # "${health_cmd[*]}" joins the words with single spaces (default IFS).
  printf '%s\n' "# Validate certificates" "${health_cmd[*]}"
}
| 88 | + |
# Renews the certificates on one etcd node and downloads the renewed
# apiserver-etcd-client certificate and key.
#
# Steps: run the backup/renew/validate commands on the node over SSH (the
# remote script also places a copy of the client key, owned by the SSH user,
# in /home/${ssh_user}); scp the client certificate and the key copy into
# ${BACKUP_FOLDER}/${TEMP_LOCAL_ETCD_CERTS_DIR}; remove the key copy from the
# node. The removal is attempted on every path, including when the remote
# script or a download fails, so the private key is not left in the home
# directory.
#
# Globals:   ssh_user, ssh_key, SSH_OPTS, ETCD_CERT_DIR, BACKUP_FOLDER,
#            TEMP_LOCAL_ETCD_CERTS_DIR (all read)
# Arguments: $1 - address of the etcd node
# Exits with status 1 when any step fails.
function process_etcd_node() {
  local node_ip="$1"
  local remote="${ssh_user}@${node_ip}"
  local key_name="apiserver-etcd-client.key"
  local remote_key_copy="/home/${ssh_user}/${key_name}"
  local dest_dir="${BACKUP_FOLDER}/${TEMP_LOCAL_ETCD_CERTS_DIR}"
  local status=0

  echo "Processing etcd node: ${node_ip}..."

  # SSH_OPTS is a space-separated option string and must be word-split.
  # The status is captured instead of aborting so the cleanup below still runs.
  # shellcheck disable=SC2086
  ssh ${SSH_OPTS} -i "${ssh_key}" "${remote}" bash <<EOF || status=$?
set -euo pipefail
$(backup_etcd_certs)
$(renew_etcd_certs)

sudo cp ${ETCD_CERT_DIR}/pki/${key_name} /home/${ssh_user}/
sudo chown ${ssh_user}:${ssh_user} ${remote_key_copy}
$(validate_etcd_renewal)
EOF

  if ((status == 0)); then
    # shellcheck disable=SC2086
    scp ${SSH_OPTS} -i "${ssh_key}" "${remote}:${ETCD_CERT_DIR}/pki/apiserver-etcd-client.crt" "${dest_dir}/" || status=$?
  fi

  if ((status == 0)); then
    # shellcheck disable=SC2086
    scp ${SSH_OPTS} -i "${ssh_key}" "${remote}:${remote_key_copy}" "${dest_dir}/" || status=$?
  fi

  # Always try to remove the private key copy from the remote home directory.
  # shellcheck disable=SC2086
  if ! ssh ${SSH_OPTS} -i "${ssh_key}" "${remote}" "rm -f ${remote_key_copy}"; then
    echo "❌ Error: failed to remove ${remote_key_copy} from ${node_ip}." >&2
    ((status != 0)) || status=1
  fi

  if ((status != 0)); then
    echo "❌ Error: certificate renewal failed for the ETCD node: ${node_ip}." >&2
    exit 1
  fi

  echo "✅ Completed renewing certificate for the ETCD node: ${node_ip}."
  echo "---------------------------------------------"
}
| 112 | + |
# Patches the ${cluster_name}-apiserver-etcd-client secret in the eksa-system
# namespace with the base64-encoded certificate and key found in
# ${BACKUP_FOLDER}/${TEMP_LOCAL_ETCD_CERTS_DIR}.
# Globals: cluster_name, BACKUP_FOLDER, TEMP_LOCAL_ETCD_CERTS_DIR (all read)
# Exits with status 1 (message on stderr) when either file is missing or empty.
function update_apiserver_etcd_client_secret() {
  local certs_dir="${BACKUP_FOLDER}/${TEMP_LOCAL_ETCD_CERTS_DIR}"
  local crt_file="${certs_dir}/apiserver-etcd-client.crt"
  local key_file="${certs_dir}/apiserver-etcd-client.key"
  local file

  for file in "${crt_file}" "${key_file}"; do
    if [[ ! -s "${file}" ]]; then
      echo "❌ Error: ${file} is missing or empty; cannot update the secret." >&2
      exit 1
    fi
  done

  local crt_base64
  local key_base64
  # Stripping newlines from plain `base64` output yields a single line whether
  # or not the local base64 wraps its output, so no per-OS command string (and
  # no eval) is needed.
  crt_base64=$(base64 <"${crt_file}" | tr -d '\n')
  key_base64=$(base64 <"${key_file}" | tr -d '\n')

  kubectl patch secret "${cluster_name}-apiserver-etcd-client" -n eksa-system --type='merge' -p="
data:
  tls.crt: ${crt_base64}
  tls.key: ${key_base64}
"
  echo "✅ Successfully updated ${cluster_name}-apiserver-etcd-client secret."
}
| 134 | + |
# Recursively copies ${BACKUP_FOLDER}/${TEMP_LOCAL_ETCD_CERTS_DIR} to the
# remote login directory (destination ".") of a control plane node via scp.
# Globals:   ssh_user, ssh_key, SSH_OPTS, BACKUP_FOLDER,
#            TEMP_LOCAL_ETCD_CERTS_DIR (all read)
# Arguments: $1 - address of the control plane node
function transfer_certs_to_control_plane() {
  local node_ip="$1"
  local source_dir="${BACKUP_FOLDER}/${TEMP_LOCAL_ETCD_CERTS_DIR}"
  local destination="${ssh_user}@${node_ip}:."

  echo "Transferring apiserver-etcd-client certificates to control plane node: ${node_ip}..."
  # NOTE(review): scp is run through sudo here; confirm elevated privileges are
  # actually required for this local-to-remote copy.
  # SSH_OPTS is a space-separated option string and must be word-split.
  # shellcheck disable=SC2086
  sudo scp ${SSH_OPTS} -i "${ssh_key}" -r "${source_dir}" "${destination}"
  echo "External certificates transferred to control plane node: ${node_ip}."
}
| 142 | + |
# Renews the certificates on one control plane node over SSH.
#
# The remote script backs up ${CONTROL_PLANE_CERT_DIR}, runs
# `kubeadm certs renew all` and `kubeadm certs check-expiration`, installs the
# apiserver-etcd-client certificate/key when the ${TEMP_LOCAL_ETCD_CERTS_DIR}
# directory is present in the login directory, and then moves the files in
# ${CONTROL_PLANE_MANIFESTS_DIR} out for 20 seconds and back again (presumably
# so the kubelet restarts the static pods with the renewed certificates —
# confirm).
#
# The manifests are parked in a fresh `mktemp -d` directory rather than a
# fixed /tmp path, and an EXIT trap on the remote side moves back anything
# still parked there and removes the transferred etcd client certificates, so
# a failure part-way through does not leave the manifests displaced or the
# private key in the home directory.
#
# Globals:   ssh_user, ssh_key, SSH_OPTS, BACKUP_DATE, CONTROL_PLANE_CERT_DIR,
#            CONTROL_PLANE_MANIFESTS_DIR, TEMP_LOCAL_ETCD_CERTS_DIR (all read)
# Arguments: $1 - address of the control plane node
# Exits with status 1 (message on stderr) when the remote script fails.
function process_control_plane_node() {
  local node_ip="$1"

  echo "Processing control plane node: ${node_ip}..."

  # SSH_OPTS is a space-separated option string and must be word-split.
  # In the heredoc, unescaped variables are expanded locally; escaped ones are
  # evaluated on the remote node.
  # shellcheck disable=SC2086
  if ! ssh ${SSH_OPTS} -i "${ssh_key}" "${ssh_user}@${node_ip}" bash <<EOF
set -euo pipefail

manifests_tmp=""

# Runs on every exit of this remote script: puts back any manifest that is
# still parked and removes the transferred etcd client certificates.
cleanup() {
  if [[ -n "\${manifests_tmp}" ]]; then
    sudo find "\${manifests_tmp}" -mindepth 1 -maxdepth 1 -exec mv {} '${CONTROL_PLANE_MANIFESTS_DIR}/' ';' || echo "❌ Failed to restore manifests from \${manifests_tmp}" >&2
    sudo rmdir "\${manifests_tmp}" 2>/dev/null || true
  fi
  rm -rf -- '${TEMP_LOCAL_ETCD_CERTS_DIR}'
}
trap cleanup EXIT

sudo cp -r '${CONTROL_PLANE_CERT_DIR}' '/etc/kubernetes/pki.bak_${BACKUP_DATE}'

sudo kubeadm certs renew all
sudo kubeadm certs check-expiration

# Only copy etcd client certificates if external etcd exists
if [[ -d '${TEMP_LOCAL_ETCD_CERTS_DIR}' ]]; then
  sudo cp '${TEMP_LOCAL_ETCD_CERTS_DIR}/apiserver-etcd-client.crt' '${CONTROL_PLANE_CERT_DIR}'
  sudo cp '${TEMP_LOCAL_ETCD_CERTS_DIR}/apiserver-etcd-client.key' '${CONTROL_PLANE_CERT_DIR}'
fi

manifests_tmp="\$(mktemp -d)"
sudo mv '${CONTROL_PLANE_MANIFESTS_DIR}'/* "\${manifests_tmp}/"
sleep 20
sudo mv "\${manifests_tmp}"/* '${CONTROL_PLANE_MANIFESTS_DIR}/'
EOF
  then
    echo "❌ Error: certificate renewal failed for the control node: ${node_ip}." >&2
    exit 1
  fi

  echo "✅ Completed renewing certificate for the control node: ${node_ip}."
  echo "---------------------------------------------"
}
| 173 | + |
# Probes the Kubernetes API server with `kubectl version`, making up to five
# attempts with a 10 second pause between attempts.
# Returns 0 as soon as one attempt succeeds.
# Exits with status 1 (message on stderr) when every attempt fails.
function check_api_server_reachability() {
  local -r max_attempts=5
  local -r retry_delay=10
  # Local so the counter does not leak into (or clobber) a global variable.
  local attempt

  echo "✅ Checking if Kubernetes API server is reachable..."
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if kubectl version --request-timeout=2m &>/dev/null; then
      return 0
    fi
    # No pause after the final attempt: nothing is retried after it.
    if ((attempt < max_attempts)); then
      sleep "${retry_delay}"
    fi
  done

  echo "❌ Error: Kubernetes API server is not reachable. Aborting." >&2
  exit 1
}
| 184 | + |
# Creates ${BACKUP_FOLDER} and writes the kubeadm-config ConfigMap (namespace
# kube-system) as YAML to ${KUBEADM_CONFIG_BACKUP}.
# Globals: BACKUP_FOLDER, KUBEADM_CONFIG_BACKUP (read)
# Exits with status 1 (message on stderr) when kubectl fails.
function backup_kubeadm_config() {
  mkdir -p "${BACKUP_FOLDER}"
  echo "✅ Backing up kubeadm-config ConfigMap..."

  kubectl -n kube-system get cm kubeadm-config -o yaml > "${KUBEADM_CONFIG_BACKUP}" || {
    echo "❌ Failed to backup kubeadm-config." >&2
    exit 1
  }

  echo "✅ kubeadm-config backed up to ${KUBEADM_CONFIG_BACKUP}"
}
| 196 | + |
# Removes ${BACKUP_FOLDER} when the API server is reachable; otherwise keeps
# it and reports that cleanup was skipped.
# Globals: BACKUP_FOLDER (read)
function cleanup_on_success() {
  # check_api_server_reachability terminates the shell with `exit 1` when the
  # API server is unreachable. Running it in a subshell confines that exit to
  # the subshell, so the else branch below can actually be reached.
  if (check_api_server_reachability); then
    echo "✅ Cleaning up temporary files..."
    # :? aborts instead of expanding to an empty path if the variable is unset.
    rm -rf -- "${BACKUP_FOLDER:?}"
    echo "✅ All temporary files removed."
  else
    echo "❌ API server unreachable — skipping cleanup to preserve debug data." >&2
  fi
}
| 206 | + |
# Orchestrates the whole renewal run:
#   1. validate arguments, check sudo, check API server, back up kubeadm-config
#   2. renew certificates on every etcd node (when ETCD_NODES is non-empty) and
#      update the apiserver-etcd-client secret
#   3. renew certificates on every control plane node, first transferring the
#      etcd client certificates when etcd nodes were found
#   4. clean up
# Arguments: the script's command-line arguments, passed to validate_inputs
# Exits with status 1 when no control plane node is found.
function main() {
  validate_inputs "$@"
  check_sudo_access
  check_api_server_reachability
  backup_kubeadm_config

  # ETCD cert renewal
  echo "Starting etcd certificate renewal process..."
  get_etcd_nodes
  mkdir -p "${BACKUP_FOLDER}" "${BACKUP_FOLDER}/${TEMP_LOCAL_ETCD_CERTS_DIR}"

  local etcd_node_count="${#ETCD_NODES[@]}"

  if ((etcd_node_count > 0)); then
    for node_ip in "${ETCD_NODES[@]}"; do
      process_etcd_node "${node_ip}"
    done

    update_apiserver_etcd_client_secret
    echo "🎉 Etcd certificate renewal process completed successfully."
  else
    echo "Cluster ${cluster_name} does not have external ETCD." >&2
  fi

  # Control plane cert renewal
  echo "Starting control plane certificate renewal process..."
  get_control_plane_nodes

  # Guard clause: nothing to renew without control plane nodes.
  if [[ ${#CONTROL_PLANE_NODES[@]} -eq 0 ]]; then
    echo "❌ Error: No control plane node IPs found for cluster: ${cluster_name}." >&2
    exit 1
  fi

  for node_ip in "${CONTROL_PLANE_NODES[@]}"; do
    # The etcd client certificates only exist when etcd nodes were processed.
    if ((etcd_node_count > 0)); then
      transfer_certs_to_control_plane "${node_ip}"
    fi
    check_api_server_reachability
    process_control_plane_node "${node_ip}"
  done

  echo "🎉 Control plane certificate renewal process completed successfully."

  cleanup_on_success
}
| 251 | + |
# Script entry point: forward every command-line argument to main unchanged.
main "${@}"
0 commit comments