Skip to content
This repository was archived by the owner on Sep 30, 2020. It is now read-only.

Commit 6bf0e67

Browse files
Implement a simple upgradeHelper plugin which will disable existing controllers when a new kubernetes release is being rolled out. (#1678)
Save mutating and validating webhooks before install-kube-system runs and restore again afterwards. Webhook feature can be toggled using 'disableWebhooks' boolean plugin config entry.
1 parent f073d3d commit 6bf0e67

File tree

4 files changed

+327
-1
lines changed

4 files changed

+327
-1
lines changed

builtin/files/cluster.yaml.tmpl

+15-1
Original file line numberDiff line numberDiff line change
@@ -1573,7 +1573,9 @@ kubeAwsPlugins:
15731573
# See plugins/aws-iam-authenticator/plugin.yaml for more info
15741574
awsIamAuthenticator:
15751575
enabled: false
1576-
# see plugins/cluster-autoscaler/plugin.yaml for more info
1576+
1577+
# clusterAutoscaler provides kubernetes cluster-autoscaler functionality - https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
1578+
# Replaces original built-in functionality with a plugin and upgrades to the latest version
15771579
clusterAutoscaler:
15781580
enabled: false
15791581
replicas: 2
@@ -1604,3 +1606,15 @@ kubeAwsPlugins:
16041606
# selectors for autodiscovery
16051607
selector:
16061608
prometheus: monitoring
1609+
1610+
# upgradeHelper - assists when rolling out new versions of kubernetes
1611+
# It actively disables old controllers and temporarily removes mutating/validating webhooks whilst
1612+
# the upgraded controller is starting up.
1613+
# NOTE: You will normally not need this plugin - so ONLY enable it if you are experiencing issues when migrating across versions.
1614+
# It will only kill controllers that are on a different release from the version currently spinning up, e.g. :-
1615+
# it will kill v1.13.2 controllers when rolling out v1.14.0
1616+
# it will NOT kill v1.14.0 controllers when rolling out v1.14.3
1617+
upgradeHelper:
1618+
enabled: false
1619+
# disableWebhooks can be used to turn off the webhook feature if required
1620+
disableWebhooks: true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/bin/bash
2+
# Restore webhooks that were exported and then deleted by upgrade-helper.sh
3+
4+
# NOTE(review): retries is not referenced by any code visible in this script.
retries=5
5+
# Image reference used by kubectl() below to run kubectl in a container.
# The {{ ... }} placeholder is presumably substituted by the plugin's
# templating before the script is installed - TODO confirm.
hyperkube_image="{{ .Config.HyperkubeImage.RepoWithTag }}"
6+
# "true" when the plugin value disableWebhooks is set, "false" otherwise;
# checked at the bottom of the script to decide whether anything is restored.
disable_webhooks="{{ if .Values.disableWebhooks }}true{{else}}false{{end}}"
7+
8+
#######################################
# Run kubectl from the hyperkube image in a throwaway docker container.
# The container uses host networking and mounts /etc/kubernetes read-only so
# that the admin kubeconfig can be read.
# Globals:   hyperkube_image (read)
# Arguments: passed through unchanged to kubectl
# Returns:   exit status of docker run
#######################################
kubectl() {
  # The image reference is quoted so it is always passed as exactly one
  # argument, even if it is empty or contains unexpected characters.
  /usr/bin/docker run -i --rm \
    -v /etc/kubernetes:/etc/kubernetes:ro \
    --net=host \
    "${hyperkube_image}" \
    /hyperkube kubectl --kubeconfig=/etc/kubernetes/kubeconfig/admin.yaml "$@"
}
11+
12+
#######################################
# Test whether an exported resource list file contains at least one item.
# Arguments: $1 - path to the exported YAML file
# Outputs:   nothing
# Returns:   0 if the file has content and is not an empty list,
#            1 if the file is missing, zero-length, or contains "items: []"
#######################################
list_not_empty() {
  local file=$1

  # Missing or zero-length file: nothing was exported.
  if ! [[ -s "$file" ]]; then
    return 1
  fi

  # -q keeps the matched line out of the caller's output; -s hides read errors.
  if grep -qs -e 'items: \[\]' -- "$file"; then
    return 1
  fi
  return 0
}
22+
23+
#######################################
# Force-apply one or more manifest files with a single kubectl call.
# Arguments: $@ - manifest paths; they are joined with commas for -f
# Returns:   exit status of kubectl
#######################################
applyall() {
  local joined
  # Join in a subshell so the IFS change cannot leak into kubectl(), and so
  # that spaces inside a single path are not turned into commas.
  joined=$(IFS=,; printf '%s' "$*")
  kubectl apply --force -f "${joined}"
}
26+
27+
#######################################
# Re-apply the webhook configurations saved in a file, if it holds any.
# Arguments: $1 - webhook type, used only in messages (e.g. validating)
#            $2 - path to the saved YAML export
# Outputs:   progress messages on stdout
# Returns:   status of applyall when a restore is attempted, otherwise 0
#######################################
restore_webhooks() {
  local type=$1
  local file=$2

  # The path is quoted so it reaches the helpers as a single argument.
  if list_not_empty "${file}"; then
    echo "Restoring all ${type} webhooks from ${file}"
    applyall "${file}"
  else
    echo "no webhooks to restore in $file"
  fi
}
38+
39+
# Entry point: webhooks are only put back when the webhook feature is switched
# on; the script always finishes with status 0.
case "${disable_webhooks}" in
  true)
    echo "Restoring all validating and mutating webhooks..."
    restore_webhooks validating /srv/kubernetes/validating_webhooks.yaml
    restore_webhooks mutating /srv/kubernetes/mutating_webhooks.yaml
    ;;
esac
exit 0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
#!/bin/bash
2+
# Smooths upgrades/roll-backs where the Kubernetes version changes by more than a patch release
3+
# It kills old controllers so that this one takes over all api functions, so we don't get an
4+
# extended period of old and new running side-by-side and the incompatibilities that this can bring.
5+
# It also removes any mutating and validating webhooks in the system so that install-kube-system can run without interference.
6+
#
7+
# A request to disable is a configmap, named after the hostname and carrying the kubernetes version, that contains a list of core services to stop:
8+
# apiVersion: v1
9+
# kind: ConfigMap
10+
# metadata:
11+
# name: kube-aws-migration-disable-ip-10-29-26-83.us-west-2.compute.internal
12+
# namespace: kube-system
13+
# data:
14+
# kubernetesVersion: v1.9.3
15+
# disable: "kube-apiserver kube-controller-manager kube-scheduler"
16+
17+
# Maximum number of attempts made by kubectl_with_retries.
retries=5
18+
# Image reference used by kubectl() below to run kubectl in a container.
# The {{ ... }} placeholders are presumably substituted by the plugin's
# templating before the script is installed - TODO confirm.
hyperkube_image="{{ .Config.HyperkubeImage.RepoWithTag }}"
19+
# Version this node is rolling out; compared against each master's kubelet version.
my_kubernetes_version="{{ .Config.HyperkubeImage.Tag }}"
20+
# Fully-qualified hostname, used to skip this node when scanning masters.
myhostname=$(hostname -f)
21+
# "true" when the plugin value disableWebhooks is set, "false" otherwise.
disable_webhooks="{{ if .Values.disableWebhooks }}true{{else}}false{{end}}"
22+
23+
#######################################
# Run kubectl from the hyperkube image in a throwaway docker container.
# The container uses host networking and mounts /etc/kubernetes read-only so
# that the admin kubeconfig can be read.
# Globals:   hyperkube_image (read)
# Arguments: passed through unchanged to kubectl
# Returns:   exit status of docker run
#######################################
kubectl() {
  # The image reference is quoted so it is always passed as exactly one
  # argument, even if it is empty or contains unexpected characters.
  /usr/bin/docker run -i --rm \
    -v /etc/kubernetes:/etc/kubernetes:ro \
    --net=host \
    "${hyperkube_image}" \
    /hyperkube kubectl --kubeconfig=/etc/kubernetes/kubeconfig/admin.yaml "$@"
}
26+
27+
#######################################
# Run kubectl, retrying on failure up to $retries times.
# Globals:   retries (read)
# Arguments: passed through unchanged to kubectl
# Outputs:   kubectl's stdout, only for the attempt that succeeds
# Returns:   0 on success, otherwise the status of the last failed attempt
#######################################
kubectl_with_retries() {
  local attempt=0
  local output=""
  local status=0

  while [[ "${attempt}" -lt "${retries}" ]]; do
    output=$(kubectl "$@")
    status=$?
    if [[ "${status}" -eq 0 ]]; then
      printf '%s\n' "${output}"
      return 0
    fi

    attempt=$((attempt + 1))
    # Only pause when another attempt will follow; sleeping after the final
    # failure just delays the caller.
    if [[ "${attempt}" -lt "${retries}" ]]; then
      sleep 10
    fi
  done
  return "${status}"
}
44+
45+
# Write a diagnostic message (all arguments, echo-joined) to stderr so that it
# never mixes with data a function returns on stdout.
log() {
  >&2 echo "$@"
}
48+
49+
get_masters() {
50+
kubectl get nodes -l kubernetes.io/role=master --no-headers -o custom-columns=NAME:metadata.name,VERSION:status.nodeInfo.kubeletVersion | awk '{printf "%s:%s\n", $1, $2}'
51+
}
52+
53+
#######################################
# Check that a string starts with a vMAJOR.MINOR.PATCH version number.
# Anything after the patch number (e.g. a build suffix) is accepted.
# Arguments: $1 - version string
# Outputs:   nothing
# Returns:   0 if the string looks like a version, 1 otherwise
#######################################
valid_version() {
  # Bash's own regex match replaces `awk -e`, which only GNU awk understands,
  # and avoids leaving a global `match` variable behind.
  [[ "${1-}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+ ]]
}
57+
58+
#######################################
# Decide whether two versions differ by more than a patch release.
# Arguments: $1, $2 - version strings such as v1.14.0
# Returns:   0 if the major.minor parts differ (a version jump),
#            1 if only the patch level (or a suffix) differs
#######################################
version_jumps() {
  local pattern='^(v?[0-9]+\.[0-9]+)\.'
  # Fallback for strings that do not look like versions: drop everything
  # after the last dot.
  local mine=${1%.*}
  local theirs=${2%.*}

  # Compare only the leading major.minor, so a suffix containing dots
  # (e.g. v1.14.3_coreos.0) is not mistaken for a different release.
  if [[ "$1" =~ $pattern ]]; then
    mine=${BASH_REMATCH[1]}
  fi
  if [[ "$2" =~ $pattern ]]; then
    theirs=${BASH_REMATCH[1]}
  fi

  [[ "${mine}" != "${theirs}" ]]
}
65+
66+
# stop a controller by writing a special kube-aws disable service configmap
67+
#######################################
# Ask a controller node to stop its core services by applying a
# kube-aws-migration-disable-<node> ConfigMap in kube-system.
# Arguments: $1 - controller node name
#            $2 - kubernetes version reported by that controller
# Outputs:   progress message via log; output of the apply on stdout
# Returns:   0 if the ConfigMap was applied, 1 otherwise
#######################################
disable_controller() {
  local controller=$1
  local version=$2
  local request

  # name/namespace must be nested under metadata, and the two keys under
  # data, for this to be the ConfigMap described in the file header.
  request=$(cat <<EOT
apiVersion: v1
kind: ConfigMap
metadata:
  name: kube-aws-migration-disable-${controller}
  namespace: kube-system
data:
  kubernetesVersion: ${version}
  disable: "kube-controller-manager kube-scheduler kube-apiserver"
EOT
)

  log "Creating disable service configmap kube-system/kube-aws-migration-disable-${controller}"
  printf '%s\n' "${request}" | kubectl_with_retries -n kube-system apply -f - || return 1
  return 0
}
87+
88+
#######################################
# Look up the kube-system pod named "<name>-<host>".
# Arguments: $1 - pod name prefix
#            $2 - host name suffix
# Outputs:   kubectl's wide, header-less listing; nothing if the pod is absent
#######################################
find_pod() {
  local pod="${1}-${2}"

  kubectl -n kube-system get pod "${pod}" \
    --no-headers -o wide --ignore-not-found
}
94+
95+
#######################################
# Test whether a node currently reports the status "Ready".
# Arguments: $1 - node name
# Returns:   0 if the STATUS column is exactly "Ready", 1 otherwise
#            (including when the node does not exist)
# NOTE(review): a combined status such as "Ready,SchedulingDisabled" does not
# match and is treated as not running - confirm this is intended.
#######################################
node_running() {
  local node=$1
  local state   # kept local so it no longer leaks into the script's globals

  state=$(kubectl -n kube-system get node "${node}" --no-headers --ignore-not-found \
    | awk '{print $2}')
  if [[ "${state}" == "Ready" ]]; then
    return 0
  fi

  return 1
}
105+
106+
#######################################
# Block until none of the given controller nodes is running any more.
# Every node is re-checked on each pass, with a 5 second pause between passes.
# Arguments: $1 - space-separated list of node names
# Outputs:   progress messages via log
# Returns:   0 once node_running fails for every node
#######################################
wait_stopped() {
  local controllers=$1
  local cont          # loop variable, kept out of the global scope
  local still_running

  log ""
  log "WAITING FOR ALL MATCHED CONTROLLERS TO STOP:-"
  log "${controllers}"
  log ""

  while true; do
    still_running=0

    # Unquoted on purpose: the list is split on whitespace into node names.
    for cont in ${controllers}; do
      if node_running "${cont}"; then
        still_running=1
      fi
    done

    if [[ "${still_running}" -eq 0 ]]; then
      break
    fi
    log "Controllers still active, waiting 5 seconds..."
    sleep 5
  done
  return 0
}
129+
130+
#######################################
# Export all webhook configurations of one type to a file, then delete them
# from the cluster. An existing non-empty file is left untouched and nothing
# is deleted.
# Arguments: $1 - webhook type prefix (validating or mutating)
#            $2 - path of the file to export to
# Outputs:   progress messages on stdout; export failure on stderr
# Returns:   1 if the export fails, otherwise the status of the last step
#######################################
save_webhooks() {
  local type=$1
  local file=$2

  echo "Storing and removing all ${type} webhooks to ${file}"
  if [[ -s "${file}" ]]; then
    echo "$file already saved"
    return 0
  fi

  # A failed export must not leave a partial file behind: it would be used
  # for the delete below and be treated as "already saved" on the next run.
  if ! kubectl get "${type}webhookconfigurations" -o yaml --export >"${file}"; then
    echo "failed to export ${type} webhooks to ${file} - nothing deleted" >&2
    rm -f -- "${file}"
    return 1
  fi

  if list_not_empty "${file}"; then
    echo "deleting $type webhooks..."
    ensuredelete "${file}"
  fi
}
145+
146+
#######################################
# Test whether an exported resource list file contains at least one item.
# Arguments: $1 - path to the exported YAML file
# Outputs:   nothing
# Returns:   0 if the file has content and is not an empty list,
#            1 if the file is missing, zero-length, or contains "items: []"
#######################################
list_not_empty() {
  local file=$1

  # Missing or zero-length file: nothing was exported.
  if ! [[ -s "$file" ]]; then
    return 1
  fi

  # -q keeps the matched line out of the caller's output; -s hides read errors.
  if grep -qs -e 'items: \[\]' -- "$file"; then
    return 1
  fi
  return 0
}
156+
157+
#######################################
# Delete the resources described by one or more manifest files, ignoring
# resources that no longer exist.
# Arguments: $@ - manifest paths; they are joined with commas for -f
# Returns:   exit status of kubectl
#######################################
ensuredelete() {
  local joined
  # Join in a subshell so the IFS change cannot leak into kubectl(), and so
  # that spaces inside a single path are not turned into commas.
  joined=$(IFS=,; printf '%s' "$*")
  kubectl delete --cascade=true --ignore-not-found=true -f "${joined}"
}
160+
161+
# MAIN
162+
163+
# Entry point. Steps, in order:
#   1. refuse to run if this node's own version string is not a valid version;
#   2. wait until the apiserver answers;
#   3. optionally export and remove all validating/mutating webhooks;
#   4. request shutdown of every other master whose version differs by more
#      than a patch release, then wait for those nodes to stop being Ready.

# The functions are called directly: wrapping them in $(...) would try to
# execute whatever they print as a command.
if ! valid_version "${my_kubernetes_version}"; then
  log "My kubernetes version ${my_kubernetes_version} is invalid - aborting!"
  exit 1
fi

while ! kubectl get ns kube-system; do
  echo "waiting for apiserver to be available..."
  sleep 3
done

# Disable all mutating and validating webhooks because they can interfere with the stack migration
if [[ "${disable_webhooks}" == "true" ]]; then
  echo "Storing and removing all validating and mutating webhooks..."
  save_webhooks validating /srv/kubernetes/validating_webhooks.yaml
  save_webhooks mutating /srv/kubernetes/mutating_webhooks.yaml
fi

log ""
log "CHECKING CONTROLLER VERSIONS..."
log ""
found=""
# get_masters prints whitespace-free "name:version" tokens, so word-splitting
# its output is intentional here.
for controller in $(get_masters); do
  controller_name=${controller%%:*}
  controller_version=${controller##*:}

  # Never act on the node this script is running on.
  if [[ "${controller_name}" == "${myhostname}" ]]; then
    continue
  fi

  if ! valid_version "${controller_version}"; then
    log "Controller ${controller_name} has an invalid version number ${controller_version}!"
    continue
  fi

  if version_jumps "${my_kubernetes_version}" "${controller_version}"; then
    log "Detected a version jump on ${controller_name}: my version is ${my_kubernetes_version} and theirs is ${controller_version}"
    log "Disabling kube-apiserver, kube-scheduler and kube-controller-manager..."
    if [[ -z "${found}" ]]; then
      found="${controller_name}"
    else
      found="${found} ${controller_name}"
    fi
    # The node is still waited for below even if the request fails; the
    # failure is logged so that a long wait can be explained.
    if ! disable_controller "${controller_name}" "${controller_version}"; then
      log "Failed to create the disable request for ${controller_name}"
    fi
  else
    log "No version jump on ${controller_name}: my version is ${my_kubernetes_version} and theirs is ${controller_version}"
  fi
done

if [[ -n "${found}" ]]; then
  log ""
  log "WAITING FOR FOUND CONTROLLERS TO STOP..."
  log ""
  wait_stopped "${found}"
fi
exit 0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
metadata:
2+
name: upgrade-helper
3+
version: 0.1.0
4+
spec:
5+
cluster:
6+
values:
7+
disableWebhooks: true
8+
machine:
9+
roles:
10+
controller:
11+
files:
12+
- path: /etc/systemd/system/install-kube-system.service.d/10-upgrade-helper-dependency.conf
13+
permissions: 0644
14+
content: |
15+
[Unit]
16+
Requires=kube-aws-upgrade-helper.service
17+
After=kube-aws-upgrade-helper.service
18+
Before=restore-webhooks.service
19+
20+
[Service]
21+
ExecStartPre=/usr/bin/bash -c "until /usr/bin/systemctl is-active kube-aws-upgrade-helper.service; do echo waiting until kube-aws-upgrade-helper.service starts; sleep 10; done"
22+
- path: /opt/bin/upgrade-helper.sh
23+
permissions: 0755
24+
source:
25+
path: assets/upgrade-helper.sh
26+
- path: /opt/bin/restore-webhooks.sh
27+
permissions: 0755
28+
source:
29+
path: assets/restore-webhooks.sh
30+
systemd:
31+
units:
32+
- name: kube-aws-upgrade-helper.service
33+
content: |
34+
[Unit]
35+
Requires=kubelet.service
36+
After=kubelet.service
37+
Before=install-kube-system.service
38+
39+
[Service]
40+
Type=oneshot
41+
StartLimitInterval=0
42+
RemainAfterExit=true
43+
ExecStart=/usr/bin/bash -c '/opt/bin/upgrade-helper.sh'
44+
- name: restore-webhooks.service
45+
content: |
46+
[Unit]
47+
Requires=install-kube-system.service
48+
After=install-kube-system.service
49+
50+
[Service]
51+
Type=oneshot
52+
StartLimitInterval=0
53+
RemainAfterExit=true
54+
ExecStart=/usr/bin/bash -c '/opt/bin/restore-webhooks.sh'

0 commit comments

Comments
 (0)