1
#!/bin/bash
# Smooths upgrades/roll-backs where the release of kubernetes jumps a release.
# It kills old controllers so that this one takes over all api functions, so we don't get an
# extended period of old and new running side-by-side and the incompatibilities that this can bring.
# It also removes any mutating and validating webhooks in the system so that install-kube-system can run without interference.
#
# A request to disable is a configmap matching the hostname and kubernetes version, containing a list of core services to stop:
#
#   apiVersion: v1
#   kind: ConfigMap
#   metadata:
#     name: kube-aws-migration-disable-ip-10-29-26-83.us-west-2.compute.internal
#     namespace: kube-system
#   data:
#     kubernetesVersion: v1.9.3
#     disable: "kube-apiserver kube-controller-manager kube-scheduler"
16
+
17
# Maximum number of attempts made by kubectl_with_retries.
retries=5
# The {{ ... }} placeholders below are template expressions; they are expected
# to be substituted with real values before this script is executed.
hyperkube_image="{{ .Config.HyperkubeImage.RepoWithTag }}"
my_kubernetes_version="{{ .Config.HyperkubeImage.Tag }}"
# Fully-qualified hostname; compared against the master node names in MAIN.
myhostname=$(hostname -f)
disable_webhooks="{{ if .Values.disableWebhooks }}true{{else}}false{{end}}"
22
+
23
# Runs kubectl from the hyperkube image inside a throw-away docker container,
# using the admin kubeconfig under /etc/kubernetes (mounted read-only).
# Globals:   hyperkube_image (read)
# Arguments: passed through to kubectl unchanged, one word per argument.
# Returns:   exit status of the docker run command.
kubectl() {
  # ${hyperkube_image} is deliberately left unquoted so that stray whitespace
  # around the templated value is dropped by word splitting.
  # shellcheck disable=SC2086
  /usr/bin/docker run -i --rm \
    -v /etc/kubernetes:/etc/kubernetes:ro \
    --net=host \
    ${hyperkube_image} \
    /hyperkube kubectl --kubeconfig=/etc/kubernetes/kubeconfig/admin.yaml "$@"
}
26
+
27
# Runs kubectl with the given arguments, retrying on failure.
# Globals:   retries (read) - maximum number of attempts
# Arguments: passed through to kubectl unchanged.
# Inputs:    when stdin is a pipe (e.g. `... | kubectl_with_retries apply -f -`)
#            it is read once and replayed to every attempt.
# Outputs:   stdout of the first successful attempt; nothing if all fail.
# Returns:   0 on success, otherwise the exit status of the last attempt.
kubectl_with_retries() {
  local attempt=0
  local output=""
  local status=0
  local stdin_data=""
  local replay_stdin=0

  # A pipe can only be consumed once: without buffering it, the first attempt
  # would swallow the manifest and every retry would see empty input.
  if [[ -p /dev/stdin ]]; then
    stdin_data=$(cat)
    replay_stdin=1
  fi

  while (( attempt < retries )); do
    if (( replay_stdin )); then
      output=$(kubectl "$@" <<<"${stdin_data}")
      status=$?
    else
      output=$(kubectl "$@")
      status=$?
    fi

    if (( status == 0 )); then
      printf '%s\n' "${output}"
      break
    fi

    attempt=$(( attempt + 1 ))
    # Only back off when another attempt will actually follow.
    if (( attempt < retries )); then
      sleep 10
    fi
  done

  return "${status}"
}
44
+
45
# Writes a diagnostic message to stderr.
# Arguments: words of the message; they are joined with single spaces.
# Outputs:   the message followed by a newline, on stderr only.
log() {
  # printf rather than echo so messages such as "-n" are printed literally.
  printf '%s\n' "$*" >&2
}
48
+
49
# Lists the nodes labelled kubernetes.io/role=master.
# Outputs: one "<node name>:<kubelet version>" line per node on stdout.
# Returns: non-zero (kubectl's status) if the node list cannot be fetched.
get_masters() {
  local nodes

  # Capture first so a kubectl failure is reported instead of being hidden
  # behind the exit status of awk.
  nodes=$(kubectl get nodes -l kubernetes.io/role=master --no-headers \
    -o custom-columns=NAME:metadata.name,VERSION:status.nodeInfo.kubeletVersion) || return

  # NF >= 2 skips blank or incomplete rows rather than emitting a bare ":".
  awk 'NF >= 2 {printf "%s:%s\n", $1, $2}' <<<"${nodes}"
}
52
+
53
# Checks that a string starts with a v<major>.<minor>.<patch> version number.
# Anything may follow the patch number (e.g. a build suffix).
# Arguments: $1 - version string to check
# Outputs:   nothing
# Returns:   0 if the string looks like a version, 1 otherwise.
valid_version() {
  [[ "$1" =~ ^v[0-9]+\.[0-9]+\.[0-9]+ ]]
}
57
+
58
# Decides whether two kubernetes versions differ by more than a patch release.
# Only the <major>.<minor> prefix is compared, so v1.9.3 -> v1.9.7 is NOT a
# jump while v1.9.3 -> v1.10.0 is.
# Arguments: $1 - first version, $2 - second version
# Returns:   0 if the versions jump, 1 if they share the same major.minor.
version_jumps() {
  local -r prefix_re='^(v?[0-9]+\.[0-9]+)\.'
  # Fallback for strings that do not look like versions: drop the last
  # dot-separated component.
  local mine=${1%.*}
  local theirs=${2%.*}

  # Prefer an explicit major.minor extraction so dotted suffixes such as
  # "_coreos.0" / "+coreos.0" do not register as a jump.
  if [[ "$1" =~ ${prefix_re} ]]; then
    mine=${BASH_REMATCH[1]}
  fi
  if [[ "$2" =~ ${prefix_re} ]]; then
    theirs=${BASH_REMATCH[1]}
  fi

  [[ "${mine}" != "${theirs}" ]]
}
65
+
66
# Stops a controller by writing a special kube-aws "disable service" configmap
# named kube-system/kube-aws-migration-disable-<controller>.
# Arguments: $1 - node name of the controller to disable
#            $2 - kubernetes version recorded in the configmap
# Outputs:   progress message on stderr (via log); kubectl output on stdout.
# Returns:   0 if the configmap was applied, 1 otherwise.
disable_controller() {
  local controller=$1
  local version=$2
  local request

  # The YAML nesting below is significant; keep the heredoc at column 0.
  request=$(
    cat <<EOT
apiVersion: v1
kind: ConfigMap
metadata:
  name: kube-aws-migration-disable-${controller}
  namespace: kube-system
data:
  kubernetesVersion: ${version}
  disable: "kube-controller-manager kube-scheduler kube-apiserver"
EOT
  )

  log "Creating disable service configmap kube-system/kube-aws-migration-disable-${controller}"
  printf '%s\n' "${request}" | kubectl_with_retries -n kube-system apply -f - || return 1
  return 0
}
87
+
88
# Looks up the kube-system pod called "<name>-<host>".
# Arguments: $1 - pod name prefix
#            $2 - host name suffix
# Outputs:   the pod's `-o wide` row without headers; nothing if it is absent.
# Returns:   exit status of kubectl.
find_pod() {
  local name=$1
  local host=$2

  kubectl -n kube-system get pod "${name}-${host}" \
    --no-headers -o wide --ignore-not-found
}
94
+
95
# Tests whether a node currently reports the status "Ready".
# Arguments: $1 - node name
# Returns:   0 if the second column of `kubectl get node` is exactly "Ready",
#            1 otherwise (including when the node is missing or kubectl
#            produced no output).
node_running() {
  local node=$1
  local status

  status=$(kubectl -n kube-system get node "${node}" --no-headers --ignore-not-found \
    | awk '{print $2}')

  # Exact match on purpose: any other status string counts as "not running".
  [[ "${status}" == "Ready" ]]
}
105
+
106
# Blocks until none of the given controller nodes reports Ready any more.
# Every node is re-checked on each pass, with a 5 second pause between passes.
# There is no timeout: the loop only ends once every node has stopped.
# Arguments: $1 - whitespace-separated list of node names
# Outputs:   progress messages on stderr (via log).
wait_stopped() {
  local controllers=$1
  local node
  local still_running=1

  log ""
  log "WAITING FOR ALL MATCHED CONTROLLERS TO STOP:-"
  log "${controllers}"
  log ""

  while (( still_running )); do
    still_running=0

    # Word splitting is intended here: $controllers is a space-separated list.
    # shellcheck disable=SC2086
    for node in ${controllers}; do
      if node_running "${node}"; then
        still_running=1
      fi
    done

    if (( still_running )); then
      log "Controllers still active, waiting 5 seconds..."
      sleep 5
    fi
  done
}
129
+
130
# Exports all webhook configurations of one type to a file and then deletes
# them from the cluster. If the file already exists and is non-empty the
# export is assumed to have been done before and nothing is changed.
# Arguments: $1 - webhook type prefix: "validating" or "mutating"
#            $2 - path of the file that receives the exported YAML
# Outputs:   progress messages on stdout.
# Returns:   1 if the export fails; otherwise the status of the last step.
save_webhooks() {
  local type=$1
  local file=$2

  echo "Storing and removing all ${type} webhooks to ${file}"
  if [[ -s "${file}" ]]; then
    echo "${file} already saved"
    return 0
  fi

  if ! kubectl get "${type}webhookconfigurations" -o yaml --export >"${file}"; then
    # Do not leave a partial export behind: a non-empty file would be treated
    # as "already saved" on the next run and the webhooks never removed.
    rm -f -- "${file}"
    return 1
  fi

  if list_not_empty "${file}"; then
    echo "deleting ${type} webhooks..."
    ensuredelete "${file}"
  fi
}
145
+
146
# Tests whether an exported kubectl list file contains at least one item.
# Arguments: $1 - path of the exported YAML file
# Outputs:   nothing
# Returns:   0 if the file is non-empty and has no "items: []" line,
#            1 if the file is missing, empty, or holds an empty list.
list_not_empty() {
  local file=$1

  [[ -s "${file}" ]] || return 1

  # -q: this is a pure predicate, so the matched line is not echoed.
  if grep -qs -e 'items: \[\]' -- "${file}"; then
    return 1
  fi
  return 0
}
156
+
157
# Deletes every resource described in the given manifest files; resources that
# no longer exist are ignored.
# Arguments: one or more manifest file paths
# Returns:   exit status of kubectl.
ensuredelete() {
  local joined

  # Join the paths with commas in a subshell so the IFS change cannot leak
  # into kubectl; joining on IFS keeps paths that contain spaces intact.
  joined=$(IFS=,; printf '%s' "$*")

  kubectl delete --cascade=true --ignore-not-found=true -f "${joined}"
}
160
+
161
# MAIN
#
# 1. Validate this node's kubernetes version.
# 2. Wait for the apiserver to answer.
# 3. Optionally export and remove all validating/mutating webhooks.
# 4. Ask every other master whose version jumps relative to ours to disable
#    its core services, then wait for those nodes to stop reporting Ready.

# Strip whitespace so stray padding around the templated values cannot defeat
# the validation and comparisons below.
my_kubernetes_version=${my_kubernetes_version//[[:space:]]/}
disable_webhooks=${disable_webhooks//[[:space:]]/}

if ! valid_version "${my_kubernetes_version}"; then
  log "My kubernetes version ${my_kubernetes_version} is invalid - aborting!"
  exit 1
fi

while ! kubectl get ns kube-system; do
  echo "waiting for apiserver to be available..."
  sleep 3
done

# Disable all mutating and validating webhooks because they can interfere with the stack migration.
if [[ "${disable_webhooks}" == "true" ]]; then
  echo "Storing and removing all validating and mutating webhooks..."
  save_webhooks validating /srv/kubernetes/validating_webhooks.yaml
  save_webhooks mutating /srv/kubernetes/mutating_webhooks.yaml
fi

log ""
log "CHECKING CONTROLLER VERSIONS..."
log ""
found=""
# Each entry has the form "<node name>:<kubelet version>".
mapfile -t masters < <(get_masters)
for controller in "${masters[@]}"; do
  controller_name=${controller%%:*}
  controller_version=${controller##*:}

  # Never disable ourselves.
  if [[ "${controller_name}" == "${myhostname}" ]]; then
    continue
  fi

  if ! valid_version "${controller_version}"; then
    log "Controller ${controller_name} has an invalid version number ${controller_version}!"
    continue
  fi

  if version_jumps "${my_kubernetes_version}" "${controller_version}"; then
    log "Detected a version jump on ${controller_name}: my version is ${my_kubernetes_version} and theirs is ${controller_version}"
    log "Disabling kube-apiserver, kube-scheduler and kube-controller-manager..."
    found="${found:+${found} }${controller_name}"
    if ! disable_controller "${controller_name}" "${controller_version}"; then
      # Still waited on below; the failure is surfaced instead of being silent.
      log "Failed to create the disable configmap for ${controller_name}"
    fi
  else
    log "No version jump on ${controller_name}: my version is ${my_kubernetes_version} and theirs is ${controller_version}"
  fi
done

if [[ -n "${found}" ]]; then
  log ""
  log "WAITING FOR FOUND CONTROLLERS TO STOP..."
  log ""
  wait_stopped "${found}"
fi
exit 0
0 commit comments