Skip to content

Commit 40c0b4c

Browse files
xmudriikubermatic-bot
authored and committed
Implement Retryable tasks (#328)
Signed-off-by: Marko Mudrinić <[email protected]>
1 parent cbda3e6 commit 40c0b4c

File tree

3 files changed

+98
-39
lines changed

3 files changed

+98
-39
lines changed

pkg/installer/installation/install.go

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,38 +21,36 @@ import (
2121

2222
"github.com/kubermatic/kubeone/pkg/certificate"
2323
"github.com/kubermatic/kubeone/pkg/features"
24+
"github.com/kubermatic/kubeone/pkg/task"
2425
"github.com/kubermatic/kubeone/pkg/templates/machinecontroller"
2526
"github.com/kubermatic/kubeone/pkg/util"
2627
)
2728

2829
// Install performs all the steps required to install Kubernetes on
2930
// an empty, pristine machine.
3031
func Install(ctx *util.Context) error {
31-
installSteps := []struct {
32-
fn func(*util.Context) error
33-
errMsg string
34-
}{
35-
{fn: installPrerequisites, errMsg: "failed to install prerequisites"},
36-
{fn: generateKubeadm, errMsg: "failed to generate kubeadm config files"},
37-
{fn: kubeadmCertsOnLeader, errMsg: "failed to provision certs and etcd on leader"},
38-
{fn: certificate.DownloadCA, errMsg: "unable to download ca from leader"},
39-
{fn: deployCA, errMsg: "unable to deploy ca on nodes"},
40-
{fn: kubeadmCertsOnFollower, errMsg: "failed to provision certs and etcd on followers"},
41-
{fn: initKubernetesLeader, errMsg: "failed to init kubernetes on leader"},
42-
{fn: joinControlplaneNode, errMsg: "unable to join other masters a cluster"},
43-
{fn: copyKubeconfig, errMsg: "unable to copy kubeconfig to home directory"},
44-
{fn: saveKubeconfig, errMsg: "unable to save kubeconfig to the local machine"},
45-
{fn: util.BuildKubernetesClientset, errMsg: "unable to build kubernetes clientset"},
46-
{fn: features.Activate, errMsg: "unable to activate features"},
47-
{fn: applyCanalCNI, errMsg: "failed to install cni plugin canal"},
48-
{fn: machinecontroller.EnsureMachineController, errMsg: "failed to install machine-controller"},
49-
{fn: machinecontroller.WaitReady, errMsg: "failed to wait for machine-controller"},
50-
{fn: createWorkerMachines, errMsg: "failed to create worker machines"},
32+
installSteps := []task.Task{
33+
{Fn: installPrerequisites, ErrMsg: "failed to install prerequisites"},
34+
{Fn: generateKubeadm, ErrMsg: "failed to generate kubeadm config files"},
35+
{Fn: kubeadmCertsOnLeader, ErrMsg: "failed to provision certs and etcd on leader"},
36+
{Fn: certificate.DownloadCA, ErrMsg: "unable to download ca from leader", Retries: 3},
37+
{Fn: deployCA, ErrMsg: "unable to deploy ca on nodes", Retries: 3},
38+
{Fn: kubeadmCertsOnFollower, ErrMsg: "failed to provision certs and etcd on followers"},
39+
{Fn: initKubernetesLeader, ErrMsg: "failed to init kubernetes on leader"},
40+
{Fn: joinControlplaneNode, ErrMsg: "unable to join other masters a cluster"},
41+
{Fn: copyKubeconfig, ErrMsg: "unable to copy kubeconfig to home directory", Retries: 3},
42+
{Fn: saveKubeconfig, ErrMsg: "unable to save kubeconfig to the local machine", Retries: 3},
43+
{Fn: util.BuildKubernetesClientset, ErrMsg: "unable to build kubernetes clientset", Retries: 3},
44+
{Fn: features.Activate, ErrMsg: "unable to activate features"},
45+
{Fn: applyCanalCNI, ErrMsg: "failed to install cni plugin canal", Retries: 3},
46+
{Fn: machinecontroller.EnsureMachineController, ErrMsg: "failed to install machine-controller", Retries: 3},
47+
{Fn: machinecontroller.WaitReady, ErrMsg: "failed to wait for machine-controller", Retries: 3},
48+
{Fn: createWorkerMachines, ErrMsg: "failed to create worker machines", Retries: 3},
5149
}
5250

5351
for _, step := range installSteps {
54-
if err := step.fn(ctx); err != nil {
55-
return errors.Wrap(err, step.errMsg)
52+
if err := step.Run(ctx); err != nil {
53+
return errors.Wrap(err, step.ErrMsg)
5654
}
5755
}
5856

pkg/task/task.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
Copyright 2019 The KubeOne Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package task
18+
19+
import (
20+
"time"
21+
22+
"github.com/kubermatic/kubeone/pkg/util"
23+
24+
"k8s.io/apimachinery/pkg/util/wait"
25+
)
26+
27+
// defaultRetryBackoff is backoff with with duration of 5 seconds and factor of 2.0
28+
func defaultRetryBackoff(retries int) wait.Backoff {
29+
return wait.Backoff{
30+
Steps: retries,
31+
Duration: 5 * time.Second,
32+
Factor: 2.0,
33+
}
34+
}
35+
36+
// Task is a runnable task
37+
type Task struct {
38+
Fn func(*util.Context) error
39+
ErrMsg string
40+
Retries int
41+
}
42+
43+
// RunTask runs a task
44+
func (t *Task) Run(ctx *util.Context) error {
45+
if t.Retries == 0 {
46+
t.Retries = 1
47+
}
48+
backoff := defaultRetryBackoff(t.Retries)
49+
50+
var lastError error
51+
err := wait.ExponentialBackoff(backoff, func() (bool, error) {
52+
lastError = t.Fn(ctx)
53+
if lastError != nil {
54+
ctx.Logger.Warn("Task failed, retrying…")
55+
return false, nil
56+
}
57+
return true, nil
58+
})
59+
if err == wait.ErrWaitTimeout {
60+
err = lastError
61+
}
62+
return err
63+
}

pkg/upgrader/upgrade/upgrade.go

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
"github.com/kubermatic/kubeone/pkg/certificate"
2525
"github.com/kubermatic/kubeone/pkg/features"
26+
"github.com/kubermatic/kubeone/pkg/task"
2627
"github.com/kubermatic/kubeone/pkg/templates/machinecontroller"
2728
"github.com/kubermatic/kubeone/pkg/util"
2829
)
@@ -42,26 +43,23 @@ const (
4243
// cluster provisioned using KubeOne
4344
func Upgrade(ctx *util.Context) error {
4445
// commonSteps are same for all worker nodes and they are safe to be run in parallel
45-
commonSteps := []struct {
46-
fn func(ctx *util.Context) error
47-
errMsg string
48-
}{
49-
{fn: util.BuildKubernetesClientset, errMsg: "unable to build kubernetes clientset"},
50-
{fn: determineHostname, errMsg: "unable to determine hostname"},
51-
{fn: determineOS, errMsg: "unable to determine operating system"},
52-
{fn: runPreflightChecks, errMsg: "preflight checks failed"},
53-
{fn: upgradeLeader, errMsg: "unable to upgrade leader control plane"},
54-
{fn: upgradeFollower, errMsg: "unable to upgrade follower control plane"},
55-
{fn: features.Activate, errMsg: "unable to activate features"},
56-
{fn: certificate.DownloadCA, errMsg: "unable to download ca from leader"},
57-
{fn: machinecontroller.EnsureMachineController, errMsg: "failed to update machine-controller"},
58-
{fn: machinecontroller.WaitReady, errMsg: "failed to wait for machine-controller"},
59-
{fn: upgradeMachineDeployments, errMsg: "unable to upgrade MachineDeployments"},
46+
commonSteps := []task.Task{
47+
{Fn: util.BuildKubernetesClientset, ErrMsg: "unable to build kubernetes clientset"},
48+
{Fn: determineHostname, ErrMsg: "unable to determine hostname"},
49+
{Fn: determineOS, ErrMsg: "unable to determine operating system"},
50+
{Fn: runPreflightChecks, ErrMsg: "preflight checks failed"},
51+
{Fn: upgradeLeader, ErrMsg: "unable to upgrade leader control plane", Retries: 3},
52+
{Fn: upgradeFollower, ErrMsg: "unable to upgrade follower control plane", Retries: 3},
53+
{Fn: features.Activate, ErrMsg: "unable to activate features"},
54+
{Fn: certificate.DownloadCA, ErrMsg: "unable to download ca from leader", Retries: 3},
55+
{Fn: machinecontroller.EnsureMachineController, ErrMsg: "failed to update machine-controller", Retries: 3},
56+
{Fn: machinecontroller.WaitReady, ErrMsg: "failed to wait for machine-controller", Retries: 3},
57+
{Fn: upgradeMachineDeployments, ErrMsg: "unable to upgrade MachineDeployments", Retries: 3},
6058
}
6159

6260
for _, step := range commonSteps {
63-
if err := step.fn(ctx); err != nil {
64-
return errors.Wrap(err, step.errMsg)
61+
if err := step.Run(ctx); err != nil {
62+
return errors.Wrap(err, step.ErrMsg)
6563
}
6664
}
6765

0 commit comments

Comments
 (0)