Skip to content

Commit 6d01ef5

Browse files
authored
Merge pull request #188 from alexander-demicev/maxsurge
Add maxsurge option for control plane upgrade
2 parents 337ecdf + 37320ae commit 6d01ef5

10 files changed

Lines changed: 212 additions & 14 deletions

File tree

controlplane/api/v1alpha1/rke2controlplane_types.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package v1alpha1
1919
import (
2020
corev1 "k8s.io/api/core/v1"
2121
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22+
"k8s.io/apimachinery/pkg/util/intstr"
2223

2324
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2425

@@ -71,6 +72,11 @@ type RKE2ControlPlaneSpec struct {
7172
// the registration type is "address". It's for scenarios where a load-balancer or VIP is used.
7273
// +optional
7374
RegistrationAddress string `json:"registrationAddress,omitempty"`
75+
76+
// The RolloutStrategy to use to replace control plane machines with new ones.
77+
// +optional
78+
// +kubebuilder:default={type: "RollingUpdate", rollingUpdate: {maxSurge: 1}}
79+
RolloutStrategy *RolloutStrategy `json:"rolloutStrategy,omitempty"`
7480
}
7581

7682
// RKE2ServerConfig specifies configuration for the agent nodes.
@@ -343,6 +349,40 @@ const (
343349
MetricsServer DisabledPluginComponent = "rke2-metrics-server"
344350
)
345351

352+
// RolloutStrategy describes how to replace existing control plane machines
353+
// with new ones. It is the type of the RKE2ControlPlaneSpec.RolloutStrategy field.
354+
type RolloutStrategy struct {
355+
// Type of rollout. Currently the only supported strategy is "RollingUpdate".
356+
// Default is RollingUpdate.
357+
// +optional
358+
Type RolloutStrategyType `json:"type,omitempty"`
359+
360+
// Rolling update config params. Present only if RolloutStrategyType = RollingUpdate.
361+
// +optional
362+
RollingUpdate *RollingUpdate `json:"rollingUpdate,omitempty"`
363+
}
364+
365+
// RollingUpdate holds the parameters of a rolling update of the control plane; MaxSurge is its only field.
366+
type RollingUpdate struct {
367+
// The maximum number of control planes that can be scheduled above or under the
368+
// desired number of control planes.
369+
// Value can be an absolute number 1 or 0.
370+
// Defaults to 1.
371+
// Example: when this is set to 1, the control plane can be scaled
372+
// up immediately when the rolling update starts.
373+
// +optional
374+
MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty"`
375+
}
376+
377+
// RolloutStrategyType is the string-typed name of a rollout strategy for an RKE2ControlPlane.
378+
type RolloutStrategyType string
379+
380+
const (
381+
// RollingUpdateStrategyType replaces the old control planes with new ones using a rolling update,
382+
// i.e. it gradually scales the old control planes up or down and scales the new ones up or down.
383+
RollingUpdateStrategyType RolloutStrategyType = "RollingUpdate"
384+
)
385+
346386
// init registers the RKE2ControlPlane and RKE2ControlPlaneList types with SchemeBuilder.
func init() { //nolint:gochecknoinits
347387
SchemeBuilder.Register(&RKE2ControlPlane{}, &RKE2ControlPlaneList{})
348388
}

controlplane/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 46 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_rke2controlplanes.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,34 @@ spec:
605605
description: Replicas is the number of replicas for the Control Plane.
606606
format: int32
607607
type: integer
608+
rolloutStrategy:
609+
default:
610+
rollingUpdate:
611+
maxSurge: 1
612+
type: RollingUpdate
613+
description: The RolloutStrategy to use to replace control plane machines
614+
with new ones.
615+
properties:
616+
rollingUpdate:
617+
description: Rolling update config params. Present only if RolloutStrategyType
618+
= RollingUpdate.
619+
properties:
620+
maxSurge:
621+
anyOf:
622+
- type: integer
623+
- type: string
624+
description: 'The maximum number of control planes that can
625+
be scheduled above or under the desired number of control
626+
planes. Value can be an absolute number 1 or 0. Defaults
627+
to 1. Example: when this is set to 1, the control plane
628+
can be scaled up immediately when the rolling update starts.'
629+
x-kubernetes-int-or-string: true
630+
type: object
631+
type:
632+
description: Type of rollout. Currently the only supported strategy
633+
is "RollingUpdate". Default is RollingUpdate.
634+
type: string
635+
type: object
608636
serverConfig:
609637
description: ServerConfig specifies configuration for the agent nodes.
610638
properties:

controlplane/internal/controllers/rke2controlplane_controller.go

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ func patchRKE2ControlPlane(ctx context.Context, patchHelper *patch.Helper, rcp *
217217
func (r *RKE2ControlPlaneReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
218218
c, err := ctrl.NewControllerManagedBy(mgr).
219219
For(&controlplanev1.RKE2ControlPlane{}).
220+
Owns(&clusterv1.Machine{}).
220221
Build(r)
221222
if err != nil {
222223
return errors.Wrap(err, "failed setting up with a controller manager")
@@ -742,14 +743,22 @@ func (r *RKE2ControlPlaneReconciler) upgradeControlPlane(
742743
return ctrl.Result{}, err
743744
}
744745

745-
status := workloadCluster.ClusterStatus()
746+
switch rcp.Spec.RolloutStrategy.Type {
747+
case controlplanev1.RollingUpdateStrategyType:
748+
// RolloutStrategy is currently defaulted and validated to be RollingUpdate.
749+
maxNodes := *rcp.Spec.Replicas + int32(rcp.Spec.RolloutStrategy.RollingUpdate.MaxSurge.IntValue())
750+
if int32(controlPlane.Machines.Len()) < maxNodes {
751+
// scaleUpControlPlane ensures that we don't continue scaling up while waiting for Machines to have NodeRefs
752+
return r.scaleUpControlPlane(ctx, cluster, rcp, controlPlane)
753+
}
746754

747-
if status.Nodes <= *rcp.Spec.Replicas {
748-
// scaleUp ensures that we don't continue scaling up while waiting for Machines to have NodeRefs
749-
return r.scaleUpControlPlane(ctx, cluster, rcp, controlPlane)
750-
}
755+
return r.scaleDownControlPlane(ctx, cluster, rcp, controlPlane, machinesRequireUpgrade)
756+
default:
757+
err := fmt.Errorf("unknown rollout strategy type %q", rcp.Spec.RolloutStrategy.Type)
758+
logger.Error(err, "RolloutStrategy type is not set to RollingUpdateStrategyType, unable to determine the strategy for rolling out machines")
751759

752-
return r.scaleDownControlPlane(ctx, cluster, rcp, controlPlane, machinesRequireUpgrade)
760+
return ctrl.Result{}, nil
761+
}
753762
}
754763

755764
// ClusterToRKE2ControlPlane is a handler.ToRequestsFunc to be used to enqueue requests for reconciliation

test/e2e/common.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ import (
4040
const (
4141
KubernetesVersionManagement = "KUBERNETES_VERSION_MANAGEMENT"
4242
KubernetesVersion = "KUBERNETES_VERSION"
43-
KubernetesVersionUpgradeFrom = "KUBERNETES_VERSION_UPGRADE_FROM"
4443
KubernetesVersionUpgradeTo = "KUBERNETES_VERSION_UPGRADE_TO"
4544
CPMachineTemplateUpgradeTo = "CONTROL_PLANE_MACHINE_TEMPLATE_UPGRADE_TO"
4645
WorkersMachineTemplateUpgradeTo = "WORKERS_MACHINE_TEMPLATE_UPGRADE_TO"
4746
IPFamily = "IP_FAMILY"
47+
KindImageVersion = "KIND_IMAGE_VERSION"
4848
)
4949

5050
func Byf(format string, a ...interface{}) {

test/e2e/config/e2e_conf.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,13 @@ providers:
6969
new: "--leader-elect=false"
7070

7171
variables:
72-
KUBERNETES_VERSION_MANAGEMENT: "v1.24.4"
73-
KUBERNETES_VERSION: "v1.24.4"
72+
KUBERNETES_VERSION_MANAGEMENT: "v1.28.0"
73+
KUBERNETES_VERSION: "v1.28.1"
74+
KIND_IMAGE_VERSION: "v1.28.0"
7475
NODE_DRAIN_TIMEOUT: "60s"
7576
CONFORMANCE_WORKER_MACHINE_COUNT: "2"
7677
CONFORMANCE_CONTROL_PLANE_MACHINE_COUNT: "1"
77-
KUBERNETES_VERSION_UPGRADE_TO: "v1.24.2"
78-
KUBERNETES_VERSION_UPGRADE_FROM: "v1.23.8"
78+
KUBERNETES_VERSION_UPGRADE_TO: "v1.28.2"
7979
KUBERNETES_UPGRADE_OCI_IMAGE_ID: "${KUBERNETES_UPGRADE_OCI_IMAGE_ID}"
8080
IP_FAMILY: "IPv4"
8181
EXP_CLUSTER_RESOURCE_SET: "true"

test/e2e/data/infrastructure/cluster-template-docker.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ metadata:
4848
name: "${CLUSTER_NAME}-control-plane"
4949
spec:
5050
template:
51-
spec: {}
51+
spec:
52+
customImage: kindest/node:${KIND_IMAGE_VERSION}
5253
---
5354
apiVersion: cluster.x-k8s.io/v1beta1
5455
kind: MachineDeployment
@@ -80,7 +81,8 @@ metadata:
8081
name: ${CLUSTER_NAME}-md-0
8182
spec:
8283
template:
83-
spec: {}
84+
spec:
85+
customImage: kindest/node:${KIND_IMAGE_VERSION}
8486
---
8587
apiVersion: bootstrap.cluster.x-k8s.io/v1alpha1
8688
kind: RKE2ConfigTemplate
@@ -92,4 +94,4 @@ spec:
9294
agentConfig:
9395
version: ${KUBERNETES_VERSION}+rke2r1
9496
nodeAnnotations:
95-
test: "true"
97+
test: "true"

test/e2e/e2e_suite_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
. "github.com/onsi/ginkgo/v2"
3232
. "github.com/onsi/gomega"
3333
"k8s.io/apimachinery/pkg/runtime"
34+
"k8s.io/klog/v2"
3435

3536
bootstrapv1 "github.com/rancher-sandbox/cluster-api-provider-rke2/bootstrap/api/v1alpha1"
3637
controlplanev1 "github.com/rancher-sandbox/cluster-api-provider-rke2/controlplane/api/v1alpha1"
@@ -104,6 +105,8 @@ func init() {
104105
// TestE2E is the go test entry point for the "caprke2-e2e" Ginkgo suite.
func TestE2E(t *testing.T) {
105106
RegisterFailHandler(Fail)
106107

108+
ctrl.SetLogger(klog.Background()) // hand klog's background logger to ctrl before the specs start
109+
107110
RunSpecs(t, "caprke2-e2e")
108111
}
109112

test/e2e/e2e_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,38 @@ var _ = Describe("Workload cluster creation", func() {
148148
ControlPlane: result.ControlPlane,
149149
}, e2eConfig.GetIntervals(specName, "wait-control-plane")...)
150150

151+
By("Upgrading control plane and worker machines")
152+
ApplyClusterTemplateAndWait(ctx, ApplyClusterTemplateAndWaitInput{
153+
ClusterProxy: bootstrapClusterProxy,
154+
ConfigCluster: clusterctl.ConfigClusterInput{
155+
LogFolder: clusterctlLogFolder,
156+
ClusterctlConfigPath: clusterctlConfigPath,
157+
KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(),
158+
InfrastructureProvider: "docker",
159+
Flavor: "docker",
160+
Namespace: namespace.Name,
161+
ClusterName: clusterName,
162+
KubernetesVersion: e2eConfig.GetVariable(KubernetesVersionUpgradeTo),
163+
ControlPlaneMachineCount: pointer.Int64Ptr(3),
164+
WorkerMachineCount: pointer.Int64Ptr(3),
165+
},
166+
WaitForClusterIntervals: e2eConfig.GetIntervals(specName, "wait-cluster"),
167+
WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"),
168+
WaitForMachineDeployments: e2eConfig.GetIntervals(specName, "wait-worker-nodes"),
169+
}, result)
170+
171+
WaitForClusterToUpgrade(ctx, WaitForClusterToUpgradeInput{
172+
Lister: bootstrapClusterProxy.GetClient(),
173+
ControlPlane: result.ControlPlane,
174+
MachineDeployments: result.MachineDeployments,
175+
VersionAfterUpgrade: e2eConfig.GetVariable(KubernetesVersionUpgradeTo),
176+
}, e2eConfig.GetIntervals(specName, "wait-control-plane")...)
177+
178+
WaitForControlPlaneToBeReady(ctx, WaitForControlPlaneToBeReadyInput{
179+
Getter: bootstrapClusterProxy.GetClient(),
180+
ControlPlane: result.ControlPlane,
181+
}, e2eConfig.GetIntervals(specName, "wait-control-plane")...)
182+
151183
// TODO: this can be uncommented when control plane scaling down is working
152184

153185
// By("Scaling control plane nodes to 1")

test/e2e/helpers.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package e2e
2020

2121
import (
2222
"context"
23+
"fmt"
2324
"time"
2425

2526
. "github.com/onsi/ginkgo/v2"
@@ -251,6 +252,43 @@ func WaitForMachineConditions(ctx context.Context, input WaitForMachineCondition
251252
}, intervals...).Should(BeTrue(), framework.PrettyPrint(input.Machine)+"\n")
252253
}
253254

255+
// WaitForClusterToUpgradeInput is the input for WaitForClusterToUpgrade.
256+
type WaitForClusterToUpgradeInput struct {
257+
Lister framework.Lister // used to list the Machines whose count and version are checked
258+
ControlPlane *controlplanev1.RKE2ControlPlane // its Spec.Replicas is added to the expected Machine count
259+
MachineDeployments []*clusterv1.MachineDeployment // each Spec.Replicas is added to the expected Machine count
260+
VersionAfterUpgrade string // version every Machine with a set Spec.Version must equal
261+
}
262+
263+
// WaitForClusterToUpgrade will wait for a cluster to be upgraded.
264+
func WaitForClusterToUpgrade(ctx context.Context, input WaitForClusterToUpgradeInput, intervals ...interface{}) {
265+
By("Waiting for machines to update")
266+
267+
totallMachineCount := *input.ControlPlane.Spec.Replicas
268+
for _, md := range input.MachineDeployments {
269+
totallMachineCount += *md.Spec.Replicas
270+
}
271+
272+
Eventually(func() (bool, error) {
273+
machineList := &clusterv1.MachineList{}
274+
if err := input.Lister.List(ctx, machineList); err != nil {
275+
return false, fmt.Errorf("failed to list machines: %w", err)
276+
}
277+
278+
if len(machineList.Items) != int(totallMachineCount) { // not all replicas are created
279+
return false, nil
280+
}
281+
282+
for _, machine := range machineList.Items {
283+
if machine.Spec.Version != nil && *machine.Spec.Version != input.VersionAfterUpgrade {
284+
return false, nil
285+
}
286+
}
287+
288+
return true, nil
289+
}, intervals...).Should(BeTrue(), framework.PrettyPrint(input.ControlPlane)+"\n")
290+
}
291+
254292
func setDefaults(input *ApplyClusterTemplateAndWaitInput) {
255293
if input.WaitForControlPlaneInitialized == nil {
256294
input.WaitForControlPlaneInitialized = func(ctx context.Context, input ApplyClusterTemplateAndWaitInput, result *ApplyClusterTemplateAndWaitResult) {

0 commit comments

Comments
 (0)