@@ -19,6 +19,7 @@ package controllers
1919import (
2020 "context"
2121 "fmt"
22+ "strings"
2223 "time"
2324
2425 "github.com/blang/semver/v4"
@@ -32,6 +33,7 @@ import (
3233 kerrors "k8s.io/apimachinery/pkg/util/errors"
3334 "k8s.io/apimachinery/pkg/util/intstr"
3435 "k8s.io/client-go/tools/record"
36+ "k8s.io/klog/v2"
3537 ctrl "sigs.k8s.io/controller-runtime"
3638 "sigs.k8s.io/controller-runtime/pkg/client"
3739 "sigs.k8s.io/controller-runtime/pkg/controller"
@@ -514,6 +516,10 @@ func (r *RKE2ControlPlaneReconciler) reconcileNormal(
514516 return ctrl.Result {}, err
515517 }
516518
519+ if result , err := r .reconcilePreTerminateHook (ctx , controlPlane ); err != nil || ! result .IsZero () {
520+ return result , err
521+ }
522+
517523 // Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
518524 needRollout := controlPlane .MachinesNeedingRollout ()
519525
@@ -698,14 +704,31 @@ func (r *RKE2ControlPlaneReconciler) reconcileDelete(ctx context.Context,
698704 }
699705
700706 // Delete control plane machines in parallel
701- machinesToDelete := ownedMachines . Filter ( collections . Not ( collections . HasDeletionTimestamp ))
707+ machinesToDelete := ownedMachines
702708
703709 var errs []error
704710
705711 for i := range machinesToDelete {
706712 m := machinesToDelete [i ]
707713 logger := logger .WithValues ("machine" , m )
708714
715+ // During RKE2CP deletion we don't care about forwarding etcd leadership or removing etcd members.
716+ // So we are removing the pre-terminate hook.
717+ // This is important because when deleting RKE2CP we will delete all members of etcd and it's not possible
718+ // to forward etcd leadership without any member left after we went through the Machine deletion.
719+ // Also in this case the reconcileDelete code of the Machine controller won't execute Node drain
720+ // and wait for volume detach.
721+ if err := r .removePreTerminateHookAnnotationFromMachine (ctx , m ); err != nil {
722+ errs = append (errs , err )
723+
724+ continue
725+ }
726+
727+ if ! m .DeletionTimestamp .IsZero () {
728+ // Nothing to do, Machine already has deletionTimestamp set.
729+ continue
730+ }
731+
709732 if err := r .Client .Delete (ctx , machinesToDelete [i ]); err != nil && ! apierrors .IsNotFound (err ) {
710733 logger .Error (err , "Failed to cleanup owned machine" )
711734 errs = append (errs , err )
@@ -720,6 +743,8 @@ func (r *RKE2ControlPlaneReconciler) reconcileDelete(ctx context.Context,
720743 return ctrl.Result {}, err
721744 }
722745
746+ logger .Info ("Waiting for control plane Machines to not exist anymore" )
747+
723748 conditions .MarkFalse (rcp , controlplanev1 .ResizedCondition , clusterv1 .DeletingReason , clusterv1 .ConditionSeverityInfo , "" )
724749
725750 return ctrl.Result {RequeueAfter : deleteRequeueAfter }, nil
@@ -909,6 +934,108 @@ func (r *RKE2ControlPlaneReconciler) ClusterToRKE2ControlPlane(ctx context.Conte
909934 }
910935}
911936
937+ func (r * RKE2ControlPlaneReconciler ) reconcilePreTerminateHook (ctx context.Context , controlPlane * rke2.ControlPlane ) (ctrl.Result , error ) {
938+ // Ensure that every active machine has the drain hook set
939+ patchHookAnnotation := false
940+
941+ for _ , machine := range controlPlane .Machines .Filter (collections .ActiveMachines ) {
942+ if _ , exists := machine .Annotations [controlplanev1 .PreTerminateHookCleanupAnnotation ]; ! exists {
943+ machine .Annotations [controlplanev1 .PreTerminateHookCleanupAnnotation ] = ""
944+ patchHookAnnotation = true
945+ }
946+ }
947+
948+ if patchHookAnnotation {
949+ // Patch machine annoations
950+ if err := controlPlane .PatchMachines (ctx ); err != nil {
951+ return ctrl.Result {}, err
952+ }
953+ }
954+
955+ if ! controlPlane .HasDeletingMachine () {
956+ return ctrl.Result {}, nil
957+ }
958+
959+ log := ctrl .LoggerFrom (ctx )
960+
961+ // Return early, if there is already a deleting Machine without the pre-terminate hook.
962+ // We are going to wait until this Machine goes away before running the pre-terminate hook on other Machines.
963+ for _ , deletingMachine := range controlPlane .DeletingMachines () {
964+ if _ , exists := deletingMachine .Annotations [controlplanev1 .PreTerminateHookCleanupAnnotation ]; ! exists {
965+ return ctrl.Result {RequeueAfter : deleteRequeueAfter }, nil
966+ }
967+ }
968+
969+ // Pick the Machine with the oldest deletionTimestamp to keep this function deterministic / reentrant
970+ // so we only remove the pre-terminate hook from one Machine at a time.
971+ deletingMachines := controlPlane .DeletingMachines ()
972+ deletingMachine := controlPlane .SortedByDeletionTimestamp (deletingMachines )[0 ]
973+
974+ log = log .WithValues ("Machine" , klog .KObj (deletingMachine ))
975+ ctx = ctrl .LoggerInto (ctx , log )
976+
977+ // Return early if there are other pre-terminate hooks for the Machine.
978+ // The CAPRKE2 pre-terminate hook should be the one executed last, so that kubelet
979+ // is still working while other pre-terminate hooks are run.
980+ if machineHasOtherPreTerminateHooks (deletingMachine ) {
981+ return ctrl.Result {RequeueAfter : deleteRequeueAfter }, nil
982+ }
983+
984+ // Return early because the Machine controller is not yet waiting for the pre-terminate hook.
985+ c := conditions .Get (deletingMachine , clusterv1 .PreTerminateDeleteHookSucceededCondition )
986+ if c == nil || c .Status != corev1 .ConditionFalse || c .Reason != clusterv1 .WaitingExternalHookReason {
987+ return ctrl.Result {RequeueAfter : deleteRequeueAfter }, nil
988+ }
989+
990+ // The following will execute and remove the pre-terminate hook from the Machine.
991+
992+ // If we have more than 1 Machine and etcd is managed we forward etcd leadership and remove the member
993+ // to keep the etcd cluster healthy.
994+ if controlPlane .Machines .Len () > 1 {
995+ workloadCluster , err := r .GetWorkloadCluster (ctx , controlPlane )
996+ if err != nil {
997+ return ctrl.Result {}, errors .Wrapf (err ,
998+ "failed to remove etcd member for deleting Machine %s: failed to create client to workload cluster" , klog .KObj (deletingMachine ))
999+ }
1000+
1001+ // Note: In regular deletion cases (remediation, scale down) the leader should have been already moved.
1002+ // We're doing this again here in case the Machine became leader again or the Machine deletion was
1003+ // triggered in another way (e.g. a user running kubectl delete machine)
1004+ etcdLeaderCandidate := controlPlane .Machines .Filter (collections .Not (collections .HasDeletionTimestamp )).Newest ()
1005+ if etcdLeaderCandidate != nil {
1006+ if err := workloadCluster .ForwardEtcdLeadership (ctx , deletingMachine , etcdLeaderCandidate ); err != nil {
1007+ return ctrl.Result {}, errors .Wrapf (err , "failed to move leadership to candidate Machine %s" , etcdLeaderCandidate .Name )
1008+ }
1009+ } else {
1010+ log .Info ("Skip forwarding etcd leadership, because there is no other control plane Machine without a deletionTimestamp" )
1011+ }
1012+
1013+ // Note: Removing the etcd member will lead to the etcd and the kube-apiserver Pod on the Machine shutting down.
1014+ if err := workloadCluster .RemoveEtcdMemberForMachine (ctx , deletingMachine ); err != nil {
1015+ return ctrl.Result {}, errors .Wrapf (err , "failed to remove etcd member for deleting Machine %s" , klog .KObj (deletingMachine ))
1016+ }
1017+ }
1018+
1019+ if err := r .removePreTerminateHookAnnotationFromMachine (ctx , deletingMachine ); err != nil {
1020+ return ctrl.Result {}, err
1021+ }
1022+
1023+ log .Info ("Waiting for Machines to be deleted" , "machines" ,
1024+ strings .Join (controlPlane .Machines .Filter (collections .HasDeletionTimestamp ).Names (), ", " ))
1025+
1026+ return ctrl.Result {RequeueAfter : deleteRequeueAfter }, nil
1027+ }
1028+
1029+ func machineHasOtherPreTerminateHooks (machine * clusterv1.Machine ) bool {
1030+ for k := range machine .Annotations {
1031+ if strings .HasPrefix (k , clusterv1 .PreTerminateDeleteHookAnnotationPrefix ) && k != controlplanev1 .PreTerminateHookCleanupAnnotation {
1032+ return true
1033+ }
1034+ }
1035+
1036+ return false
1037+ }
1038+
9121039// getWorkloadCluster gets a cluster object.
9131040// The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine.
9141041func (r * RKE2ControlPlaneReconciler ) getWorkloadCluster (ctx context.Context , clusterKey types.NamespacedName ) (rke2.WorkloadCluster , error ) {
0 commit comments