@@ -20,8 +20,13 @@ package controllers
2020import (
2121 "bytes"
2222 "context"
23+ "fmt"
24+ "os"
2325 "time"
2426
27+ "github.com/aws/aws-sdk-go/aws"
28+ "github.com/aws/aws-sdk-go/aws/session"
29+ "github.com/aws/aws-sdk-go/service/eks"
2530 "github.com/pkg/errors"
2631 corev1 "k8s.io/api/core/v1"
2732 apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -39,18 +44,24 @@ import (
3944 eksbootstrapv1 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/api/v1beta2"
4045 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/internal/userdata"
4146 ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/eks/api/v1beta2"
47+ expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
4248 "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
49+ "sigs.k8s.io/cluster-api-provider-aws/v2/util/paused"
4350 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
4451 bsutil "sigs.k8s.io/cluster-api/bootstrap/util"
4552 expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
4653 "sigs.k8s.io/cluster-api/feature"
4754 "sigs.k8s.io/cluster-api/util"
48- "sigs.k8s.io/cluster-api/util/annotations"
4955 "sigs.k8s.io/cluster-api/util/conditions"
5056 "sigs.k8s.io/cluster-api/util/patch"
5157 "sigs.k8s.io/cluster-api/util/predicates"
5258)
5359
60+ const (
61+ // NodeTypeAL2023 is the EKSConfig Spec.NodeType value that joinWorker compares against to select the AL2023-specific bootstrap handling.
62+ NodeTypeAL2023 = "al2023"
63+ )
64+
5465// EKSConfigReconciler reconciles a EKSConfig object.
5566type EKSConfigReconciler struct {
5667 client.Client
@@ -113,9 +124,8 @@ func (r *EKSConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
113124 }
114125 log = log .WithValues ("cluster" , klog .KObj (cluster ))
115126
116- if annotations .IsPaused (cluster , config ) {
117- log .Info ("Reconciliation is paused for this object" )
118- return ctrl.Result {}, nil
127+ if isPaused , conditionChanged , err := paused .EnsurePausedCondition (ctx , r .Client , cluster , config ); err != nil || isPaused || conditionChanged {
128+ return ctrl.Result {}, err
119129 }
120130
121131 patchHelper , err := patch .NewHelper (config , r .Client )
@@ -144,7 +154,7 @@ func (r *EKSConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
144154 }
145155 }()
146156
147- return ctrl. Result {}, r .joinWorker (ctx , cluster , config , configOwner )
157+ return r .joinWorker (ctx , cluster , config , configOwner )
148158}
149159
150160func (r * EKSConfigReconciler ) resolveFiles (ctx context.Context , cfg * eksbootstrapv1.EKSConfig ) ([]eksbootstrapv1.File , error ) {
@@ -182,8 +192,9 @@ func (r *EKSConfigReconciler) resolveSecretFileContent(ctx context.Context, ns s
182192 return data , nil
183193}
184194
185- func (r * EKSConfigReconciler ) joinWorker (ctx context.Context , cluster * clusterv1.Cluster , config * eksbootstrapv1.EKSConfig , configOwner * bsutil.ConfigOwner ) error {
195+ func (r * EKSConfigReconciler ) joinWorker (ctx context.Context , cluster * clusterv1.Cluster , config * eksbootstrapv1.EKSConfig , configOwner * bsutil.ConfigOwner ) (ctrl. Result , error ) {
186196 log := logger .FromContext (ctx )
197+ log .Info ("joinWorker called" , "config" , config .Name , "nodeType" , config .Spec .NodeType , "cluster" , cluster .Name )
187198
188199 // only need to reconcile the secret for Machine kinds once, but MachinePools need updates for new launch templates
189200 if config .Status .DataSecretName != nil && configOwner .GetKind () == "Machine" {
@@ -196,15 +207,15 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
196207 err := r .Client .Get (ctx , secretKey , existingSecret )
197208 switch {
198209 case err == nil :
199- return nil
210+ return ctrl. Result {}, nil
200211 case ! apierrors .IsNotFound (err ):
201212 log .Error (err , "unable to check for existing bootstrap secret" )
202- return err
213+ return ctrl. Result {}, err
203214 }
204215 }
205216
206217 if cluster .Spec .ControlPlaneRef == nil || cluster .Spec .ControlPlaneRef .Kind != "AWSManagedControlPlane" {
207- return errors .New ("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider" )
218+ return ctrl. Result {}, errors .New ("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider" )
208219 }
209220
210221 if ! cluster .Status .InfrastructureReady {
@@ -213,30 +224,54 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
213224 eksbootstrapv1 .DataSecretAvailableCondition ,
214225 eksbootstrapv1 .WaitingForClusterInfrastructureReason ,
215226 clusterv1 .ConditionSeverityInfo , "" )
216- return nil
227+ return ctrl. Result {}, nil
217228 }
218229
219230 if ! conditions .IsTrue (cluster , clusterv1 .ControlPlaneInitializedCondition ) {
220- log .Info ("Control Plane has not yet been initialized" )
221- conditions .MarkFalse (config , eksbootstrapv1 .DataSecretAvailableCondition , eksbootstrapv1 .WaitingForControlPlaneInitializationReason , clusterv1 .ConditionSeverityInfo , "" )
222- return nil
231+ conditions .MarkFalse (config , eksbootstrapv1 .DataSecretAvailableCondition ,
232+ eksbootstrapv1 .DataSecretGenerationFailedReason ,
233+ clusterv1 .ConditionSeverityInfo , "Control plane is not initialized yet" )
234+
235+ // For AL2023, requeue to ensure we retry when control plane is ready
236+ // For AL2, follow upstream behavior and return nil
237+ if config .Spec .NodeType == NodeTypeAL2023 {
238+ log .Info ("AL2023 detected, returning requeue after 30 seconds" )
239+ return ctrl.Result {RequeueAfter : 30 * time .Second }, nil
240+ }
241+ log .Info ("AL2 detected, returning no requeue" )
242+ return ctrl.Result {}, nil
223243 }
224244
245+ // Get the AWSManagedControlPlane
225246 controlPlane := & ekscontrolplanev1.AWSManagedControlPlane {}
226247 if err := r .Get (ctx , client.ObjectKey {Name : cluster .Spec .ControlPlaneRef .Name , Namespace : cluster .Spec .ControlPlaneRef .Namespace }, controlPlane ); err != nil {
227- return err
248+ return ctrl.Result {}, errors .Wrap (err , "failed to get control plane" )
249+ }
250+
251+ // For AL2023 nodes only: wait until the control plane reports ready (the wait is bypassed when TEST_ENV is "true")
252+ if config .Spec .NodeType == NodeTypeAL2023 && ! conditions .IsTrue (controlPlane , ekscontrolplanev1 .EKSControlPlaneReadyCondition ) {
253+ // Skip control plane readiness check for AL2023 in test environment
254+ if os .Getenv ("TEST_ENV" ) != "true" {
255+ log .Info ("AL2023 detected, waiting for control plane to be ready" )
256+ conditions .MarkFalse (config , eksbootstrapv1 .DataSecretAvailableCondition ,
257+ eksbootstrapv1 .DataSecretGenerationFailedReason ,
258+ clusterv1 .ConditionSeverityInfo , "Control plane is not ready yet" )
259+ return ctrl.Result {RequeueAfter : 30 * time .Second }, nil
260+ }
261+ log .Info ("Skipping control plane readiness check for AL2023 in test environment" )
228262 }
263+ log .Info ("Control plane is ready, proceeding with userdata generation" )
229264
230265 log .Info ("Generating userdata" )
231266 files , err := r .resolveFiles (ctx , config )
232267 if err != nil {
233268 log .Info ("Failed to resolve files for user data" )
234269 conditions .MarkFalse (config , eksbootstrapv1 .DataSecretAvailableCondition , eksbootstrapv1 .DataSecretGenerationFailedReason , clusterv1 .ConditionSeverityWarning , "%s" , err .Error ())
235- return err
270+ return ctrl. Result {}, err
236271 }
237272
273+ // Create unified NodeInput for both AL2 and AL2023
238274 nodeInput := & userdata.NodeInput {
239- // AWSManagedControlPlane webhooks default and validate EKSClusterName
240275 ClusterName : controlPlane .Spec .EKSClusterName ,
241276 KubeletExtraArgs : config .Spec .KubeletExtraArgs ,
242277 ContainerRuntime : config .Spec .ContainerRuntime ,
@@ -252,7 +287,9 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
252287 DiskSetup : config .Spec .DiskSetup ,
253288 Mounts : config .Spec .Mounts ,
254289 Files : files ,
290+ ClusterCIDR : controlPlane .Spec .NetworkSpec .VPC .CidrBlock ,
255291 }
292+
256293 if config .Spec .PauseContainer != nil {
257294 nodeInput .PauseContainerAccount = & config .Spec .PauseContainer .AccountNumber
258295 nodeInput .PauseContainerVersion = & config .Spec .PauseContainer .Version
@@ -272,29 +309,106 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
272309 nodeInput .IPFamily = ptr.To [string ]("ipv6" )
273310 }
274311
275- // generate userdata
312+ // Set AMI family type and AL2023-specific fields if needed
313+ if config .Spec .NodeType == NodeTypeAL2023 {
314+ log .Info ("Processing AL2023 node type" )
315+ nodeInput .AMIFamilyType = userdata .AMIFamilyAL2023
316+
317+ // Set AL2023-specific fields
318+ nodeInput .APIServerEndpoint = controlPlane .Spec .ControlPlaneEndpoint .Host
319+ nodeInput .NodeGroupName = config .Name
320+
321+ // In test environments, provide a mock CA certificate
322+ if os .Getenv ("TEST_ENV" ) == "true" {
323+ log .Info ("Using mock CA certificate for test environment" )
324+ nodeInput .CACert = "mock-ca-certificate-for-testing"
325+ } else {
326+ // Fetch CA cert from EKS API
327+ sess , err := session .NewSession (& aws.Config {Region : aws .String (controlPlane .Spec .Region )})
328+ if err != nil {
329+ log .Error (err , "Failed to create AWS session for EKS API" )
330+ conditions .MarkFalse (config , eksbootstrapv1 .DataSecretAvailableCondition ,
331+ eksbootstrapv1 .DataSecretGenerationFailedReason ,
332+ clusterv1 .ConditionSeverityWarning ,
333+ "Failed to create AWS session: %v" , err )
334+ return ctrl.Result {}, err
335+ }
336+ eksClient := eks .New (sess )
337+ describeInput := & eks.DescribeClusterInput {Name : aws .String (controlPlane .Spec .EKSClusterName )}
338+ clusterOut , err := eksClient .DescribeCluster (describeInput )
339+ if err != nil {
340+ log .Error (err , "Failed to describe EKS cluster for CA cert fetch" )
341+ conditions .MarkFalse (config , eksbootstrapv1 .DataSecretAvailableCondition ,
342+ eksbootstrapv1 .DataSecretGenerationFailedReason ,
343+ clusterv1 .ConditionSeverityWarning ,
344+ "Failed to describe EKS cluster: %v" , err )
345+ return ctrl.Result {}, err
346+ } else if clusterOut .Cluster != nil && clusterOut .Cluster .CertificateAuthority != nil && clusterOut .Cluster .CertificateAuthority .Data != nil {
347+ nodeInput .CACert = * clusterOut .Cluster .CertificateAuthority .Data
348+ } else {
349+ log .Error (nil , "CA certificate not found in EKS cluster response" )
350+ conditions .MarkFalse (config , eksbootstrapv1 .DataSecretAvailableCondition ,
351+ eksbootstrapv1 .DataSecretGenerationFailedReason ,
352+ clusterv1 .ConditionSeverityWarning ,
353+ "CA certificate not found in EKS cluster response" )
354+ return ctrl.Result {}, fmt .Errorf ("CA certificate not found in EKS cluster response" )
355+ }
356+ }
357+
358+ // Get AMI ID from AWSManagedMachinePool's launch template if specified
359+ if configOwner .GetKind () == "MachinePool" {
360+ amp := & expinfrav1.AWSManagedMachinePool {}
361+ if err := r .Get (ctx , client.ObjectKey {Namespace : config .Namespace , Name : configOwner .GetName ()}, amp ); err == nil {
362+ log .Info ("Found AWSManagedMachinePool" , "name" , amp .Name , "launchTemplate" , amp .Spec .AWSLaunchTemplate != nil )
363+ if amp .Spec .AWSLaunchTemplate != nil && amp .Spec .AWSLaunchTemplate .AMI .ID != nil {
364+ nodeInput .AMIImageID = * amp .Spec .AWSLaunchTemplate .AMI .ID
365+ log .Info ("Set AMI ID from launch template" , "amiID" , nodeInput .AMIImageID )
366+ } else {
367+ log .Info ("No AMI ID found in launch template" )
368+ }
369+ if amp .Spec .CapacityType != nil {
370+ nodeInput .CapacityType = amp .Spec .CapacityType
371+ log .Info ("Set capacity type from AWSManagedMachinePool" , "capacityType" , * amp .Spec .CapacityType )
372+ } else {
373+ log .Info ("No capacity type found in AWSManagedMachinePool" )
374+ }
375+ } else {
376+ log .Info ("Failed to get AWSManagedMachinePool" , "error" , err )
377+ }
378+ }
379+
380+ log .Info ("Generating AL2023 userdata" ,
381+ "cluster" , controlPlane .Spec .EKSClusterName ,
382+ "endpoint" , nodeInput .APIServerEndpoint )
383+ } else {
384+ nodeInput .AMIFamilyType = userdata .AMIFamilyAL2
385+ log .Info ("Generating standard userdata for node type" , "type" , config .Spec .NodeType )
386+ }
387+
388+ // Generate userdata using unified approach
276389 userDataScript , err := userdata .NewNode (nodeInput )
277390 if err != nil {
278391 log .Error (err , "Failed to create a worker join configuration" )
279392 conditions .MarkFalse (config , eksbootstrapv1 .DataSecretAvailableCondition , eksbootstrapv1 .DataSecretGenerationFailedReason , clusterv1 .ConditionSeverityWarning , "" )
280- return err
393+ return ctrl. Result {}, err
281394 }
282395
283- // store userdata as secret
396+ // Store the userdata in a secret
284397 if err := r .storeBootstrapData (ctx , cluster , config , userDataScript ); err != nil {
285398 log .Error (err , "Failed to store bootstrap data" )
286399 conditions .MarkFalse (config , eksbootstrapv1 .DataSecretAvailableCondition , eksbootstrapv1 .DataSecretGenerationFailedReason , clusterv1 .ConditionSeverityWarning , "" )
287- return err
400+ return ctrl. Result {}, err
288401 }
289402
290- return nil
403+ conditions .MarkTrue (config , eksbootstrapv1 .DataSecretAvailableCondition )
404+ return ctrl.Result {}, nil
291405}
292406
293407func (r * EKSConfigReconciler ) SetupWithManager (ctx context.Context , mgr ctrl.Manager , option controller.Options ) error {
294408 b := ctrl .NewControllerManagedBy (mgr ).
295409 For (& eksbootstrapv1.EKSConfig {}).
296410 WithOptions (option ).
297- WithEventFilter (predicates .ResourceNotPausedAndHasFilterLabel (mgr .GetScheme (), logger .FromContext (ctx ).GetLogger (), r .WatchFilterValue )).
411+ WithEventFilter (predicates .ResourceHasFilterLabel (mgr .GetScheme (), logger .FromContext (ctx ).GetLogger (), r .WatchFilterValue )).
298412 Watches (
299413 & clusterv1.Machine {},
300414 handler .EnqueueRequestsFromMapFunc (r .MachineToBootstrapMapFunc ),
0 commit comments