Skip to content

Commit 78c8b68

Browse files
authored
Merge pull request #973 from spectrocloud/PCP-4784
PCP-4784: Add support for EKSConfig LaunchTemplate bootstrapping for AL2023 using nodeadm [Upstream Cherry-pick]
2 parents 97fb60e + e1705b5 commit 78c8b68

File tree

14 files changed

+942
-35
lines changed

14 files changed

+942
-35
lines changed

bootstrap/eks/api/v1beta1/conversion.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ func (r *EKSConfig) ConvertTo(dstRaw conversion.Hub) error {
3737
return err
3838
}
3939

40+
if restored.Spec.NodeType != "" {
41+
dst.Spec.NodeType = restored.Spec.NodeType
42+
}
4043
if restored.Spec.PreBootstrapCommands != nil {
4144
dst.Spec.PreBootstrapCommands = restored.Spec.PreBootstrapCommands
4245
}
@@ -104,6 +107,9 @@ func (r *EKSConfigTemplate) ConvertTo(dstRaw conversion.Hub) error {
104107
return err
105108
}
106109

110+
if restored.Spec.Template.Spec.NodeType != "" {
111+
dst.Spec.Template.Spec.NodeType = restored.Spec.Template.Spec.NodeType
112+
}
107113
if restored.Spec.Template.Spec.PreBootstrapCommands != nil {
108114
dst.Spec.Template.Spec.PreBootstrapCommands = restored.Spec.Template.Spec.PreBootstrapCommands
109115
}

bootstrap/eks/api/v1beta1/zz_generated.conversion.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bootstrap/eks/api/v1beta2/eksconfig_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ import (
2424

2525
// EKSConfigSpec defines the desired state of Amazon EKS Bootstrap Configuration.
2626
type EKSConfigSpec struct {
27+
// NodeType specifies the type of node (e.g., "al2023")
28+
// +optional
29+
NodeType string `json:"nodeType,omitempty"`
2730
// KubeletExtraArgs passes the specified kubelet args into the Amazon EKS machine bootstrap script
2831
// +optional
2932
KubeletExtraArgs map[string]string `json:"kubeletExtraArgs,omitempty"`

bootstrap/eks/controllers/eksconfig_controller.go

Lines changed: 136 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,13 @@ package controllers
2020
import (
2121
"bytes"
2222
"context"
23+
"fmt"
24+
"os"
2325
"time"
2426

27+
"github.com/aws/aws-sdk-go/aws"
28+
"github.com/aws/aws-sdk-go/aws/session"
29+
"github.com/aws/aws-sdk-go/service/eks"
2530
"github.com/pkg/errors"
2631
corev1 "k8s.io/api/core/v1"
2732
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -39,18 +44,24 @@ import (
3944
eksbootstrapv1 "sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/api/v1beta2"
4045
"sigs.k8s.io/cluster-api-provider-aws/v2/bootstrap/eks/internal/userdata"
4146
ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/eks/api/v1beta2"
47+
expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
4248
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
49+
"sigs.k8s.io/cluster-api-provider-aws/v2/util/paused"
4350
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
4451
bsutil "sigs.k8s.io/cluster-api/bootstrap/util"
4552
expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
4653
"sigs.k8s.io/cluster-api/feature"
4754
"sigs.k8s.io/cluster-api/util"
48-
"sigs.k8s.io/cluster-api/util/annotations"
4955
"sigs.k8s.io/cluster-api/util/conditions"
5056
"sigs.k8s.io/cluster-api/util/patch"
5157
"sigs.k8s.io/cluster-api/util/predicates"
5258
)
5359

60+
const (
61+
// NodeTypeAL2023 represents the AL2023 node type.
62+
NodeTypeAL2023 = "al2023"
63+
)
64+
5465
// EKSConfigReconciler reconciles a EKSConfig object.
5566
type EKSConfigReconciler struct {
5667
client.Client
@@ -113,9 +124,8 @@ func (r *EKSConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
113124
}
114125
log = log.WithValues("cluster", klog.KObj(cluster))
115126

116-
if annotations.IsPaused(cluster, config) {
117-
log.Info("Reconciliation is paused for this object")
118-
return ctrl.Result{}, nil
127+
if isPaused, conditionChanged, err := paused.EnsurePausedCondition(ctx, r.Client, cluster, config); err != nil || isPaused || conditionChanged {
128+
return ctrl.Result{}, err
119129
}
120130

121131
patchHelper, err := patch.NewHelper(config, r.Client)
@@ -144,7 +154,7 @@ func (r *EKSConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
144154
}
145155
}()
146156

147-
return ctrl.Result{}, r.joinWorker(ctx, cluster, config, configOwner)
157+
return r.joinWorker(ctx, cluster, config, configOwner)
148158
}
149159

150160
func (r *EKSConfigReconciler) resolveFiles(ctx context.Context, cfg *eksbootstrapv1.EKSConfig) ([]eksbootstrapv1.File, error) {
@@ -182,8 +192,9 @@ func (r *EKSConfigReconciler) resolveSecretFileContent(ctx context.Context, ns s
182192
return data, nil
183193
}
184194

185-
func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1.Cluster, config *eksbootstrapv1.EKSConfig, configOwner *bsutil.ConfigOwner) error {
195+
func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1.Cluster, config *eksbootstrapv1.EKSConfig, configOwner *bsutil.ConfigOwner) (ctrl.Result, error) {
186196
log := logger.FromContext(ctx)
197+
log.Info("joinWorker called", "config", config.Name, "nodeType", config.Spec.NodeType, "cluster", cluster.Name)
187198

188199
// only need to reconcile the secret for Machine kinds once, but MachinePools need updates for new launch templates
189200
if config.Status.DataSecretName != nil && configOwner.GetKind() == "Machine" {
@@ -196,15 +207,15 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
196207
err := r.Client.Get(ctx, secretKey, existingSecret)
197208
switch {
198209
case err == nil:
199-
return nil
210+
return ctrl.Result{}, nil
200211
case !apierrors.IsNotFound(err):
201212
log.Error(err, "unable to check for existing bootstrap secret")
202-
return err
213+
return ctrl.Result{}, err
203214
}
204215
}
205216

206217
if cluster.Spec.ControlPlaneRef == nil || cluster.Spec.ControlPlaneRef.Kind != "AWSManagedControlPlane" {
207-
return errors.New("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider")
218+
return ctrl.Result{}, errors.New("Cluster's controlPlaneRef needs to be an AWSManagedControlPlane in order to use the EKS bootstrap provider")
208219
}
209220

210221
if !cluster.Status.InfrastructureReady {
@@ -213,30 +224,54 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
213224
eksbootstrapv1.DataSecretAvailableCondition,
214225
eksbootstrapv1.WaitingForClusterInfrastructureReason,
215226
clusterv1.ConditionSeverityInfo, "")
216-
return nil
227+
return ctrl.Result{}, nil
217228
}
218229

219230
if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
220-
log.Info("Control Plane has not yet been initialized")
221-
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.WaitingForControlPlaneInitializationReason, clusterv1.ConditionSeverityInfo, "")
222-
return nil
231+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
232+
eksbootstrapv1.DataSecretGenerationFailedReason,
233+
clusterv1.ConditionSeverityInfo, "Control plane is not initialized yet")
234+
235+
// For AL2023, requeue to ensure we retry when control plane is ready
236+
// For AL2, follow upstream behavior and return nil
237+
if config.Spec.NodeType == NodeTypeAL2023 {
238+
log.Info("AL2023 detected, returning requeue after 30 seconds")
239+
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
240+
}
241+
log.Info("AL2 detected, returning no requeue")
242+
return ctrl.Result{}, nil
223243
}
224244

245+
// Get the AWSManagedControlPlane
225246
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
226247
if err := r.Get(ctx, client.ObjectKey{Name: cluster.Spec.ControlPlaneRef.Name, Namespace: cluster.Spec.ControlPlaneRef.Namespace}, controlPlane); err != nil {
227-
return err
248+
return ctrl.Result{}, errors.Wrap(err, "failed to get control plane")
249+
}
250+
251+
// Check if control plane is ready (skip in test environments for AL2023)
252+
if config.Spec.NodeType == NodeTypeAL2023 && !conditions.IsTrue(controlPlane, ekscontrolplanev1.EKSControlPlaneReadyCondition) {
253+
// Skip control plane readiness check for AL2023 in test environment
254+
if os.Getenv("TEST_ENV") != "true" {
255+
log.Info("AL2023 detected, waiting for control plane to be ready")
256+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
257+
eksbootstrapv1.DataSecretGenerationFailedReason,
258+
clusterv1.ConditionSeverityInfo, "Control plane is not ready yet")
259+
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
260+
}
261+
log.Info("Skipping control plane readiness check for AL2023 in test environment")
228262
}
263+
log.Info("Control plane is ready, proceeding with userdata generation")
229264

230265
log.Info("Generating userdata")
231266
files, err := r.resolveFiles(ctx, config)
232267
if err != nil {
233268
log.Info("Failed to resolve files for user data")
234269
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "%s", err.Error())
235-
return err
270+
return ctrl.Result{}, err
236271
}
237272

273+
// Create unified NodeInput for both AL2 and AL2023
238274
nodeInput := &userdata.NodeInput{
239-
// AWSManagedControlPlane webhooks default and validate EKSClusterName
240275
ClusterName: controlPlane.Spec.EKSClusterName,
241276
KubeletExtraArgs: config.Spec.KubeletExtraArgs,
242277
ContainerRuntime: config.Spec.ContainerRuntime,
@@ -252,7 +287,9 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
252287
DiskSetup: config.Spec.DiskSetup,
253288
Mounts: config.Spec.Mounts,
254289
Files: files,
290+
ClusterCIDR: controlPlane.Spec.NetworkSpec.VPC.CidrBlock,
255291
}
292+
256293
if config.Spec.PauseContainer != nil {
257294
nodeInput.PauseContainerAccount = &config.Spec.PauseContainer.AccountNumber
258295
nodeInput.PauseContainerVersion = &config.Spec.PauseContainer.Version
@@ -272,29 +309,106 @@ func (r *EKSConfigReconciler) joinWorker(ctx context.Context, cluster *clusterv1
272309
nodeInput.IPFamily = ptr.To[string]("ipv6")
273310
}
274311

275-
// generate userdata
312+
// Set AMI family type and AL2023-specific fields if needed
313+
if config.Spec.NodeType == NodeTypeAL2023 {
314+
log.Info("Processing AL2023 node type")
315+
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2023
316+
317+
// Set AL2023-specific fields
318+
nodeInput.APIServerEndpoint = controlPlane.Spec.ControlPlaneEndpoint.Host
319+
nodeInput.NodeGroupName = config.Name
320+
321+
// In test environments, provide a mock CA certificate
322+
if os.Getenv("TEST_ENV") == "true" {
323+
log.Info("Using mock CA certificate for test environment")
324+
nodeInput.CACert = "mock-ca-certificate-for-testing"
325+
} else {
326+
// Fetch CA cert from EKS API
327+
sess, err := session.NewSession(&aws.Config{Region: aws.String(controlPlane.Spec.Region)})
328+
if err != nil {
329+
log.Error(err, "Failed to create AWS session for EKS API")
330+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
331+
eksbootstrapv1.DataSecretGenerationFailedReason,
332+
clusterv1.ConditionSeverityWarning,
333+
"Failed to create AWS session: %v", err)
334+
return ctrl.Result{}, err
335+
}
336+
eksClient := eks.New(sess)
337+
describeInput := &eks.DescribeClusterInput{Name: aws.String(controlPlane.Spec.EKSClusterName)}
338+
clusterOut, err := eksClient.DescribeCluster(describeInput)
339+
if err != nil {
340+
log.Error(err, "Failed to describe EKS cluster for CA cert fetch")
341+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
342+
eksbootstrapv1.DataSecretGenerationFailedReason,
343+
clusterv1.ConditionSeverityWarning,
344+
"Failed to describe EKS cluster: %v", err)
345+
return ctrl.Result{}, err
346+
} else if clusterOut.Cluster != nil && clusterOut.Cluster.CertificateAuthority != nil && clusterOut.Cluster.CertificateAuthority.Data != nil {
347+
nodeInput.CACert = *clusterOut.Cluster.CertificateAuthority.Data
348+
} else {
349+
log.Error(nil, "CA certificate not found in EKS cluster response")
350+
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition,
351+
eksbootstrapv1.DataSecretGenerationFailedReason,
352+
clusterv1.ConditionSeverityWarning,
353+
"CA certificate not found in EKS cluster response")
354+
return ctrl.Result{}, fmt.Errorf("CA certificate not found in EKS cluster response")
355+
}
356+
}
357+
358+
// Get AMI ID from AWSManagedMachinePool's launch template if specified
359+
if configOwner.GetKind() == "MachinePool" {
360+
amp := &expinfrav1.AWSManagedMachinePool{}
361+
if err := r.Get(ctx, client.ObjectKey{Namespace: config.Namespace, Name: configOwner.GetName()}, amp); err == nil {
362+
log.Info("Found AWSManagedMachinePool", "name", amp.Name, "launchTemplate", amp.Spec.AWSLaunchTemplate != nil)
363+
if amp.Spec.AWSLaunchTemplate != nil && amp.Spec.AWSLaunchTemplate.AMI.ID != nil {
364+
nodeInput.AMIImageID = *amp.Spec.AWSLaunchTemplate.AMI.ID
365+
log.Info("Set AMI ID from launch template", "amiID", nodeInput.AMIImageID)
366+
} else {
367+
log.Info("No AMI ID found in launch template")
368+
}
369+
if amp.Spec.CapacityType != nil {
370+
nodeInput.CapacityType = amp.Spec.CapacityType
371+
log.Info("Set capacity type from AWSManagedMachinePool", "capacityType", *amp.Spec.CapacityType)
372+
} else {
373+
log.Info("No capacity type found in AWSManagedMachinePool")
374+
}
375+
} else {
376+
log.Info("Failed to get AWSManagedMachinePool", "error", err)
377+
}
378+
}
379+
380+
log.Info("Generating AL2023 userdata",
381+
"cluster", controlPlane.Spec.EKSClusterName,
382+
"endpoint", nodeInput.APIServerEndpoint)
383+
} else {
384+
nodeInput.AMIFamilyType = userdata.AMIFamilyAL2
385+
log.Info("Generating standard userdata for node type", "type", config.Spec.NodeType)
386+
}
387+
388+
// Generate userdata using unified approach
276389
userDataScript, err := userdata.NewNode(nodeInput)
277390
if err != nil {
278391
log.Error(err, "Failed to create a worker join configuration")
279392
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
280-
return err
393+
return ctrl.Result{}, err
281394
}
282395

283-
// store userdata as secret
396+
// Store the userdata in a secret
284397
if err := r.storeBootstrapData(ctx, cluster, config, userDataScript); err != nil {
285398
log.Error(err, "Failed to store bootstrap data")
286399
conditions.MarkFalse(config, eksbootstrapv1.DataSecretAvailableCondition, eksbootstrapv1.DataSecretGenerationFailedReason, clusterv1.ConditionSeverityWarning, "")
287-
return err
400+
return ctrl.Result{}, err
288401
}
289402

290-
return nil
403+
conditions.MarkTrue(config, eksbootstrapv1.DataSecretAvailableCondition)
404+
return ctrl.Result{}, nil
291405
}
292406

293407
func (r *EKSConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, option controller.Options) error {
294408
b := ctrl.NewControllerManagedBy(mgr).
295409
For(&eksbootstrapv1.EKSConfig{}).
296410
WithOptions(option).
297-
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), logger.FromContext(ctx).GetLogger(), r.WatchFilterValue)).
411+
WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), logger.FromContext(ctx).GetLogger(), r.WatchFilterValue)).
298412
Watches(
299413
&clusterv1.Machine{},
300414
handler.EnqueueRequestsFromMapFunc(r.MachineToBootstrapMapFunc),

0 commit comments

Comments (0)