Skip to content

Commit 7cac3a3

Browse files
Allow the NTHManagedAsg tag to be configurable (#287)
* Make the tag to check for on a managed ASG configurable
* Fix the arguments print that occurs at program boot
* Fix broken tests from missing constant
* ManagedAsgTag needs a definition
* Add managedAsgTag support to the Helm chart
1 parent 592a428 commit 7cac3a3

File tree

8 files changed

+25
-7
lines changed

8 files changed

+25
-7
lines changed

cmd/node-termination-handler.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ func main() {
150150

151151
sqsMonitor := sqsevent.SQSMonitor{
152152
CheckIfManaged: nthConfig.CheckASGTagBeforeDraining,
153+
ManagedAsgTag: nthConfig.ManagedAsgTag,
153154
QueueURL: nthConfig.QueueURL,
154155
InterruptionChan: interruptionChan,
155156
CancelChan: cancelChan,

config/helm/aws-node-termination-handler/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ Parameter | Description | Default
8686
`enableSqsTerminationDraining` | If true, this turns on queue-processor mode which drains nodes when an SQS termination event is received| `false`
8787
`queueURL` | Listens for messages on the specified SQS queue URL | None
8888
`awsRegion` | If specified, use the AWS region for AWS API calls, else NTH will try to find the region through AWS_REGION env var, IMDS, or the specified queue URL | ``
89+
`checkASGTagBeforeDraining` | If true, check that the instance is tagged with the key given by `managedAsgTag` (default "aws-node-termination-handler/managed") before draining the node | `true`
90+
`managedAsgTag` | The tag key that is checked for before draining when `checkASGTagBeforeDraining` is true | `aws-node-termination-handler/managed`
8991

9092
### AWS Node Termination Handler - IMDS Mode Configuration
9193

config/helm/aws-node-termination-handler/templates/deployment.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ spec:
140140
{{- end }}
141141
- name: CHECK_ASG_TAG_BEFORE_DRAINING
142142
value: {{ .Values.checkASGTagBeforeDraining | quote }}
143+
- name: MANAGED_ASG_TAG
144+
value: {{ .Values.managedAsgTag | quote }}
143145
resources:
144146
{{- toYaml .Values.resources | nindent 12 }}
145147
{{- if .Values.enablePrometheusServer }}

config/helm/aws-node-termination-handler/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ queueURL: ""
4545
# checkASGTagBeforeDraining If true, check that the instance is tagged with the key given by managedAsgTag (default "aws-node-termination-handler/managed") before draining the node
4646
checkASGTagBeforeDraining: true
4747

48+
# managedAsgTag The tag key that is checked for before draining when checkASGTagBeforeDraining is true
49+
managedAsgTag: "aws-node-termination-handler/managed"
50+
4851
# awsRegion If specified, use the AWS region for AWS API calls
4952
awsRegion: ""
5053

pkg/config/config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ const (
5858
enableRebalanceMonitoringDefault = false
5959
checkASGTagBeforeDrainingConfigKey = "CHECK_ASG_TAG_BEFORE_DRAINING"
6060
checkASGTagBeforeDrainingDefault = true
61+
managedAsgTagConfigKey = "MANAGED_ASG_TAG"
62+
managedAsgTagDefault = "aws-node-termination-handler/managed"
6163
metadataTriesConfigKey = "METADATA_TRIES"
6264
metadataTriesDefault = 3
6365
cordonOnly = "CORDON_ONLY"
@@ -102,6 +104,7 @@ type Config struct {
102104
EnableSQSTerminationDraining bool
103105
EnableRebalanceMonitoring bool
104106
CheckASGTagBeforeDraining bool
107+
ManagedAsgTag string
105108
MetadataTries int
106109
CordonOnly bool
107110
TaintNode bool
@@ -147,6 +150,7 @@ func ParseCliArgs() (config Config, err error) {
147150
flag.BoolVar(&config.EnableSQSTerminationDraining, "enable-sqs-termination-draining", getBoolEnv(enableSQSTerminationDrainingConfigKey, enableSQSTerminationDrainingDefault), "If true, drain nodes when an SQS termination event is received")
148151
flag.BoolVar(&config.EnableRebalanceMonitoring, "enable-rebalance-monitoring", getBoolEnv(enableRebalanceMonitoringConfigKey, enableRebalanceMonitoringDefault), "If true, cordon nodes when the rebalance recommendation notice is received")
149152
flag.BoolVar(&config.CheckASGTagBeforeDraining, "check-asg-tag-before-draining", getBoolEnv(checkASGTagBeforeDrainingConfigKey, checkASGTagBeforeDrainingDefault), "If true, check that the instance is tagged with \"aws-node-termination-handler/managed\" as the key before draining the node")
153+
flag.StringVar(&config.ManagedAsgTag, "managed-asg-tag", getEnv(managedAsgTagConfigKey, managedAsgTagDefault), "Sets the tag to check for on instances that is propogated from the ASG before taking action, default to aws-node-termination-handler/managed")
150154
flag.IntVar(&config.MetadataTries, "metadata-tries", getIntEnv(metadataTriesConfigKey, metadataTriesDefault), "The number of times to try requesting metadata. If you would like 2 retries, set metadata-tries to 3.")
151155
flag.BoolVar(&config.CordonOnly, "cordon-only", getBoolEnv(cordonOnly, false), "If true, nodes will be cordoned but not drained when an interruption event occurs.")
152156
flag.BoolVar(&config.TaintNode, "taint-node", getBoolEnv(taintNode, false), "If true, nodes will be tainted when an interruption event occurs.")
@@ -245,6 +249,7 @@ func (c Config) PrintJsonConfigArgs() {
245249
Str("aws_endpoint", c.AWSEndpoint).
246250
Str("queue_url", c.QueueURL).
247251
Bool("check_asg_tag_before_draining", c.CheckASGTagBeforeDraining).
252+
Str("ManagedAsgTag", c.ManagedAsgTag).
248253
Msg("aws-node-termination-handler arguments")
249254
}
250255

@@ -285,6 +290,7 @@ func (c Config) PrintHumanConfigArgs() {
285290
"\taws-region: %s,\n"+
286291
"\tqueue-url: %s,\n"+
287292
"\tcheck-asg-tag-before-draining: %t,\n"+
293+
"\tmanaged-asg-tag: %s,\n"+
288294
"\taws-endpoint: %s,\n",
289295
c.DryRun,
290296
c.NodeName,
@@ -314,6 +320,7 @@ func (c Config) PrintHumanConfigArgs() {
314320
c.AWSRegion,
315321
c.QueueURL,
316322
c.CheckASGTagBeforeDraining,
323+
c.ManagedAsgTag,
317324
c.AWSEndpoint,
318325
)
319326
}

pkg/monitor/sqsevent/sqs-monitor.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ import (
3131
const (
3232
// SQSTerminateKind is a const to define an SQS termination kind of interruption event
3333
SQSTerminateKind = "SQS_TERMINATE"
34-
// NTHManagedASG is the ASG tag key to determine if NTH is managing the ASG
35-
NTHManagedASG = "aws-node-termination-handler/managed"
3634
)
3735

3836
// SQSMonitor is a struct definition that knows how to process events from Amazon EventBridge
@@ -44,6 +42,7 @@ type SQSMonitor struct {
4442
ASG autoscalingiface.AutoScalingAPI
4543
EC2 ec2iface.EC2API
4644
CheckIfManaged bool
45+
ManagedAsgTag string
4746
}
4847

4948
// Kind denotes the kind of event that is processed
@@ -207,7 +206,7 @@ func (m SQSMonitor) isInstanceManaged(instanceID string) (bool, error) {
207206
isManaged := false
208207
err = m.ASG.DescribeTagsPages(&asgDescribeTagsInput, func(resp *autoscaling.DescribeTagsOutput, next bool) bool {
209208
for _, tag := range resp.Tags {
210-
if *tag.Key == NTHManagedASG {
209+
if *tag.Key == m.ManagedAsgTag {
211210
isManaged = true
212211
// breaks paging loop
213212
return false
@@ -220,7 +219,7 @@ func (m SQSMonitor) isInstanceManaged(instanceID string) (bool, error) {
220219
if !isManaged {
221220
log.Debug().
222221
Str("instance_id", instanceID).
223-
Msgf("The instance's Auto Scaling Group is not tagged as managed with tag key: %s", NTHManagedASG)
222+
Msgf("The instance's Auto Scaling Group is not tagged as managed with tag key: %s", m.ManagedAsgTag)
224223
}
225224
return isManaged, err
226225
}

pkg/monitor/sqsevent/sqs-monitor_internal_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,15 @@ func TestIsInstanceManaged(t *testing.T) {
4848
},
4949
DescribeTagsPagesResp: autoscaling.DescribeTagsOutput{
5050
Tags: []*autoscaling.TagDescription{
51-
{Key: aws.String(NTHManagedASG)},
51+
{Key: aws.String("aws-node-termination-handler/managed")},
5252
},
5353
},
5454
}
55-
monitor := SQSMonitor{ASG: asgMock}
55+
monitor := SQSMonitor{
56+
ASG: asgMock,
57+
CheckIfManaged: true,
58+
ManagedAsgTag: "aws-node-termination-handler/managed",
59+
}
5660
isManaged, err := monitor.isInstanceManaged("i-0123456789")
5761
h.Ok(t, err)
5862
h.Equals(t, true, isManaged)

pkg/monitor/sqsevent/sqs-monitor_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ func mockIsManagedTrue(asg *h.MockedASG) h.MockedASG {
539539
}
540540
asg.DescribeTagsPagesResp = autoscaling.DescribeTagsOutput{
541541
Tags: []*autoscaling.TagDescription{
542-
{Key: aws.String(sqsevent.NTHManagedASG)},
542+
{Key: aws.String("aws-node-termination-handler/managed")},
543543
},
544544
}
545545
return *asg

0 commit comments

Comments
 (0)