@@ -56,35 +56,48 @@ func (m SQSMonitor) Kind() string {
56
56
57
57
// Monitor continuously monitors SQS for events and sends interruption events to the passed in channel
58
58
func (m SQSMonitor ) Monitor () error {
59
- interruptionEvent , err := m .checkForSQSMessage ()
59
+ log .Debug ().Msg ("Checking for queue messages" )
60
+ messages , err := m .receiveQueueMessages (m .QueueURL )
60
61
if err != nil {
61
- if errors .Is (err , ErrNodeStateNotRunning ) {
62
- log .Warn ().Err (err ).Msg ("dropping event for an already terminated node" )
63
- return nil
64
- }
65
62
return err
66
63
}
67
- if interruptionEvent != nil && interruptionEvent .Kind == SQSTerminateKind {
68
- log .Debug ().Msgf ("Sending %s interruption event to the interruption channel" , SQSTerminateKind )
69
- m .InterruptionChan <- * interruptionEvent
70
- }
71
- return nil
72
- }
73
64
74
- // checkForSpotInterruptionNotice checks sqs for new messages and returns interruption events
75
- func (m SQSMonitor ) checkForSQSMessage () (* monitor.InterruptionEvent , error ) {
65
+ failedEvents := 0
66
+ for _ , message := range messages {
67
+ interruptionEvent , err := m .processSQSMessage (message )
68
+ switch {
69
+ case errors .Is (err , ErrNodeStateNotRunning ):
70
+ // If the node is no longer running, just log and delete the message. If message deletion fails, count it as an error.
71
+ log .Warn ().Err (err ).Msg ("dropping event for an already terminated node" )
72
+ errs := m .deleteMessages ([]* sqs.Message {message })
73
+ if len (errs ) > 0 {
74
+ log .Warn ().Err (errs [0 ]).Msg ("error deleting event for already terminated node" )
75
+ failedEvents ++
76
+ }
76
77
77
- log .Debug ().Msg ("Checking for queue messages" )
78
- messages , err := m .receiveQueueMessages (m .QueueURL )
79
- if err != nil {
80
- return nil , err
78
+ case err != nil :
79
+ // Log errors and record as failed events
80
+ log .Warn ().Err (err ).Msg ("ignoring event due to error" )
81
+ failedEvents ++
82
+
83
+ case err == nil && interruptionEvent != nil && interruptionEvent .Kind == SQSTerminateKind :
84
+ // Successfully processed SQS message into a SQSTerminateKind interruption event
85
+ log .Debug ().Msgf ("Sending %s interruption event to the interruption channel" , SQSTerminateKind )
86
+ m .InterruptionChan <- * interruptionEvent
87
+ }
81
88
}
82
- if len (messages ) == 0 {
83
- return nil , nil
89
+
90
+ if len (messages ) > 0 && failedEvents == len (messages ) {
91
+ return fmt .Errorf ("All of the waiting queue events could not be processed" )
84
92
}
85
93
94
+ return nil
95
+ }
96
+
97
+ // processSQSMessage converts a single, already-received SQS message into an interruption event
98
+ func (m SQSMonitor ) processSQSMessage (message * sqs.Message ) (* monitor.InterruptionEvent , error ) {
86
99
event := EventBridgeEvent {}
87
- err = json .Unmarshal ([]byte (* messages [ 0 ] .Body ), & event )
100
+ err : = json .Unmarshal ([]byte (* message .Body ), & event )
88
101
if err != nil {
89
102
return nil , err
90
103
}
@@ -93,17 +106,17 @@ func (m SQSMonitor) checkForSQSMessage() (*monitor.InterruptionEvent, error) {
93
106
94
107
switch event .Source {
95
108
case "aws.autoscaling" :
96
- interruptionEvent , err = m .asgTerminationToInterruptionEvent (event , messages )
109
+ interruptionEvent , err = m .asgTerminationToInterruptionEvent (event , message )
97
110
if err != nil {
98
111
return nil , err
99
112
}
100
113
case "aws.ec2" :
101
114
if event .DetailType == "EC2 Instance State-change Notification" {
102
- interruptionEvent , err = m .ec2StateChangeToInterruptionEvent (event , messages )
115
+ interruptionEvent , err = m .ec2StateChangeToInterruptionEvent (event , message )
103
116
} else if event .DetailType == "EC2 Spot Instance Interruption Warning" {
104
- interruptionEvent , err = m .spotITNTerminationToInterruptionEvent (event , messages )
117
+ interruptionEvent , err = m .spotITNTerminationToInterruptionEvent (event , message )
105
118
} else if event .DetailType == "EC2 Instance Rebalance Recommendation" {
106
- interruptionEvent , err = m .rebalanceRecommendationToInterruptionEvent (event , messages )
119
+ interruptionEvent , err = m .rebalanceRecommendationToInterruptionEvent (event , message )
107
120
}
108
121
if err != nil {
109
122
return nil , err
@@ -140,7 +153,7 @@ func (m SQSMonitor) receiveQueueMessages(qURL string) ([]*sqs.Message, error) {
140
153
aws .String (sqs .QueueAttributeNameAll ),
141
154
},
142
155
QueueUrl : & qURL ,
143
- MaxNumberOfMessages : aws .Int64 (2 ),
156
+ MaxNumberOfMessages : aws .Int64 (5 ),
144
157
VisibilityTimeout : aws .Int64 (20 ), // 20 seconds
145
158
WaitTimeSeconds : aws .Int64 (0 ),
146
159
})
0 commit comments