-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcwagent_alarms.tf
More file actions
218 lines (204 loc) · 8.44 KB
/
cwagent_alarms.tf
File metadata and controls
218 lines (204 loc) · 8.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# ------------------------------------------------------------------------------
# Create a set of standard CloudWatch alarms for an EC2 instance.
# These alarms are based on metrics that come from the CloudWatch
# agent running on the instance (i.e., from the CWAgent metric
# namespace).
# ------------------------------------------------------------------------------
# Memory utilization alarm, driven by the mem_used_percent metric in
# the CWAgent namespace.  The alarm exists only when both
# var.create_cloudwatch_agent_alarms and
# var.memory_utilization_alarm_parameters.create_alarm are true.
resource "aws_cloudwatch_metric_alarm" "memory_utilization" {
  # Zero or one alarm, keyed by the EC2 instance ID.
  for_each = (
    var.memory_utilization_alarm_parameters.create_alarm && var.create_cloudwatch_agent_alarms
    ? toset([var.instance_id])
    : toset([])
  )

  alarm_name        = "ec2_memory_utilization_${each.key}"
  alarm_description = "Monitor EC2 instance memory utilization"

  # The metric under observation.
  # NOTE(review): the alarm only receives data if the agent publishes
  # this metric with InstanceId as its sole dimension - confirm
  # against the CloudWatch agent configuration.
  namespace   = "CWAgent"
  metric_name = "mem_used_percent"
  dimensions = {
    InstanceId = each.key
  }

  # Evaluation settings, all supplied by the caller.
  statistic           = var.memory_utilization_alarm_parameters.statistic
  period              = var.memory_utilization_alarm_parameters.period
  evaluation_periods  = var.memory_utilization_alarm_parameters.evaluation_periods
  datapoints_to_alarm = var.memory_utilization_alarm_parameters.datapoints_to_alarm
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.memory_utilization_alarm_parameters.threshold

  # Actions to trigger on each state transition.
  alarm_actions             = var.alarm_actions
  ok_actions                = var.ok_actions
  insufficient_data_actions = var.insufficient_data_actions
}
# Disk utilization alarm, driven by the disk_used_percent metric in
# the CWAgent namespace.  The alarm exists only when both
# var.create_cloudwatch_agent_alarms and
# var.disk_utilization_alarm_parameters.create_alarm are true.
resource "aws_cloudwatch_metric_alarm" "disk_utilization" {
  # Zero or one alarm, keyed by the EC2 instance ID.
  for_each = (
    var.disk_utilization_alarm_parameters.create_alarm && var.create_cloudwatch_agent_alarms
    ? toset([var.instance_id])
    : toset([])
  )

  alarm_name        = "ec2_disk_utilization_${each.key}"
  alarm_description = "Monitor EC2 instance disk utilization"

  # The metric under observation.
  # NOTE(review): the alarm only receives data if the agent publishes
  # this metric with InstanceId as its sole dimension (i.e., without
  # per-path/device/fstype dimensions) - confirm against the
  # CloudWatch agent configuration.
  namespace   = "CWAgent"
  metric_name = "disk_used_percent"
  dimensions = {
    InstanceId = each.key
  }

  # Evaluation settings, all supplied by the caller.
  statistic           = var.disk_utilization_alarm_parameters.statistic
  period              = var.disk_utilization_alarm_parameters.period
  evaluation_periods  = var.disk_utilization_alarm_parameters.evaluation_periods
  datapoints_to_alarm = var.disk_utilization_alarm_parameters.datapoints_to_alarm
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.disk_utilization_alarm_parameters.threshold

  # Actions to trigger on each state transition.
  alarm_actions             = var.alarm_actions
  ok_actions                = var.ok_actions
  insufficient_data_actions = var.insufficient_data_actions
}
# Inbound bandwidth allowance alarm.  Fires whenever packets were
# queued and/or dropped because the inbound aggregate bandwidth
# exceeded the maximum for the instance.  Created only when
# var.create_cloudwatch_agent_alarms is true.
resource "aws_cloudwatch_metric_alarm" "bw_in_allowance_exceeded" {
  # Zero or one alarm, keyed by the EC2 instance ID.
  for_each = (
    var.create_cloudwatch_agent_alarms
    ? toset([var.instance_id])
    : toset([])
  )

  alarm_name        = "bw_in_allowance_exceeded_${each.key}"
  alarm_description = "Monitor EC2 instance inbound bandwidth allowance"

  # Raw counter as reported in the CWAgent namespace, sampled at
  # one-minute granularity.  It only serves as input to the RATE()
  # expression below.
  metric_query {
    id = "bw_in_allowance_exceeded_count"

    metric {
      namespace   = "CWAgent"
      metric_name = "ethtool_bw_in_allowance_exceeded"
      stat        = "Maximum"
      period      = 60
      dimensions = {
        InstanceId = each.key
      }
    }
  }

  # Rate of change of the counter above; this is the value that the
  # alarm actually evaluates (return_data = true).
  metric_query {
    id          = "bw_in_allowance_exceeded_rate"
    label       = "Inbound Bandwidth Allowance Exceeded Rate of Change"
    expression  = "RATE(bw_in_allowance_exceeded_count)"
    return_data = true
  }

  # Any positive rate in a single evaluation period trips the alarm.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  evaluation_periods  = 1

  # Actions to trigger on each state transition.
  alarm_actions             = var.alarm_actions
  ok_actions                = var.ok_actions
  insufficient_data_actions = var.insufficient_data_actions
}
# Outbound bandwidth allowance alarm.  Fires whenever packets were
# queued and/or dropped because the outbound aggregate bandwidth
# exceeded the maximum for the instance.  Created only when
# var.create_cloudwatch_agent_alarms is true.
resource "aws_cloudwatch_metric_alarm" "bw_out_allowance_exceeded" {
  # Zero or one alarm, keyed by the EC2 instance ID.
  for_each = (
    var.create_cloudwatch_agent_alarms
    ? toset([var.instance_id])
    : toset([])
  )

  alarm_name        = "bw_out_allowance_exceeded_${each.key}"
  alarm_description = "Monitor EC2 instance outbound bandwidth allowance"

  # Raw counter as reported in the CWAgent namespace, sampled at
  # one-minute granularity.  It only serves as input to the RATE()
  # expression below.
  metric_query {
    id = "bw_out_allowance_exceeded_count"

    metric {
      namespace   = "CWAgent"
      metric_name = "ethtool_bw_out_allowance_exceeded"
      stat        = "Maximum"
      period      = 60
      dimensions = {
        InstanceId = each.key
      }
    }
  }

  # Rate of change of the counter above; this is the value that the
  # alarm actually evaluates (return_data = true).
  metric_query {
    id          = "bw_out_allowance_exceeded_rate"
    label       = "Outbound Bandwidth Allowance Exceeded Rate of Change"
    expression  = "RATE(bw_out_allowance_exceeded_count)"
    return_data = true
  }

  # Any positive rate in a single evaluation period trips the alarm.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  evaluation_periods  = 1

  # Actions to trigger on each state transition.
  alarm_actions             = var.alarm_actions
  ok_actions                = var.ok_actions
  insufficient_data_actions = var.insufficient_data_actions
}
# Connection tracking allowance alarm.  Fires whenever packets were
# dropped because connection tracking exceeded the maximum for the
# instance and new connections could not be established.  Created
# only when var.create_cloudwatch_agent_alarms is true.
resource "aws_cloudwatch_metric_alarm" "conntrack_allowance_exceeded" {
  # Zero or one alarm, keyed by the EC2 instance ID.
  for_each = (
    var.create_cloudwatch_agent_alarms
    ? toset([var.instance_id])
    : toset([])
  )

  alarm_name        = "conntrack_allowance_exceeded_${each.key}"
  alarm_description = "Monitor EC2 instance conntrack allowance"

  # Raw counter as reported in the CWAgent namespace, sampled at
  # one-minute granularity.  It only serves as input to the RATE()
  # expression below.
  metric_query {
    id = "conntrack_allowance_exceeded_count"

    metric {
      namespace   = "CWAgent"
      metric_name = "ethtool_conntrack_allowance_exceeded"
      stat        = "Maximum"
      period      = 60
      dimensions = {
        InstanceId = each.key
      }
    }
  }

  # Rate of change of the counter above; this is the value that the
  # alarm actually evaluates (return_data = true).
  metric_query {
    id          = "conntrack_allowance_exceeded_rate"
    label       = "Conntrack Allowance Exceeded Rate of Change"
    expression  = "RATE(conntrack_allowance_exceeded_count)"
    return_data = true
  }

  # Any positive rate in a single evaluation period trips the alarm.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  evaluation_periods  = 1

  # Actions to trigger on each state transition.
  alarm_actions             = var.alarm_actions
  ok_actions                = var.ok_actions
  insufficient_data_actions = var.insufficient_data_actions
}
# Link-local allowance alarm.  Fires whenever packets were dropped
# because the PPS of the traffic to local proxy services exceeded the
# maximum for the network interface.  This impacts traffic to the DNS
# service, the Instance Metadata Service, and the Amazon Time Sync
# Service.  Created only when var.create_cloudwatch_agent_alarms is
# true.
resource "aws_cloudwatch_metric_alarm" "linklocal_allowance_exceeded" {
  # Zero or one alarm, keyed by the EC2 instance ID.
  for_each = (
    var.create_cloudwatch_agent_alarms
    ? toset([var.instance_id])
    : toset([])
  )

  alarm_name        = "linklocal_allowance_exceeded_${each.key}"
  alarm_description = "Monitor EC2 instance linklocal allowance"

  # Raw counter as reported in the CWAgent namespace, sampled at
  # one-minute granularity.  It only serves as input to the RATE()
  # expression below.
  metric_query {
    id = "linklocal_allowance_exceeded_count"

    metric {
      namespace   = "CWAgent"
      metric_name = "ethtool_linklocal_allowance_exceeded"
      stat        = "Maximum"
      period      = 60
      dimensions = {
        InstanceId = each.key
      }
    }
  }

  # Rate of change of the counter above; this is the value that the
  # alarm actually evaluates (return_data = true).
  metric_query {
    id          = "linklocal_allowance_exceeded_rate"
    label       = "Linklocal Allowance Exceeded Rate of Change"
    expression  = "RATE(linklocal_allowance_exceeded_count)"
    return_data = true
  }

  # Any positive rate in a single evaluation period trips the alarm.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  evaluation_periods  = 1

  # Actions to trigger on each state transition.
  alarm_actions             = var.alarm_actions
  ok_actions                = var.ok_actions
  insufficient_data_actions = var.insufficient_data_actions
}
# PPS allowance alarm.  Fires whenever packets were queued and/or
# dropped because the bidirectional PPS exceeded the maximum for the
# instance.  Created only when var.create_cloudwatch_agent_alarms is
# true.
resource "aws_cloudwatch_metric_alarm" "pps_allowance_exceeded" {
  # Zero or one alarm, keyed by the EC2 instance ID.
  for_each = (
    var.create_cloudwatch_agent_alarms
    ? toset([var.instance_id])
    : toset([])
  )

  alarm_name        = "pps_allowance_exceeded_${each.key}"
  alarm_description = "Monitor EC2 instance PPS allowance"

  # Raw counter as reported in the CWAgent namespace, sampled at
  # one-minute granularity.  It only serves as input to the RATE()
  # expression below.
  metric_query {
    id = "pps_allowance_exceeded_count"

    metric {
      namespace   = "CWAgent"
      metric_name = "ethtool_pps_allowance_exceeded"
      stat        = "Maximum"
      period      = 60
      dimensions = {
        InstanceId = each.key
      }
    }
  }

  # Rate of change of the counter above; this is the value that the
  # alarm actually evaluates (return_data = true).
  metric_query {
    id          = "pps_allowance_exceeded_rate"
    label       = "PPS Allowance Exceeded Rate of Change"
    expression  = "RATE(pps_allowance_exceeded_count)"
    return_data = true
  }

  # Any positive rate in a single evaluation period trips the alarm.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  evaluation_periods  = 1

  # Actions to trigger on each state transition.
  alarm_actions             = var.alarm_actions
  ok_actions                = var.ok_actions
  insufficient_data_actions = var.insufficient_data_actions
}