Skip to content

Commit bb42deb

Browse files
authored
Merge branch 'main' into GDB-11618
2 parents aa85f77 + 4b91b20 commit bb42deb

10 files changed

+248
-12
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
* Added a new variable, deploy_tag, to be used for tagging resources as part of the deployment. This allows for stricter IAM policies on certain (dangerous) actions
1414
* Changed graphdb_instance_volume policy to restrict ec2:AttachVolume and ec2:CreateVolume to only specifically tagged volumes
1515
* Extended graphdb_instance_volume_tagging by adding an additional constraint on ec2:CreateTags to allow instances that are already tagged with deploy_tag to be tagged with a Name
16+
* Removed access to the AWS CLI for users other than root
17+
* Added a toggle for enabling/disabling the availability tests in CloudWatch
1618

1719
## 1.3.3
1820

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ Before you begin using this Terraform module, ensure you meet the following prer
155155
| monitoring\_route53\_health\_check\_aws\_region | Define the region in which you want the monitoring to be deployed. It is used to define where the Route53 Availability Check will be deployed, since if it is not specified it will deploy the check in us-east-1 and if you deploy in different region it will not find the dimensions. | `string` | `"us-east-1"` | no |
156156
| monitoring\_route53\_availability\_http\_port | Define the HTTP port for the Route53 availability check | `number` | `80` | no |
157157
| monitoring\_route53\_availability\_https\_port | Define the HTTPS port for the Route53 availability check | `number` | `443` | no |
158+
| monitoring\_enable\_availability\_tests | Enable Route 53 availability tests and alarms | `bool` | `true` | no |
158159
| graphdb\_properties\_path | Path to a local file containing GraphDB properties (graphdb.properties) that would be appended to the default in the VM. | `string` | `null` | no |
159160
| graphdb\_java\_options | GraphDB options to pass to GraphDB with GRAPHDB\_JAVA\_OPTS environment variable. | `string` | `null` | no |
160161
| deploy\_logging\_module | Enable or disable logging module | `bool` | `false` | no |

main.tf

+1
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ module "monitoring" {
222222
cmk_key_alias = var.sns_cmk_key_alias
223223
parameter_store_kms_key_arn = local.calculated_parameter_store_kms_key_arn
224224
cloudwatch_log_group_retention_in_days = var.monitoring_log_group_retention_in_days
225+
enable_availability_tests = var.monitoring_enable_availability_tests
225226

226227
route53_availability_check_region = var.monitoring_route53_health_check_aws_region
227228
route53_availability_request_url = var.graphdb_node_count > 1 ? var.graphdb_external_dns : module.load_balancer.lb_dns_name

modules/graphdb/templates/05_gdb_backup_conf.sh.tpl

+5-4
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ echo "# Configuring the GraphDB backup cron job #"
1717
echo "#################################################"
1818

1919
if [ ${deploy_backup} == "true" ]; then
20+
GRAPHDB_ADMIN_PASSWORD="$(aws --cli-connect-timeout 300 ssm get-parameter --region ${region} --name "/${name}/graphdb/admin_password" --with-decryption | jq -r .Parameter.Value | base64 -d)"
2021
cat <<-EOF >/usr/bin/graphdb_backup
2122
#!/bin/bash
2223
2324
set -euo pipefail
24-
25-
GRAPHDB_ADMIN_PASSWORD="\$(aws --cli-connect-timeout 300 ssm get-parameter --region ${region} --name "/${name}/graphdb/admin_password" --with-decryption | jq -r .Parameter.Value | base64 -d)"
25+
GRAPHDB_ADMIN_PASSWORD="\$1"
2626
NODE_STATE="\$(curl --silent -u "admin:\$GRAPHDB_ADMIN_PASSWORD" http://localhost:7201/rest/cluster/node/status | jq -r .nodeState)"
2727
2828
function trigger_backup {
@@ -78,12 +78,13 @@ elif [ "\$IS_CLUSTER" -ne 200 ]; then
7878
(trigger_backup && echo "") | tee -a /var/opt/graphdb/node/graphdb_backup.log
7979
fi
8080
81-
rotate_backups
81+
rotate_backups
8282
8383
EOF
8484

8585
chmod +x /usr/bin/graphdb_backup
86-
echo "${backup_schedule} graphdb /usr/bin/graphdb_backup" >/etc/cron.d/graphdb_backup
86+
echo "${backup_schedule} graphdb /usr/bin/graphdb_backup $GRAPHDB_ADMIN_PASSWORD" >/etc/cron.d/graphdb_backup
87+
chmod og-rwx /etc/cron.d/graphdb_backup
8788

8889
log_with_timestamp "Cron job created"
8990
else

modules/graphdb/user_data.tf

+10
Original file line numberDiff line numberDiff line change
@@ -153,4 +153,14 @@ data "cloudinit_config" "graphdb_user_data" {
153153
EOF
154154
}
155155
}
156+
157+
# 12 Make aws-cli accessible only for root user
158+
part {
159+
content_type = "text/x-shellscript"
160+
content = <<-EOF
161+
#!/bin/bash
162+
set -euo pipefail
163+
chmod -R og-rwx /usr/local/aws-cli/
164+
EOF
165+
}
156166
}

modules/monitoring/availability_tests.tf

+11-5
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,27 @@
33
# SNS Topic for the Route53 Health Check Alarm
44

55
resource "aws_sns_topic" "graphdb_route53_sns_topic" {
6+
count = var.enable_availability_tests ? 1 : 0
7+
68
provider = aws.useast1
79
name = "${var.resource_name_prefix}-route53-sns-notifications"
810
kms_master_key_id = var.sns_external_kms_key != "" ? var.sns_external_kms_key : (var.enable_sns_kms_key ? aws_kms_key.sns_cmk[0].arn : var.sns_default_kms_key)
911
}
1012

1113
resource "aws_sns_topic_subscription" "graphdb_route53_sns_topic_subscription" {
14+
count = var.enable_availability_tests ? 1 : 0
15+
1216
provider = aws.useast1
13-
topic_arn = aws_sns_topic.graphdb_route53_sns_topic.id
17+
topic_arn = aws_sns_topic.graphdb_route53_sns_topic[0].id
1418
protocol = var.sns_protocol
1519
endpoint = var.sns_topic_endpoint
1620
endpoint_auto_confirms = var.sns_endpoint_auto_confirms
1721
}
1822

1923
# Route 53 Availability Check
20-
2124
resource "aws_route53_health_check" "graphdb_availability_check" {
25+
count = var.enable_availability_tests ? 1 : 0
26+
2227
provider = aws.useast1
2328
failure_threshold = var.route53_availability_timeout
2429
fqdn = var.route53_availability_request_url != "" ? var.route53_availability_request_url : var.lb_dns_name
@@ -31,8 +36,9 @@ resource "aws_route53_health_check" "graphdb_availability_check" {
3136
}
3237

3338
# Availability Alert
34-
3539
resource "aws_cloudwatch_metric_alarm" "graphdb_availability_alert" {
40+
count = var.enable_availability_tests ? 1 : 0
41+
3642
provider = aws.useast1
3743
alarm_name = "al-${var.resource_name_prefix}-availability"
3844
alarm_description = "Alarm will trigger if availability goes beneath 100"
@@ -44,9 +50,9 @@ resource "aws_cloudwatch_metric_alarm" "graphdb_availability_alert" {
4450
statistic = "Average"
4551
threshold = "100"
4652
actions_enabled = var.cloudwatch_alarms_actions_enabled
47-
alarm_actions = [aws_sns_topic.graphdb_route53_sns_topic.arn]
53+
alarm_actions = [aws_sns_topic.graphdb_route53_sns_topic[0].arn]
4854

4955
dimensions = {
50-
HealthCheckId = aws_route53_health_check.graphdb_availability_check.id
56+
HealthCheckId = aws_route53_health_check.graphdb_availability_check[0].id
5157
}
5258
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
{
2+
"widgets": [
3+
{
4+
"height": 6,
5+
"width": 6,
6+
"y": 0,
7+
"x": 6,
8+
"type": "metric",
9+
"properties": {
10+
"metrics": [
11+
[ { "expression": "SELECT MAX(CPUUtilization) FROM \"AWS/EC2\" GROUP BY AutoScalingGroupName", "label": "Query1", "id": "q1", "region": "${aws_region}" } ]
12+
],
13+
"region": "${aws_region}",
14+
"stacked": false,
15+
"view": "timeSeries",
16+
"period": 300,
17+
"stat": "Average",
18+
"yAxis": {
19+
"left": {
20+
"max": 100,
21+
"label": "Percent",
22+
"showUnits": false
23+
},
24+
"right": {
25+
"label": "",
26+
"showUnits": false
27+
}
28+
},
29+
"title": "CPU utilization for the Auto Scaling Group"
30+
}
31+
},
32+
{
33+
"height": 6,
34+
"width": 6,
35+
"y": 0,
36+
"x": 12,
37+
"type": "metric",
38+
"properties": {
39+
"metrics": [
40+
[ { "expression": "SELECT MAX(mem_used_percent) FROM \"CWAgent\" GROUP BY AutoScalingGroupName", "label": "Query1", "id": "q1", "region": "${aws_region}" } ]
41+
],
42+
"region": "${aws_region}",
43+
"stacked": false,
44+
"view": "timeSeries",
45+
"period": 300,
46+
"stat": "Average",
47+
"yAxis": {
48+
"left": {
49+
"max": 100,
50+
"label": "Percent",
51+
"showUnits": false
52+
},
53+
"right": {
54+
"label": "",
55+
"showUnits": false
56+
}
57+
},
58+
"title": "GraphDB Memory Used % for the Auto Scaling Group"
59+
}
60+
},
61+
{
62+
"height": 6,
63+
"width": 6,
64+
"y": 6,
65+
"x": 6,
66+
"type": "metric",
67+
"properties": {
68+
"metrics": [
69+
[ { "expression": "SELECT AVG(graphdb_data_dir_free) FROM \"${resource_name_prefix}\" GROUP BY host", "label": "Query1", "id": "q1", "region": "${aws_region}" } ]
70+
],
71+
"region": "${aws_region}",
72+
"stacked": false,
73+
"view": "timeSeries",
74+
"period": 300,
75+
"stat": "Average",
76+
"yAxis": {
77+
"left": {
78+
"label": "Gigabytes",
79+
"showUnits": false
80+
},
81+
"right": {
82+
"label": "",
83+
"showUnits": false
84+
}
85+
},
86+
"title": "GraphDB Data Dir Free per instance"
87+
}
88+
},
89+
{
90+
"height": 6,
91+
"width": 6,
92+
"y": 6,
93+
"x": 0,
94+
"type": "metric",
95+
"properties": {
96+
"metrics": [
97+
[ { "expression": "SELECT AVG(graphdb_data_dir_used) FROM \"${resource_name_prefix}\" GROUP BY host", "label": "Query1", "id": "q1", "region": "${aws_region}" } ]
98+
],
99+
"region": "${aws_region}",
100+
"stacked": false,
101+
"view": "timeSeries",
102+
"period": 300,
103+
"stat": "Average",
104+
"yAxis": {
105+
"left": {
106+
"label": "Gigabytes",
107+
"showUnits": false
108+
},
109+
"right": {
110+
"label": "",
111+
"showUnits": false
112+
}
113+
},
114+
"title": "GraphDB Data Dir Used per instance"
115+
}
116+
},
117+
{
118+
"height": 6,
119+
"width": 6,
120+
"y": 0,
121+
"x": 18,
122+
"type": "metric",
123+
"properties": {
124+
"metrics": [
125+
[ { "expression": "SELECT COUNT(graphdb_failure_recoveries_count) FROM \"${resource_name_prefix}\"", "label": "Query1", "id": "q1", "region": "${aws_region}" } ]
126+
],
127+
"region": "${aws_region}",
128+
"stacked": false,
129+
"view": "timeSeries",
130+
"period": 300,
131+
"stat": "Average",
132+
"yAxis": {
133+
"left": {
134+
"label": "Count",
135+
"showUnits": false
136+
},
137+
"right": {
138+
"label": "",
139+
"showUnits": false
140+
}
141+
},
142+
"title": "GraphDB Failure Recoveries"
143+
}
144+
},
145+
{
146+
"height": 6,
147+
"width": 6,
148+
"y": 6,
149+
"x": 18,
150+
"type": "metric",
151+
"properties": {
152+
"metrics": [
153+
[ { "expression": "SELECT MAX(graphdb_nodes_disconnected) FROM \"${resource_name_prefix}\"", "label": "Query1", "id": "q1", "region": "${aws_region}" } ]
154+
],
155+
"region": "${aws_region}",
156+
"stacked": false,
157+
"view": "timeSeries",
158+
"period": 300,
159+
"stat": "Average",
160+
"yAxis": {
161+
"left": {
162+
"label": "Count",
163+
"showUnits": false
164+
},
165+
"right": {
166+
"label": "",
167+
"showUnits": false
168+
}
169+
},
170+
"title": "GraphDB nodes disconnected"
171+
}
172+
},
173+
{
174+
"height": 6,
175+
"width": 6,
176+
"y": 6,
177+
"x": 12,
178+
"type": "metric",
179+
"properties": {
180+
"metrics": [
181+
[ { "expression": "SELECT MAX(graphdb_nodes_out_of_sync) FROM \"${resource_name_prefix}\"", "label": "Query1", "id": "q1", "region": "${aws_region}" } ]
182+
],
183+
"region": "${aws_region}",
184+
"stacked": false,
185+
"view": "timeSeries",
186+
"period": 300,
187+
"stat": "Average",
188+
"yAxis": {
189+
"left": {
190+
"label": "Count",
191+
"showUnits": false
192+
},
193+
"right": {
194+
"label": "",
195+
"showUnits": false
196+
}
197+
},
198+
"title": "GraphDB nodes out of sync"
199+
}
200+
}
201+
]
202+
}

modules/monitoring/main.tf

+5-3
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ resource "aws_ssm_parameter" "graphdb_cloudwatch_agent_config" {
2323

2424
resource "aws_cloudwatch_dashboard" "graphdb_dashboard" {
2525
dashboard_name = var.resource_name_prefix
26-
dashboard_body = templatefile("${path.module}/graphdb_dashboard.json", {
27-
health_check_id = aws_route53_health_check.graphdb_availability_check.id
26+
dashboard_body = var.enable_availability_tests ? templatefile("${path.module}/graphdb_dashboard.json", {
27+
health_check_id = aws_route53_health_check.graphdb_availability_check[0].id
2828
resource_name_prefix = var.resource_name_prefix
2929
aws_region = var.aws_region
3030
route53_availability_check_region = var.route53_availability_check_region
31+
}) : templatefile("${path.module}/graphdb_dashboard_no_availability.json", {
32+
resource_name_prefix = var.resource_name_prefix
33+
aws_region = var.aws_region
3134
})
3235
}
3336

34-

modules/monitoring/variables.tf

+5
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,8 @@ variable "lb_dns_name" {
202202
description = "Define the LB DNS name"
203203
type = string
204204
}
205+
206+
variable "enable_availability_tests" {
207+
description = "Enable Route 53 availability tests and alarms"
208+
type = bool
209+
}

variables.tf

+6
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,12 @@ variable "monitoring_route53_availability_https_port" {
418418
default = 443
419419
}
420420

421+
variable "monitoring_enable_availability_tests" {
422+
description = "Enable Route 53 availability tests and alarms"
423+
type = bool
424+
default = true
425+
}
426+
421427
# GraphDB overrides
422428

423429
variable "graphdb_properties_path" {

0 commit comments

Comments
 (0)