Skip to content

Commit 3dd208f

Browse files
javsanbel2javbeltran_expedia
andauthored
Add Glue stats automatic collection (#328)
Co-authored-by: javbeltran_expedia <javbeltran@expediagroup.com>
1 parent b5754f7 commit 3dd208f

File tree

6 files changed

+82
-11
lines changed

6 files changed

+82
-11
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file.
33

44
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
55

6+
## [7.12.0] - 2025-10-10
7+
### Changed
8+
- Add Glue stats automatic collection.
9+
610
## [7.11.1] - 2025-08-04
711
### Changed
812
- Fixed conditional_consumer_iamroles permissions for iceberg.

VARIABLES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
| apiary\_common\_producer\_iamroles | AWS IAM roles allowed general (not tied to schema) write access to managed Apiary S3 buckets. | `list(string)` | `[]` | no |
3030
| disable\_glue\_db\_init | Glue databases are created programmatically by default in hms-readwrite bootstrap init action. Setting this variable to true will disable the hms-readwrite bootstrap init action and create Glue databases via Terraform. | `bool` | `false` | no |
3131
| create\_lf\_resource | All available schemas will be registered in Lake Formation as resources if this is enabled. | `bool` | `false` | no |
32+
| enable\_glue\_stats | Enable Glue Stats automatic collection. Glue mode should be enabled first. | `bool` | `false` | no |
3233
| lf\_hybrid\_access\_enabled | Lake Formation Hybrid access will be set to `true` in Lake formation resources. | `bool` | `false` | no |
3334
| lf\_catalog\_client\_arns | AWS IAM role ARNs granted describe permissions on all glue databases and tables using LakeFormation. | `list(string)` | `[]` | no |
3435
| lf\_customer\_accounts | AWS account IDs granted describe permissions on all glue databases using LakeFormation. | `list(string)` | `[]` | no |

glue-stats.tf

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Runs scripts/enable-glue-stats.sh from the machine executing Terraform.
# Only created when var.enable_glue_stats is true.
resource "null_resource" "automatic_glue_stats_collector_script" {
  count = var.enable_glue_stats ? 1 : 0

  # A change to any of these values replaces the resource, which re-runs
  # the provisioner below.
  triggers = {
    account_id = data.aws_caller_identity.current.account_id
    region     = var.aws_region
    role_arn   = aws_iam_role.glue_stats_service_role[0].arn
  }

  provisioner "local-exec" {
    command = "${path.module}/scripts/enable-glue-stats.sh"
    environment = {
      ACCOUNT_ID = data.aws_caller_identity.current.account_id
      # Pass the dedicated stats role, the same one tracked in `triggers`.
      # The previous reference (glue_service_role[0]) pointed at a different
      # role that is not controlled by var.enable_glue_stats.
      ROLE_ARN = aws_iam_role.glue_stats_service_role[0].arn
    }
  }
}

glue.tf

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,3 @@ resource "aws_glue_catalog_database" "apiary_system_glue_database" {
2424
name = "${local.gluedb_prefix}${var.system_schema_name}"
2525
description = "Managed by Apiary terraform"
2626
}
27-
28-
resource "null_resource" "automatic_glue_stats_collector_script" {
29-
count = var.enable_gluesync ? 1 : 0
30-
provisioner "local-exec" {
31-
command = "${path.module}/scripts/enable-glue-stats.sh"
32-
environment = {
33-
ACCOUNT_ID = data.aws_caller_identity.current.account_id
34-
ROLE_ARN = aws_iam_role.glue_service_role[0].arn
35-
}
36-
}
37-
}

iam-glue-stats-role.tf

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
resource "aws_iam_role" "glue_stats_service_role" {
2+
count = var.enable_glue_stats ? 1 : 0
3+
4+
name = "glue-stats-service-role"
5+
6+
assume_role_policy = jsonencode({
7+
Version = "2012-10-17"
8+
Statement = [
9+
{
10+
Action = "sts:AssumeRole"
11+
Effect = "Allow"
12+
Principal = {
13+
Service = "glue.amazonaws.com"
14+
}
15+
}
16+
]
17+
})
18+
}
19+
20+
resource "aws_iam_role_policy_attachment" "glue_stats_service_role_policy" {
21+
count = var.enable_glue_stats ? 1 : 0
22+
23+
role = aws_iam_role.glue_stats_service_role[0].name
24+
policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"
25+
}
26+
27+
resource "aws_iam_role_policy" "glue_stats_service_role_lf_policy" {
28+
count = var.enable_glue_stats ? 1 : 0
29+
30+
role = aws_iam_role.glue_stats_service_role[0].name
31+
policy = <<EOF
32+
{
33+
"Version": "2012-10-17",
34+
"Statement": [
35+
{
36+
"Sid": "LakeFormationDataAccess",
37+
"Effect": "Allow",
38+
"Action": "lakeformation:GetDataAccess",
39+
"Resource": [
40+
"*"
41+
]
42+
},
43+
{
44+
"Sid": "PassRole",
45+
"Effect": "Allow",
46+
"Action": "iam:PassRole",
47+
"Resource": [
48+
"${aws_iam_role.glue_stats_service_role[0].arn}"
49+
]
50+
}
51+
]
52+
}
53+
EOF
54+
}

variables.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,6 +1203,12 @@ variable "enable_splunk_logging" {
12031203
default = false
12041204
}
12051205

1206+
variable "enable_glue_stats" {
1207+
description = "Enable automatic Glue column statistics collection."
1208+
type = bool
1209+
default = false
1210+
}
1211+
12061212
variable "splunk_hec_token" {
12071213
description = "The token used for authentication with the Splunk HTTP Event Collector (HEC). This is required for sending logs to Splunk. Compatible with both EC2 and FARGATE ECS task definitions."
12081214
type = string

0 commit comments

Comments
 (0)