diff --git a/infra/deployments/forms/forms-runner/main.tf b/infra/deployments/forms/forms-runner/main.tf
index 6bec2f877..e7a7d58f1 100644
--- a/infra/deployments/forms/forms-runner/main.tf
+++ b/infra/deployments/forms/forms-runner/main.tf
@@ -67,4 +67,5 @@ module "forms_runner" {
   send_logs_to_cyber                 = var.send_logs_to_cyber
   bounces_and_complaints_kms_key_arn = data.terraform_remote_state.forms_ses.outputs.submission_email_bounces_and_complaints_kms_key_arn
   deliveries_kms_key_arn             = data.terraform_remote_state.forms_ses.outputs.submission_email_successful_deliveries_kms_key_arn
+  queue_worker_capacity              = var.forms_runner_settings.queue_worker_capacity
 }
diff --git a/infra/deployments/forms/inputs.tf b/infra/deployments/forms/inputs.tf
index 7f8d3bb3b..7c4c2176d 100644
--- a/infra/deployments/forms/inputs.tf
+++ b/infra/deployments/forms/inputs.tf
@@ -182,6 +182,7 @@ variable "forms_runner_settings" {
     allow_human_readonly_roles_to_assume_submissions_to_runner_role = bool
     ses_submission_email_from_email_address                         = string
     ses_submission_email_reply_to_email_address                     = string
+    queue_worker_capacity                                           = number
   })
 }
 
diff --git a/infra/deployments/forms/tfvars/dev.tfvars b/infra/deployments/forms/tfvars/dev.tfvars
index 8da441889..9d03ed636 100644
--- a/infra/deployments/forms/tfvars/dev.tfvars
+++ b/infra/deployments/forms/tfvars/dev.tfvars
@@ -105,6 +105,7 @@ forms_runner_settings = {
   allow_human_readonly_roles_to_assume_submissions_to_runner_role = true
   ses_submission_email_from_email_address                         = "no-reply@dev.forms.service.gov.uk"
   ses_submission_email_reply_to_email_address                     = "no-reply@dev.forms.service.gov.uk"
+  queue_worker_capacity                                           = 1
 }
 scheduled_smoke_tests_settings = {
   enable_scheduled_smoke_tests = true
diff --git a/infra/deployments/forms/tfvars/production.tfvars b/infra/deployments/forms/tfvars/production.tfvars
index df49d67f4..76587d367 100644
--- a/infra/deployments/forms/tfvars/production.tfvars
+++ b/infra/deployments/forms/tfvars/production.tfvars
@@ -150,6 +150,7 @@ forms_runner_settings = {
   allow_human_readonly_roles_to_assume_submissions_to_runner_role = false
   ses_submission_email_from_email_address                         = "no-reply@forms.service.gov.uk"
   ses_submission_email_reply_to_email_address                     = "no-reply@forms.service.gov.uk"
+  queue_worker_capacity                                           = 6
 }
 scheduled_smoke_tests_settings = {
   enable_scheduled_smoke_tests = true
diff --git a/infra/deployments/forms/tfvars/staging.tfvars b/infra/deployments/forms/tfvars/staging.tfvars
index 8294e6a14..0fdcde308 100644
--- a/infra/deployments/forms/tfvars/staging.tfvars
+++ b/infra/deployments/forms/tfvars/staging.tfvars
@@ -70,6 +70,7 @@ forms_runner_settings = {
   allow_human_readonly_roles_to_assume_submissions_to_runner_role = false
   ses_submission_email_from_email_address                         = "no-reply@staging.forms.service.gov.uk"
   ses_submission_email_reply_to_email_address                     = "no-reply@staging.forms.service.gov.uk"
+  queue_worker_capacity                                           = 1
 }
 scheduled_smoke_tests_settings = {
   enable_scheduled_smoke_tests = true
diff --git a/infra/deployments/forms/tfvars/user-research.tfvars b/infra/deployments/forms/tfvars/user-research.tfvars
index 0d71fa55d..2865e5e0e 100644
--- a/infra/deployments/forms/tfvars/user-research.tfvars
+++ b/infra/deployments/forms/tfvars/user-research.tfvars
@@ -68,6 +68,7 @@ forms_runner_settings = {
   ses_submission_email_reply_to_email_address                     = "no-reply@research.forms.service.gov.uk"
   allow_human_readonly_roles_to_assume_submissions_to_s3_role     = false
   allow_human_readonly_roles_to_assume_submissions_to_runner_role = false
+  queue_worker_capacity                                           = 1
 }
 scheduled_smoke_tests_settings = {
   enable_scheduled_smoke_tests = false
diff --git a/infra/modules/forms-runner/queue-worker.tf b/infra/modules/forms-runner/queue-worker.tf
deleted file mode 100644
index f761a4d03..000000000
--- a/infra/modules/forms-runner/queue-worker.tf
+++ /dev/null
@@ -1,17 +0,0 @@
-locals {
-  queue_worker_name = "forms-runner-queue-worker"
-}
-
-resource "aws_ssm_parameter" "queue_worker_sentry_dsn" {
-  #checkov:skip=CKV_AWS_337:The parameter is already using the default key
-  name  = "/${local.queue_worker_name}-${var.env_name}/sentry/dsn"
-  type  = "SecureString"
-  value = "dummy_value"
-
-  description = "Sentry DSN value for ${local.queue_worker_name} in the ${var.env_name} environment"
-
-  lifecycle {
-    ignore_changes  = [value]
-    prevent_destroy = true
-  }
-}
\ No newline at end of file
diff --git a/infra/modules/forms-runner/queue_worker.tf b/infra/modules/forms-runner/queue_worker.tf
new file mode 100644
index 000000000..3b4eae797
--- /dev/null
+++ b/infra/modules/forms-runner/queue_worker.tf
@@ -0,0 +1,194 @@
+locals {
+  queue_worker_name = "forms-runner-queue-worker"
+
+  # Take the exported task container definition and override some parts of it
+  queue_worker_container_definitions = merge(
+    module.ecs_service.task_container_definition,
+    {
+      name    = local.queue_worker_name,
+      command = ["bin/jobs"]
+
+      healthCheck = {
+        command     = ["CMD-SHELL", "test -f tmp/solidqueue_healthcheck || exit 1"]
+        interval    = 30
+        timeout     = 5
+        retries     = 3
+        startPeriod = 10
+      }
+
+      logConfiguration = {
+        logDriver = "awslogs",
+        options = {
+          awslogs-group         = module.ecs_service.application_log_group_name,
+          awslogs-region        = "eu-west-2",
+          awslogs-stream-prefix = "forms-runner-${var.env_name}-queue-worker"
+        }
+      }
+
+      secrets = [
+        {
+          name      = "SETTINGS__FORMS_API__AUTH_KEY",
+          valueFrom = "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-${var.env_name}/forms-api-key"
+        },
+        {
+          name      = "SETTINGS__SENTRY__DSN",
+          valueFrom = "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-queue-worker-${var.env_name}/sentry/dsn"
+        },
+        {
+          name      = "SECRET_KEY_BASE",
+          valueFrom = "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-${var.env_name}/secret-key-base"
+        },
+        {
+          name      = "DATABASE_URL",
+          valueFrom = "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-${var.env_name}/database/url"
+        },
+        {
+          name      = "QUEUE_DATABASE_URL",
+          valueFrom = "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-queue-${var.env_name}/database/url"
+        }
+      ]
+    }
+  )
+}
+
+resource "aws_ecs_task_definition" "queue_worker" {
+  family                   = "${var.env_name}-${local.queue_worker_name}"
+  container_definitions    = jsonencode([local.queue_worker_container_definitions])
+  execution_role_arn       = aws_iam_role.ecs_task_exec_role.arn
+  task_role_arn            = module.ecs_service.task_definition.task_role_arn
+  requires_compatibilities = module.ecs_service.task_definition.requires_compatibilities
+  cpu                      = module.ecs_service.task_definition.cpu
+  memory                   = module.ecs_service.task_definition.memory
+  network_mode             = "awsvpc"
+
+  runtime_platform {
+    operating_system_family = "LINUX"
+    cpu_architecture        = "ARM64"
+  }
+}
+
+resource "aws_ecs_service" "queue_worker" {
+  #checkov:skip=CKV_AWS_332:We don't want to target "LATEST" and get a surprise when a new version is released.
+  #checkov:skip=CKV2_FORMS_AWS_2:The queue worker currently doesn't autoscale, revisit this decision by 23/06/2025
+  name          = local.queue_worker_name
+  cluster       = var.ecs_cluster_arn
+  desired_count = var.queue_worker_capacity
+
+  task_definition                    = aws_ecs_task_definition.queue_worker.arn
+  deployment_maximum_percent         = "200"
+  deployment_minimum_healthy_percent = "100"
+
+  launch_type      = "FARGATE"
+  platform_version = "1.4.0"
+
+  lifecycle {
+    prevent_destroy = true # ECS services cannot be destructively replaced without downtime. This helps to avoid accidentally doing so.
+  }
+
+  network_configuration {
+    subnets          = var.private_subnet_ids
+    security_groups  = [aws_security_group.queue_worker.id]
+    assign_public_ip = false
+  }
+}
+
+resource "aws_security_group" "queue_worker" {
+  name        = local.queue_worker_name
+  description = "Restrict all ingress, allow egress to VPC, RDS, and internet"
+  vpc_id      = var.vpc_id
+  egress {
+    description = "Permit outbound to VPC CIDR on 443"
+    from_port   = 443
+    to_port     = 443
+    protocol    = "tcp"
+    cidr_blocks = [var.vpc_cidr_block]
+  }
+
+  egress {
+    description = "Permit outbound to the RDS postgres port 5432"
+    from_port   = 5432
+    to_port     = 5432
+    protocol    = "tcp"
+    cidr_blocks = [var.vpc_cidr_block]
+  }
+
+  egress {
+    description = "Permit outbound 443 to the internet"
+    from_port   = 443
+    to_port     = 443
+    protocol    = "tcp"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+}
+
+resource "aws_iam_role" "ecs_task_exec_role" {
+  name               = "${var.env_name}-${local.queue_worker_name}-ecs-task-exec"
+  description        = "Used by ECS to create forms-runner-queue-worker task"
+  assume_role_policy = data.aws_iam_policy_document.ecs_task_exec_role_assume_role.json
+}
+
+data "aws_iam_policy_document" "ecs_task_exec_role_assume_role" {
+  statement {
+    sid     = "AllowECS"
+    actions = ["sts:AssumeRole"]
+    effect  = "Allow"
+
+    principals {
+      type        = "Service"
+      identifiers = ["ecs-tasks.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role_policy_attachment" "ecs_task_exec_standard_policy" {
+  role       = aws_iam_role.ecs_task_exec_role.name
+  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
+}
+
+resource "aws_iam_policy" "ecs_task_exec_additional_policy" {
+  name   = "${var.env_name}-${local.queue_worker_name}-ecs-task-additional-policies"
+  policy = data.aws_iam_policy_document.queue_worker_ecs_task_exec_additional_policy.json
+}
+
+resource "aws_iam_role_policy_attachment" "ecs_task_exec_additional_policy" {
+  role       = aws_iam_role.ecs_task_exec_role.name
+  policy_arn = aws_iam_policy.ecs_task_exec_additional_policy.arn
+}
+
+data "aws_iam_policy_document" "queue_worker_ecs_task_exec_additional_policy" {
+  statement {
+    actions = [
+      "ssm:DescribeParameters"
+    ]
+    resources = ["*"]
+    effect    = "Allow"
+  }
+  statement {
+    actions = [
+      "ssm:GetParameters"
+    ]
+    # Keep in sync with the container `secrets` list: ECS reads each parameter with this execution role when the task starts.
+    resources = [
+      "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-${var.env_name}/forms-api-key",
+      "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-queue-worker-${var.env_name}/sentry/dsn",
+      "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-${var.env_name}/secret-key-base",
+      "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-${var.env_name}/database/url",
+      "arn:aws:ssm:eu-west-2:${data.aws_caller_identity.current.account_id}:parameter/forms-runner-queue-${var.env_name}/database/url"
+    ]
+    effect = "Allow"
+  }
+}
+
+resource "aws_ssm_parameter" "queue_worker_sentry_dsn" {
+  #checkov:skip=CKV_AWS_337:The parameter is already using the default key
+  name  = "/${local.queue_worker_name}-${var.env_name}/sentry/dsn"
+  type  = "SecureString"
+  value = "dummy_value"
+
+  description = "Sentry DSN value for ${local.queue_worker_name} in the ${var.env_name} environment"
+
+  lifecycle {
+    ignore_changes  = [value]
+    prevent_destroy = true
+  }
+}
\ No newline at end of file
diff --git a/infra/modules/forms-runner/variables.tf b/infra/modules/forms-runner/variables.tf
index ad37d211a..8b92492bb 100644
--- a/infra/modules/forms-runner/variables.tf
+++ b/infra/modules/forms-runner/variables.tf
@@ -169,3 +169,8 @@ variable "deliveries_kms_key_arn" {
   type        = string
   description = "The ARN of the KMS key to decrypt messages on the submission deliveries SQS queue"
 }
+
+variable "queue_worker_capacity" {
+  type        = number
+  description = "Sets the desired number of tasks for the SolidQueue worker"
+}
\ No newline at end of file