diff --git a/pulumi/Pulumi.yaml b/pulumi/Pulumi.yaml new file mode 100644 index 00000000..5a0cfba5 --- /dev/null +++ b/pulumi/Pulumi.yaml @@ -0,0 +1,9 @@ +name: whisper-live +runtime: + name: python +description: Infrastructure definitions for WhisperLive +main: src/ +config: + pulumi:tags: + value: + pulumi:template: python diff --git a/pulumi/README.md b/pulumi/README.md new file mode 100644 index 00000000..558d4530 --- /dev/null +++ b/pulumi/README.md @@ -0,0 +1,105 @@ +# Pulumi Whisper Live GPU Setup + +This project uses Pulumi to provision AWS resources necessary to deploy an ECS +service that runs the WhisperLive service with GPU. It sets up a VPC, subnets, +security groups, NAT gateways, an Auto Scaling Group, an ECS cluster, and +related components required to deploy a containerized application with GPU +support. + +Note: This is provided for reference only! Please be sure you understand the +costs of running GPU instances in AWS before deploying this stack. This stack +may not be set up to ensure security best practices, so use it at your own risk! + +### Prerequisites + +- An AWS account and AWS CLI installed and configured +- Pulumi CLI installed +- A configured Pulumi stack or access to create one + +### Setup Steps + +1. **Install AWS CLI** + + First, ensure you have the AWS CLI installed. If you don't, install it + following [these instructions](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html). + +2. **Configure AWS CLI** + + Once the AWS CLI is installed, configure it by running `aws configure`. + You'll need to input your AWS Access Key ID, Secret Access Key, region, and + output format (e.g., json). + + ```sh + aws configure + ``` + +3. **Install Pulumi** + + If Pulumi is not installed on your system, follow + the [Get Started with Pulumi](https://www.pulumi.com/docs/get-started/aws/) + guide to install it. + +4. **Set up Pulumi Stack** + + Initialize a new Pulumi stack if you haven't already. 
A stack represents an + isolated deployment (e.g., development, staging, production). + + ```sh + pulumi stack init dev + ``` + +5. **Configure Pulumi for AWS** + + Configure Pulumi to use your AWS credentials. You can specify the AWS region + in which to deploy resources. + + ```sh + pulumi config set aws:region us-west-1 # or your preferred region + ``` + +6. **Define Required Configuration Variables** + + The Pulumi program requires certain configuration variables. Set them using + the `pulumi config set` command. The last three are optional and fall back + to the defaults shown. + + ```sh + pulumi config set vpc_id <vpc-id> + pulumi config set public_subnet_id_a <subnet-id> + pulumi config set public_subnet_id_b <subnet-id> + pulumi config set ecr_repository_url <url> # Optional. Default: ghcr.io/collabora/whisperlive-gpu + pulumi config set image_tag <tag> # Optional. Default: latest + pulumi config set ami_id <ami-id> # Optional. Find the AMI ID for the desired ECS compatible GPU instance type available in your region + ``` + +7. **Deploy with Pulumi** + + Run the following command to provision the AWS resources as per the Pulumi + program. + + ```sh + pulumi up + ``` + + Review the proposed changes and type `yes` to proceed with the deployment. + +### Teardown + +1. **Destroy Resources** + + To tear down the resources and remove the stack, run: + + ```sh + pulumi destroy + pulumi stack rm + ``` + + Confirm the destruction of resources when prompted. + +### Important Notes + +- Be mindful of the AWS region where you deploy this project. The AMI ID and + resource availability may vary by region. +- Adjust the `desired_capacity`, `max_size`, and `min_size` for the Auto Scaling + Group based on your application's requirements. +- Always review AWS resource costs and Pulumi's resource management to ensure + that you're operating within your budget and requirements. 
"""Provision an ECS-on-EC2 GPU deployment of WhisperLive on AWS.

Inside an existing VPC this program creates: two private subnets with NAT
gateways (egress-only internet access for the GPU instances), a network load
balancer listening on TCP 9090, an Auto Scaling Group of GPU instances that
back an ECS cluster through a managed capacity provider, and an ECS service
running the WhisperLive container with one GPU reserved per task.

Required config: ``vpc_id``, ``public_subnet_id_a``, ``public_subnet_id_b``.
Optional config: ``ecr_repository_url``, ``image_tag``, ``ami_id``,
``availability_zone_a``, ``availability_zone_b``, ``key_name``.
"""

import base64
import logging

import pulumi
import pulumi_aws as aws


logger = logging.getLogger(__name__)

config = pulumi.Config()

# Required: the existing VPC and the two public subnets that will host the
# NAT gateways and the load balancer.
vpc_id = config.require("vpc_id")
public_subnet_id_a = config.require("public_subnet_id_a")
public_subnet_id_b = config.require("public_subnet_id_b")

# Optional: container image location.
ecr_repository_url = config.get("ecr_repository_url", "ghcr.io/collabora/whisperlive-gpu")
image_tag = config.get("image_tag", "latest")

# The default AMI (an ECS GPU-optimized image) exists only in us-west-1.
DEFAULT_AMI_ID = "ami-0a7b82bd04a728ae5"
ami_id = config.get("ami_id", DEFAULT_AMI_ID)

# Region the AWS provider is configured for; used for region-scoped values
# (CloudWatch Logs region) so the program works outside us-west-1.
aws_region = aws.get_region().name

if ami_id == DEFAULT_AMI_ID:
    # FIX: the original message read "If you are this region" (missing "not
    # in") and pointed at `whisper-live:vpc_id`; the key that selects the AMI
    # is `ami_id`. Lazy %-style args keep formatting off the hot path.
    logger.warning(
        "AMI ID %s is only valid in the us-west-1 region. If you are not "
        "deploying to us-west-1, set an ECS-compatible GPU AMI via "
        "`whisper-live:ami_id` in the Pulumi config.",
        ami_id,
    )

# Availability zones for the private subnets. Defaults match the original
# us-west-1 layout; override when deploying to another region.
az_a = config.get("availability_zone_a", "us-west-1a")
az_b = config.get("availability_zone_b", "us-west-1c")

# EC2 key pair for SSH access to the GPU instances.
# TODO(review): the default is a personal key left over from development;
# override it with `pulumi config set key_name <your-key-pair>`.
key_name = config.get("key_name", "kevin-local")

container_name = "whisper-live-gpu-container"

# ECS cluster that the GPU container instances register with.
ecs_cluster = aws.ecs.Cluster("gpu_cluster")

# IAM role assumed by the EC2 container instances.
instance_role = aws.iam.Role(
    "instance-role",
    assume_role_policy="""{
        "Version": "2012-10-17",
        "Statement": [{
            "Action": "sts:AssumeRole",
            "Principal": {"Service": "ec2.amazonaws.com"},
            "Effect": "Allow",
            "Sid": ""
        }]
    }""",
)

# Grant the instances the standard ECS container-instance permissions
# (register with the cluster, pull images, ship logs).
policy_attachment = aws.iam.RolePolicyAttachment(
    "ecs-instance-role-attachment",
    role=instance_role.name,
    policy_arn="arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role",
)

# Instance profile so the role can be attached to EC2 instances.
instance_profile = aws.iam.InstanceProfile("instance-profile", role=instance_role.name)

# Security group: allow inbound traffic on the WhisperLive port (9090) from
# anywhere, and unrestricted egress (needed to pull container images).
security_group = aws.ec2.SecurityGroup(
    "whisper-live-security-group",
    description="Allow inbound traffic on port 9090",
    ingress=[
        {
            "protocol": "tcp",
            "from_port": 9090,
            "to_port": 9090,
            "cidr_blocks": ["0.0.0.0/0"],
        }
    ],
    egress=[
        {
            "protocol": "-1",  # '-1' means all protocols
            "from_port": 0,    # 0 means all ports
            "to_port": 0,
            "cidr_blocks": ["0.0.0.0/0"],
        }
    ],
)

# Private subnets for the GPU instances (no public IPs; egress via NAT).
# NOTE(review): the CIDR blocks assume the VPC covers 172.31.0.0/16 — confirm
# against the VPC passed in `vpc_id`.
private_subnet_a = aws.ec2.Subnet(
    "private-subnet-a",
    vpc_id=vpc_id,
    cidr_block="172.31.64.0/24",
    availability_zone=az_a,
    map_public_ip_on_launch=False,
)

private_subnet_b = aws.ec2.Subnet(
    "private-subnet-b",
    vpc_id=vpc_id,
    cidr_block="172.31.65.0/24",
    availability_zone=az_b,
    map_public_ip_on_launch=False,
)

# One route table per private subnet, so each AZ routes through its own NAT.
private_route_table_a = aws.ec2.RouteTable(
    "private-route-table-a",
    vpc_id=vpc_id,
    tags={"Name": "Private Route Table AZ1"},
)

private_route_table_b = aws.ec2.RouteTable(
    "private-route-table-b",
    vpc_id=vpc_id,
    tags={"Name": "Private Route Table AZ2"},
)

route_table_association_a = aws.ec2.RouteTableAssociation(
    "route-table-association-a",
    subnet_id=private_subnet_a.id,
    route_table_id=private_route_table_a.id,
)

route_table_association_b = aws.ec2.RouteTableAssociation(
    "route-table-association-b",
    subnet_id=private_subnet_b.id,
    route_table_id=private_route_table_b.id,
)

# Allocate an Elastic IP for each NAT Gateway.
eip_nat_gw_a = aws.ec2.Eip("eip-nat-gw-a", domain="vpc")
eip_nat_gw_b = aws.ec2.Eip("eip-nat-gw-b", domain="vpc")

# Create NAT Gateways in the public subnets and associate them with the EIPs.
nat_gateway_a = aws.ec2.NatGateway(
    "nat-gateway-a",
    allocation_id=eip_nat_gw_a.id,
    subnet_id=public_subnet_id_a,
)

nat_gateway_b = aws.ec2.NatGateway(
    "nat-gateway-b",
    allocation_id=eip_nat_gw_b.id,
    subnet_id=public_subnet_id_b,
)

# Add default routes to the internet through the NAT Gateways.
nat_route_a = aws.ec2.Route(
    "nat-route-a",
    route_table_id=private_route_table_a.id,
    destination_cidr_block="0.0.0.0/0",
    nat_gateway_id=nat_gateway_a.id,
)

nat_route_b = aws.ec2.Route(
    "nat-route-b",
    route_table_id=private_route_table_b.id,
    destination_cidr_block="0.0.0.0/0",
    nat_gateway_id=nat_gateway_b.id,
)

# Internet-facing network load balancer (TCP) fronting the ECS service.
alb = aws.lb.LoadBalancer(
    "whisper-live-lb",
    internal=False,
    load_balancer_type="network",
    security_groups=[security_group.id],
    subnets=[public_subnet_id_a, public_subnet_id_b],
    enable_deletion_protection=False,
    enable_cross_zone_load_balancing=True,
)

# Target group for the ECS tasks (awsvpc networking => target_type="ip").
tg = aws.lb.TargetGroup(
    "whisper-live-tg",
    port=9090,
    protocol="TCP",
    target_type="ip",
    vpc_id=vpc_id,
    health_check=aws.lb.TargetGroupHealthCheckArgs(
        protocol="TCP",
        healthy_threshold=2,
        unhealthy_threshold=2,
        timeout=10,
        interval=30,
        port="9090",
    ),
)

# Listener forwarding TCP 9090 on the load balancer to the target group.
listener = aws.lb.Listener(
    "listener",
    load_balancer_arn=alb.arn,
    port=9090,
    protocol="TCP",
    default_actions=[aws.lb.ListenerDefaultActionArgs(
        type="forward",
        target_group_arn=tg.arn,
    )],
)

# User data that joins each instance to the ECS cluster; launch templates
# require it base64-encoded. `.apply` defers until the cluster name is known.
user_data_encoded = ecs_cluster.name.apply(
    lambda name: base64.b64encode(
        f"#!/bin/bash\necho ECS_CLUSTER={name} >> /etc/ecs/ecs.config".encode("ascii")
    ).decode("ascii")
)

# Launch template for the GPU instances.
# FIX: image_id previously hard-coded the us-west-1 AMI, silently ignoring
# the `ami_id` config variable documented in the README.
launch_template = aws.ec2.LaunchTemplate(
    "gpu-launch-template",
    image_id=ami_id,
    instance_type="g4dn.xlarge",
    iam_instance_profile=aws.ec2.LaunchTemplateIamInstanceProfileArgs(
        arn=instance_profile.arn
    ),
    key_name=key_name,
    user_data=user_data_encoded,
    vpc_security_group_ids=[security_group.id],
    block_device_mappings=[aws.ec2.LaunchTemplateBlockDeviceMappingArgs(
        device_name="/dev/xvda",  # root device name for the default AMI; adjust for other AMIs
        ebs=aws.ec2.LaunchTemplateBlockDeviceMappingEbsArgs(
            delete_on_termination="true",
            volume_size=60,  # GiB; GPU ECS AMIs plus model weights need headroom
            volume_type="gp3",
        ),
    )],
)

# Auto Scaling Group that launches GPU instances from the template into the
# private subnets.
auto_scaling_group = aws.autoscaling.Group(
    "ecs-autoscaling-group",
    desired_capacity=1,
    max_size=2,
    min_size=1,
    launch_template=aws.autoscaling.GroupLaunchTemplateArgs(
        id=launch_template.id,
        version="$Latest",
    ),
    vpc_zone_identifiers=[private_subnet_a.id, private_subnet_b.id],
    tags=[{
        "key": "Name",
        "value": "ECS Instance - GPU",
        "propagate_at_launch": True,
    }],
)

# Task execution role: lets the ECS agent pull images and write logs on
# behalf of the task.
ecs_task_execution_role = aws.iam.Role(
    "ecs-task-execution-role",
    assume_role_policy="""{
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {"Service": "ecs-tasks.amazonaws.com"},
                "Action": "sts:AssumeRole"
            }
        ]
    }""",
)

ecs_task_execution_policy_attachment = aws.iam.RolePolicyAttachment(
    "ecs-task-execution-policy-attachment",
    role=ecs_task_execution_role.name,
    policy_arn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy",
)

# CloudWatch log group for the container logs.
log_group = aws.cloudwatch.LogGroup(
    "whisper-live-gpu-log-group",
    retention_in_days=7,
)

# Task definition reserving one GPU per task.
# FIX: awslogs-region previously hard-coded "us-west-1"; it must match the
# deployment region or log delivery fails.
task_definition = aws.ecs.TaskDefinition(
    "whisper-live-gpu-task-definition",
    family="whisper-live-gpu-task",
    cpu="1024",
    memory="4096",
    requires_compatibilities=["EC2"],
    execution_role_arn=ecs_task_execution_role.arn,
    network_mode="awsvpc",
    container_definitions=pulumi.Output.all(ecr_repository_url, image_tag, log_group.name).apply(lambda args: f"""
    [
        {{
            "name": "{container_name}",
            "image": "{args[0]}:{args[1]}",
            "cpu": 1024,
            "memory": 4096,
            "essential": true,
            "resourceRequirements": [
                {{
                    "value": "1",
                    "type": "GPU"
                }}
            ],
            "portMappings": [
                {{
                    "containerPort": 9090,
                    "hostPort": 9090,
                    "protocol": "tcp"
                }}
            ],
            "logConfiguration": {{
                "logDriver": "awslogs",
                "options": {{
                    "awslogs-group": "{args[2]}",
                    "awslogs-region": "{aws_region}",
                    "awslogs-stream-prefix": "whisper-live"
                }}
            }}
        }}
    ]
    """),
)

# Capacity provider: lets ECS scale the ASG to match task demand.
capacity_provider = aws.ecs.CapacityProvider(
    "gpu-cluster-capacity-provider",
    auto_scaling_group_provider=aws.ecs.CapacityProviderAutoScalingGroupProviderArgs(
        auto_scaling_group_arn=auto_scaling_group.arn,
        managed_scaling=aws.ecs.CapacityProviderAutoScalingGroupProviderManagedScalingArgs(
            status="ENABLED",
            target_capacity=100,  # percent utilization target; adjust to taste
            minimum_scaling_step_size=1,
            maximum_scaling_step_size=1,
        ),
    ),
    tags={"Name": "gpuClusterCapacityProvider"},
)

# Associate the capacity provider with the cluster and make it the default.
capacity_provider_association = aws.ecs.ClusterCapacityProviders(
    "gpu-cluster-capacity-provider-association",
    cluster_name=ecs_cluster.name,
    capacity_providers=[capacity_provider.name],
    default_capacity_provider_strategies=[aws.ecs.ClusterCapacityProvidersDefaultCapacityProviderStrategyArgs(
        capacity_provider=capacity_provider.name,
        weight=1,
    )],
)

# The ECS service itself: one WhisperLive task behind the load balancer.
ecs_service = aws.ecs.Service(
    "gpu-ecs-service",
    cluster=ecs_cluster.arn,
    desired_count=1,  # number of tasks to run
    deployment_minimum_healthy_percent=0,  # allows replace-in-place on a single instance; raise in production!
    task_definition=task_definition.arn,
    network_configuration=aws.ecs.ServiceNetworkConfigurationArgs(
        subnets=[private_subnet_a.id, private_subnet_b.id],
        security_groups=[security_group.id],
    ),
    load_balancers=[aws.ecs.ServiceLoadBalancerArgs(
        target_group_arn=tg.arn,
        container_name=container_name,
        container_port=9090,
    )],
    opts=pulumi.ResourceOptions(depends_on=[task_definition]),
    capacity_provider_strategies=[
        aws.ecs.ServiceCapacityProviderStrategyArgs(
            capacity_provider=capacity_provider.name,
            weight=1,
        )
    ],
)