Skip to content

Commit 2c6ad3f

Browse files
eberriganclaude
andcommitted
Implement simplified DNS/SSL configuration schema
This implements the approved OpenSpec change to simplify DNS and SSL configuration, add ACM support, and fix security vulnerabilities. Breaking Changes: - Removed dns.app_name, dns.pattern, dns.custom_subdomain, dns.create_zone - Removed ssl.staging - dns.domain now accepts full domain (e.g., "test.lablink.example.com") - ssl.provider now supports "acm" for AWS Certificate Manager Infrastructure Changes: - Updated main.tf to remove pattern-based DNS logic - Added ALLOCATOR_FQDN computation and environment variable - Created alb.tf for ACM/ALB support (conditional) - Updated user_data.sh with conditional Caddy installation - Added lifecycle hooks to Route53 records Configuration Changes: - Updated config.yaml with new schema - Updated test.example.yaml with new schema - Created 5 canonical use case examples: - ip-only.example.yaml (no DNS, no SSL) - cloudflare.example.yaml (CloudFlare DNS + SSL) - letsencrypt.example.yaml (Route53 + Let's Encrypt, Terraform-managed) - acm.example.yaml (Route53 + ACM via ALB) - letsencrypt-manual.example.yaml (Route53 + Let's Encrypt, manual DNS) OpenSpec: - Created proposal in openspec/changes/implement-simplified-dns-ssl/ - Added infrastructure spec deltas - Created detailed tasks checklist Related: talmolab/lablink#230 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]>
1 parent eeb2258 commit 2c6ad3f

File tree

15 files changed

+1566
-77
lines changed

15 files changed

+1566
-77
lines changed

lablink-infrastructure/alb.tf

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
# Application Load Balancer for ACM SSL termination
2+
# Only created when ssl.provider = "acm"
3+
4+
# Security group for ALB (allow HTTP/HTTPS from internet)
5+
resource "aws_security_group" "alb_sg" { # Security group attached to the ALB (see aws_lb.allocator_alb.security_groups)
6+
count = local.create_alb ? 1 : 0 # 0 or 1 instances, gated on local.create_alb (defined outside this view)
7+
name = "lablink-alb-sg-${var.resource_suffix}" # var.resource_suffix is also used as the Environment tag below
8+
9+
ingress { # Rule 1: TCP port 80 from any IPv4 address
10+
from_port = 80
11+
to_port = 80
12+
protocol = "tcp"
13+
cidr_blocks = ["0.0.0.0/0"]
14+
description = "Allow HTTP from internet"
15+
}
16+
17+
ingress { # Rule 2: TCP port 443 from any IPv4 address
18+
from_port = 443
19+
to_port = 443
20+
protocol = "tcp"
21+
cidr_blocks = ["0.0.0.0/0"]
22+
description = "Allow HTTPS from internet"
23+
}
24+
25+
egress { # Unrestricted outbound: every port, every protocol, any IPv4 destination
26+
from_port = 0
27+
to_port = 0
28+
protocol = "-1" # "-1" means all protocols (matches the description below)
29+
cidr_blocks = ["0.0.0.0/0"]
30+
description = "Allow all outbound traffic"
31+
}
32+
33+
tags = {
34+
Name = "lablink-alb-sg-${var.resource_suffix}" # Same value as the group name above
35+
Environment = var.resource_suffix
36+
}
37+
}
38+
39+
# Update allocator security group to allow traffic from ALB
40+
resource "aws_security_group_rule" "allow_alb_to_allocator" { # Extra ingress rule added to an existing security group
41+
count = local.create_alb ? 1 : 0 # Same gate as alb_sg, so the alb_sg[0] reference below always resolves when this rule exists
42+
type = "ingress"
43+
from_port = 5000 # Port 5000 matches the target group and attachment port in this file
44+
to_port = 5000
45+
protocol = "tcp"
46+
source_security_group_id = aws_security_group.alb_sg[0].id # Source is the ALB's security group, not a CIDR range
47+
security_group_id = aws_security_group.allow_http.id # NOTE(review): allow_http is declared outside this view; presumably the allocator's security group — confirm
48+
description = "Allow ALB to reach allocator on port 5000"
49+
}
50+
51+
# Get default VPC for ALB
52+
data "aws_vpc" "default" { # Looks up the default VPC; its id feeds the subnet lookup and the target group
53+
count = local.create_alb ? 1 : 0 # Lookup is skipped entirely when no ALB is created
54+
default = true # Select the VPC flagged as default rather than matching by id or tag
55+
}
56+
57+
# Get the default VPC's subnets for the ALB (an ALB needs subnets in at least 2 AZs)
58+
data "aws_subnets" "default" { # Lists subnet ids in the default VPC; consumed by aws_lb.allocator_alb.subnets
59+
count = local.create_alb ? 1 : 0 # Same gate as data.aws_vpc.default, so the [0] reference below is valid
60+
filter { # Only filter applied is the VPC id — NOTE(review): every subnet in the VPC is returned; confirm all are suitable for an internet-facing ALB
61+
name = "vpc-id"
62+
values = [data.aws_vpc.default[0].id]
63+
}
64+
}
65+
66+
# Application Load Balancer
67+
resource "aws_lb" "allocator_alb" { # The load balancer itself; listeners and the Route53 alias in this file reference it
68+
count = local.create_alb ? 1 : 0 # 0 or 1 instances, gated on local.create_alb (defined outside this view)
69+
name = "lablink-alb-${var.resource_suffix}"
70+
internal = false # Not an internal load balancer
71+
load_balancer_type = "application"
72+
security_groups = [aws_security_group.alb_sg[0].id] # The ALB security group declared in this file (ports 80/443 open)
73+
subnets = data.aws_subnets.default[0].ids # Every subnet id returned by the default-VPC lookup
74+
75+
enable_deletion_protection = false # Deletion protection is off — NOTE(review): consider enabling for production
76+
77+
tags = {
78+
Name = "lablink-alb-${var.resource_suffix}" # Same value as the load balancer name above
79+
Environment = var.resource_suffix
80+
}
81+
}
82+
83+
# Target group for allocator EC2 instance
84+
resource "aws_lb_target_group" "allocator_tg" { # Target group the HTTPS listener forwards to
85+
count = local.create_alb ? 1 : 0 # 0 or 1 instances, gated on local.create_alb (defined outside this view)
86+
name = "lablink-tg-${var.resource_suffix}"
87+
port = 5000 # Same port the security group rule opens from the ALB
88+
protocol = "HTTP" # Plain HTTP to the target; TLS is handled by the HTTPS listener
89+
vpc_id = data.aws_vpc.default[0].id # Same default VPC the ALB subnets come from
90+
91+
health_check {
92+
enabled = true
93+
healthy_threshold = 2
94+
interval = 30
95+
matcher = "200" # Only an HTTP 200 response counts as healthy
96+
path = "/health" # NOTE(review): assumes the allocator serves /health — confirm against the application
97+
port = "traffic-port" # Check the same port that receives traffic (5000 above)
98+
protocol = "HTTP"
99+
timeout = 5
100+
unhealthy_threshold = 2
101+
}
102+
103+
tags = {
104+
Name = "lablink-tg-${var.resource_suffix}" # Same value as the target group name above
105+
Environment = var.resource_suffix
106+
}
107+
}
108+
109+
# Attach allocator EC2 instance to target group
110+
resource "aws_lb_target_group_attachment" "allocator_attachment" { # Registers one EC2 instance as the target group's target
111+
count = local.create_alb ? 1 : 0 # Same gate as the target group, so the [0] reference below is valid
112+
target_group_arn = aws_lb_target_group.allocator_tg[0].arn
113+
target_id = aws_instance.lablink_allocator_server.id # Instance resource is declared outside this view
114+
port = 5000 # Matches the target group port
115+
}
116+
117+
# HTTP listener (redirect to HTTPS)
118+
resource "aws_lb_listener" "http" { # Port-80 listener that never forwards; it only redirects to HTTPS
119+
count = local.create_alb ? 1 : 0 # Same gate as the ALB, so the [0] reference below is valid
120+
load_balancer_arn = aws_lb.allocator_alb[0].arn
121+
port = "80"
122+
protocol = "HTTP"
123+
124+
default_action {
125+
type = "redirect"
126+
127+
redirect { # Redirect target: port 443 over HTTPS
128+
port = "443"
129+
protocol = "HTTPS"
130+
status_code = "HTTP_301" # Permanent redirect
131+
}
132+
}
133+
}
134+
135+
# HTTPS listener (forward to target group)
136+
resource "aws_lb_listener" "https" { # Port-443 listener: terminates TLS and forwards to the allocator target group
137+
count = local.create_alb ? 1 : 0 # Same gate as the ALB and target group, so the [0] references below are valid
138+
load_balancer_arn = aws_lb.allocator_alb[0].arn
139+
port = "443"
140+
protocol = "HTTPS"
141+
ssl_policy = "ELBSecurityPolicy-TLS13-1-2-2021-06" # TLS 1.2/1.3 only; the legacy 2016-08 policy still negotiated TLS 1.0/1.1
142+
certificate_arn = local.ssl_certificate_arn # Defined outside this view; presumably taken from ssl.certificate_arn in config — confirm
143+
144+
default_action {
145+
type = "forward"
146+
target_group_arn = aws_lb_target_group.allocator_tg[0].arn # Target group in this file (HTTP on port 5000)
147+
}
148+
}
149+
150+
# DNS A record for ALB (alias record)
151+
resource "aws_route53_record" "lablink_alb_record" { # Alias A record pointing the configured domain at the ALB
152+
count = local.dns_enabled && local.dns_terraform_managed && local.create_alb ? 1 : 0 # Created only when all three locals are true (all defined outside this view)
153+
zone_id = local.zone_id # Hosted zone that receives the record
154+
name = local.dns_domain
155+
type = "A"
156+
157+
alias { # Alias target is the ALB; no literal IP addresses are written to the record
158+
name = aws_lb.allocator_alb[0].dns_name
159+
zone_id = aws_lb.allocator_alb[0].zone_id # The ALB's own zone id, distinct from local.zone_id above
160+
evaluate_target_health = true
161+
}
162+
163+
lifecycle {
164+
# Destroy protection is currently disabled: with prevent_destroy = false, Terraform may delete this record
165+
prevent_destroy = false # Change to true in production to block accidental deletion of the record
166+
}
167+
}
168+
169+
# Output ALB DNS name
170+
output "alb_dns_name" { # Always defined, even when no ALB exists
171+
value = local.create_alb ? aws_lb.allocator_alb[0].dns_name : "N/A" # Literal "N/A" placeholder when the ALB is not created; avoids indexing an empty resource list
172+
description = "DNS name of the Application Load Balancer (when using ACM)"
173+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# LabLink Configuration: Use Case 4 - Route53 + ACM (AWS Certificate Manager)
2+
# Use AWS Route53 for DNS and ACM for SSL certificates via Application Load Balancer
3+
#
4+
# Prerequisites:
5+
# - Route53 hosted zone created
6+
# - ACM certificate requested and validated for your domain
7+
# - Certificate ARN obtained from ACM console
8+
#
9+
# Setup:
10+
# 1. Request ACM certificate in ACM console, in the same region as the ALB (app.region below; us-west-2 in this example)
11+
# 2. Validate certificate (DNS or email validation)
12+
# 3. Copy certificate ARN to ssl.certificate_arn below
13+
# 4. Deploy infrastructure (Terraform creates ALB with HTTPS listener)
14+
#
15+
# Access URL: https://lablink.example.com
16+
#
17+
# Note: ALB adds ~$20/month cost but provides enterprise-grade SSL termination
18+
19+
db:
20+
dbname: "lablink_db"
21+
user: "lablink"
22+
password: "PLACEHOLDER_DB_PASSWORD"
23+
host: "localhost"
24+
port: 5432
25+
table_name: "vms"
26+
message_channel: "vm_updates"
27+
28+
machine:
29+
machine_type: "g4dn.xlarge"
30+
image: "ghcr.io/talmolab/lablink-client-base-image:linux-amd64-latest-test"
31+
ami_id: "ami-0601752c11b394251" # us-west-2
32+
repository: "https://github.com/talmolab/sleap-tutorial-data.git"
33+
software: "sleap"
34+
extension: "slp"
35+
36+
allocator:
37+
image_tag: "linux-amd64-latest-test"
38+
39+
app:
40+
admin_user: "admin"
41+
admin_password: "PLACEHOLDER_ADMIN_PASSWORD"
42+
region: "us-west-2"
43+
44+
dns:
45+
enabled: true # Route53 DNS enabled
46+
terraform_managed: true # Terraform creates/destroys A record (alias to ALB)
47+
domain: "lablink.example.com" # Full domain
48+
zone_id: "" # Auto-lookup hosted zone
49+
50+
eip:
51+
strategy: "persistent"
52+
tag_name: "lablink-eip"
53+
54+
ssl:
55+
provider: "acm" # AWS Certificate Manager via ALB
56+
email: "" # Not needed for ACM
57+
certificate_arn: "arn:aws:acm:us-west-2:123456789012:certificate/abcd1234-5678-90ab-cdef-EXAMPLE11111" # REPLACE with your ACM cert ARN
58+
59+
startup_script:
60+
enabled: false
61+
path: "config/custom-startup.sh"
62+
on_error: "continue"
63+
64+
bucket_name: "tf-state-lablink-YOURORG"
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# LabLink Configuration: Use Case 2 - CloudFlare DNS + SSL
2+
# Use CloudFlare for DNS management and SSL termination
3+
#
4+
# Prerequisites:
5+
# - Domain registered and managed in CloudFlare
6+
# - CloudFlare proxy enabled (orange cloud icon)
7+
#
8+
# Setup:
9+
# 1. Create A record in CloudFlare: lablink.example.com → {allocator_public_ip}
10+
# 2. Enable CloudFlare proxy (orange cloud)
11+
# 3. SSL/TLS mode: Full (not Strict)
12+
#
13+
# Access URL: https://lablink.example.com (CloudFlare provides SSL)
14+
15+
db:
16+
dbname: "lablink_db"
17+
user: "lablink"
18+
password: "PLACEHOLDER_DB_PASSWORD"
19+
host: "localhost"
20+
port: 5432
21+
table_name: "vms"
22+
message_channel: "vm_updates"
23+
24+
machine:
25+
machine_type: "g4dn.xlarge"
26+
image: "ghcr.io/talmolab/lablink-client-base-image:linux-amd64-latest-test"
27+
ami_id: "ami-0601752c11b394251" # us-west-2
28+
repository: "https://github.com/talmolab/sleap-tutorial-data.git"
29+
software: "sleap"
30+
extension: "slp"
31+
32+
allocator:
33+
image_tag: "linux-amd64-latest-test"
34+
35+
app:
36+
admin_user: "admin"
37+
admin_password: "PLACEHOLDER_ADMIN_PASSWORD"
38+
region: "us-west-2"
39+
40+
dns:
41+
enabled: false # DNS managed in CloudFlare, not Route53
42+
terraform_managed: false
43+
domain: "lablink.example.com" # Used for Caddyfile configuration
44+
zone_id: ""
45+
46+
eip:
47+
strategy: "persistent"
48+
tag_name: "lablink-eip"
49+
50+
ssl:
51+
provider: "cloudflare" # CloudFlare handles SSL termination
52+
email: ""
53+
certificate_arn: ""
54+
55+
startup_script:
56+
enabled: false
57+
path: "config/custom-startup.sh"
58+
on_error: "continue"
59+
60+
bucket_name: "tf-state-lablink-YOURORG"

lablink-infrastructure/config/config.yaml

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,21 +46,17 @@ app:
4646
dns:
4747
enabled: true # true = use DNS for allocator URL, false = IP-only access
4848
terraform_managed: true # false = manual DNS records (you create in Route53), true = Terraform creates/destroys records
49-
domain: "lablink-template-testing.com" # Base hosted zone domain
49+
domain: "ci-test.lablink-template-testing.com" # Full domain name for the allocator (supports sub-subdomains)
5050
zone_id: "Z1038183268T83E91AYJF" # (Optional) Hardcode zone ID to skip lookup - leave empty for auto-lookup
51-
app_name: "" # Not used with custom pattern
52-
pattern: "custom" # Using custom pattern
53-
custom_subdomain: "ci-test" # Creates: ci-test.lablink-template-testing.com
54-
create_zone: false # false = use existing zone, true = create new zone
5551

5652
eip:
5753
strategy: "dynamic" # "persistent" = reuse EIP with tag {tag_name}-{env}, "dynamic" = create new EIP with tag {tag_name}-{env}
5854
tag_name: "lablink-eip" # Tag prefix for EIP name. Both strategies use {tag_name}-{env} format (e.g., lablink-eip-prod).
5955

6056
ssl:
61-
provider: "letsencrypt" # "letsencrypt" = Caddy auto-SSL, "cloudflare" = CloudFlare proxy, "none" = HTTP only
57+
provider: "letsencrypt" # "letsencrypt" = Caddy auto-SSL, "cloudflare" = CloudFlare proxy, "acm" = AWS Certificate Manager, "none" = HTTP only
6258
email: "[email protected]" # Email for Let's Encrypt notifications
63-
staging: true # true = staging/testing certs (unlimited), false = production Let's Encrypt certs (rate limited)
59+
certificate_arn: "" # Required when provider="acm" - ARN of ACM certificate
6460

6561
startup_script:
6662
enabled: true # true = run custom startup script on client VMs, false = no script
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# LabLink Configuration: Use Case 1 - IP-Only (No DNS, No SSL)
2+
# Simplest deployment: Access allocator via public IP address over HTTP
3+
#
4+
# Use this when:
5+
# - You don't have a domain name
6+
# - You want the fastest setup for testing
7+
# - SSL/HTTPS is not required
8+
#
9+
# Access URL: http://<allocator_public_ip>, e.g. http://52.40.142.146 (the actual IP is output after deployment)
10+
11+
db:
12+
dbname: "lablink_db"
13+
user: "lablink"
14+
password: "PLACEHOLDER_DB_PASSWORD"
15+
host: "localhost"
16+
port: 5432
17+
table_name: "vms"
18+
message_channel: "vm_updates"
19+
20+
machine:
21+
machine_type: "g4dn.xlarge"
22+
image: "ghcr.io/talmolab/lablink-client-base-image:linux-amd64-latest-test"
23+
ami_id: "ami-0601752c11b394251" # us-west-2
24+
repository: "https://github.com/talmolab/sleap-tutorial-data.git"
25+
software: "sleap"
26+
extension: "slp"
27+
28+
allocator:
29+
image_tag: "linux-amd64-latest-test"
30+
31+
app:
32+
admin_user: "admin"
33+
admin_password: "PLACEHOLDER_ADMIN_PASSWORD"
34+
region: "us-west-2"
35+
36+
dns:
37+
enabled: false # No DNS - use IP address only
38+
terraform_managed: false
39+
domain: ""
40+
zone_id: ""
41+
42+
eip:
43+
strategy: "dynamic"
44+
tag_name: "lablink-eip"
45+
46+
ssl:
47+
provider: "none" # No SSL - HTTP only
48+
email: ""
49+
certificate_arn: ""
50+
51+
startup_script:
52+
enabled: false
53+
path: "config/custom-startup.sh"
54+
on_error: "continue"
55+
56+
bucket_name: "tf-state-lablink-YOURORG"

0 commit comments

Comments
 (0)