Skip to content

Commit 6e247e3

Browse files
committed
Add VPC sweeper for cleaning up orphaned test resources
- Add vpc-sweeper.sh script to clean up test VPCs older than 7 days
- Add GitHub workflow to run the sweeper weekly on Sunday at 9 AM PST
- Add Makefile targets vpc-sweeper and vpc-sweeper-force
- Make the script default to dry-run mode for safety
1 parent ee97808 commit 6e247e3

File tree

3 files changed

+141
-0
lines changed

3 files changed

+141
-0
lines changed

.github/workflows/vpc-sweeper.yaml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Periodically removes leftover test VPCs by running scripts/vpc-sweeper.sh
# against the test account in us-west-2.
name: VPC Sweeper

on:
  schedule:
    # GitHub evaluates cron expressions in UTC, so this fires at 9 AM PST
    # and at 10 AM Pacific while daylight saving time is in effect.
    - cron: "0 17 * * 0" # Weekly on Sunday at 9 AM PST (17:00 UTC)
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Run in dry-run mode'
        required: false
        default: 'false'
        type: boolean
      max_age_hours:
        description: 'Maximum age in hours for resources to be cleaned up'
        required: false
        default: '168'
        type: string

permissions:
  id-token: write # needed to assume the AWS role through OIDC
  contents: read

jobs:
  vpc-cleanup:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # refs/tags/v5.0.0

      - name: Set up AWS credentials
        uses: aws-actions/configure-aws-credentials@5579c002bb4778aa43395ef1df492868a9a1c83f # refs/tags/v4.0.2
        with:
          role-to-assume: ${{ secrets.OSS_TEST_ROLE_ARN }}
          role-duration-seconds: 3600
          aws-region: us-west-2

      - name: Install eksctl
        run: |
          # pipefail + --fail: abort on a failed download instead of handing an
          # HTTP error body to tar.
          set -euo pipefail
          # Release archives are published under the eksctl-io organization.
          curl --fail --silent --show-error --location "https://github.com/eksctl-io/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp
          sudo mv /tmp/eksctl /usr/local/bin/

      - name: Run VPC sweeper
        env:
          # Scheduled runs carry no inputs, so the fallbacks below apply:
          # real deletion (DRY_RUN=false) of resources older than 168 hours.
          DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }}
          MAX_AGE_HOURS: ${{ github.event.inputs.max_age_hours || '168' }}
          AWS_DEFAULT_REGION: us-west-2
        run: |
          ./scripts/vpc-sweeper.sh

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,12 @@ docker-unit-tests: build-docker-test ## Run unit tests inside of the testing
250250
$(TEST_IMAGE_NAME) \
251251
make unit-test
252252

253+
# Neither target produces a file; declare them phony so a stray file named
# "vpc-sweeper" or "vpc-sweeper-force" can never turn them into no-ops.
.PHONY: vpc-sweeper vpc-sweeper-force

# The script itself defaults DRY_RUN to "true", so this target only reports
# what would be deleted unless DRY_RUN is overridden by the caller.
vpc-sweeper: ## Run VPC sweeper to clean up orphaned test resources (DRY_RUN=true by default)
	./scripts/vpc-sweeper.sh

# Forces real deletion regardless of the DRY_RUN value in the environment.
vpc-sweeper-force: ## Run VPC sweeper with actual deletion (DRY_RUN=false)
	DRY_RUN=false ./scripts/vpc-sweeper.sh
258+
253259
##@ Build the Test Binaries files in /test
254260
build-test-binaries:
255261
mkdir -p ${MAKEFILE_PATH}test/build

scripts/vpc-sweeper.sh

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#!/usr/bin/env bash
#
# vpc-sweeper.sh - find test VPCs older than a cutoff and delete them together
# with the resources that block their deletion.
#
# Environment:
#   AWS_DEFAULT_REGION  Region to sweep (default: us-west-2).
#   DRY_RUN             "true" (default) only logs what would be deleted; any
#                       other value performs the deletion.
#   MAX_AGE_HOURS       Minimum age, in hours, before a VPC is swept
#                       (default: 168).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
# Shared helpers; check_is_installed is not defined in this file and is
# presumably provided here - confirm against lib/common.sh.
source "$SCRIPT_DIR"/lib/common.sh

: "${AWS_DEFAULT_REGION:=us-west-2}"
: "${DRY_RUN:=true}"
: "${MAX_AGE_HOURS:=168}"

# ":=" only assigns a shell variable. Export the region so that any child
# process started by this script sees the same value even when the caller
# did not have it in the environment.
export AWS_DEFAULT_REGION
11+
12+
# Write one line to stdout: a "[YYYY-MM-DD HH:MM:SS]" stamp taken from `date`,
# a space, then all arguments joined via "$*".
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
15+
16+
#######################################
# Find test VPCs older than MAX_AGE_HOURS and either report them (DRY_RUN is
# "true") or hand them to delete_vpc_resources.
# Globals:   AWS_DEFAULT_REGION, DRY_RUN, MAX_AGE_HOURS (all read)
# Arguments: none
# Outputs:   progress messages through log
# Returns:   0 when the sweep ran (even if nothing matched); 1 when
#            MAX_AGE_HOURS is invalid or the VPC listing could not be obtained
#######################################
cleanup_test_vpcs() {
  # Only a non-negative integer is acceptable: GNU date turns "-5 hours ago"
  # into a cutoff in the future, which would make every VPC look expired.
  if [[ ! "$MAX_AGE_HOURS" =~ ^[0-9]+$ ]]; then
    log "ERROR: MAX_AGE_HOURS must be a non-negative integer, got '${MAX_AGE_HOURS}'"
    return 1
  fi

  # Declaration and assignment are separate so a failing command is not
  # hidden behind the always-successful `local`.
  local cutoff_time
  cutoff_time=$(date -d "${MAX_AGE_HOURS} hours ago" -u +"%Y-%m-%dT%H:%M:%S") || return 1

  log "Looking for test VPCs older than ${MAX_AGE_HOURS} hours (before ${cutoff_time})"

  # Wildcards are honored in filter *values*, not in the key embedded in a
  # "tag:<key>" filter name, so match the key through "tag-key" and verify
  # the "owned" value locally below.
  local vpcs
  vpcs=$(aws ec2 describe-vpcs \
    --region "$AWS_DEFAULT_REGION" \
    --filters "Name=tag-key,Values=kubernetes.io/cluster/cni-test-*" \
    --query "Vpcs[].{VpcId:VpcId,CreationTime:CreationTime,Tags:Tags}" \
    --output json) || return 1

  # Keep VPCs that carry an owned cni-test cluster tag and are either older
  # than the cutoff or have no creation time at all (printed as "unknown").
  # NOTE(review): the documented EC2 Vpc object has no CreationTime field. If
  # the CLI never returns one, every VPC ends up in the "unknown" branch below
  # and nothing is swept; a different age source is then needed - confirm.
  local candidates
  candidates=$(jq -r --arg cutoff "$cutoff_time" '
    .[]?
    | select(any(.Tags[]?; (.Key | startswith("kubernetes.io/cluster/cni-test-")) and .Value == "owned"))
    | select(.CreationTime == null or .CreationTime < $cutoff)
    | "\(.VpcId) \(.CreationTime // "unknown")"' <<<"$vpcs") || return 1

  local vpc_id creation_time
  local swept=0
  # The list is read from fd 3 so commands run during deletion cannot consume
  # the remaining entries from stdin.
  while read -r -u 3 vpc_id creation_time; do
    [[ -n "$vpc_id" ]] || continue

    if [[ "$creation_time" == "unknown" ]]; then
      # Never delete something whose age cannot be established.
      log "WARNING: cannot determine age of VPC $vpc_id (no CreationTime reported); skipping"
      continue
    fi

    swept=$((swept + 1))
    log "Found orphaned VPC: $vpc_id (created: $creation_time)"

    if [[ "$DRY_RUN" == "true" ]]; then
      log "DRY_RUN: Would delete VPC $vpc_id and associated resources"
    else
      delete_vpc_resources "$vpc_id"
    fi
  done 3<<<"$candidates"

  if ((swept == 0)); then
    log "No orphaned test VPCs found"
  fi
}
43+
44+
#######################################
# Best-effort teardown of one VPC: matching cni-test EKS clusters, NAT
# gateways, internet gateways, subnets, non-default security groups, and
# finally the VPC itself. Individual failures are logged and the teardown
# continues with the next resource.
# Globals:   AWS_DEFAULT_REGION (read)
# Arguments: $1 - ID of the VPC to remove
# Outputs:   progress and warning messages through log
# Returns:   always 0, so one stuck VPC does not abort the rest of a sweep
#######################################
delete_vpc_resources() {
  local vpc_id="$1"
  local region="$AWS_DEFAULT_REGION"
  local ids id
  log "Deleting resources for VPC: $vpc_id"

  # Delete EKS clusters in this VPC
  local cluster cluster_vpc
  ids=$(aws eks list-clusters --region "$region" \
    --query "clusters[?starts_with(@, 'cni-test-')]" --output text) || {
    log "WARNING: could not list EKS clusters; skipping cluster cleanup for $vpc_id"
    ids=""
  }
  # `--output text` yields whitespace-separated names; splitting is intended.
  for cluster in $ids; do
    cluster_vpc=$(aws eks describe-cluster --name "$cluster" --region "$region" \
      --query "cluster.resourcesVpcConfig.vpcId" --output text 2>/dev/null) || cluster_vpc=""
    if [[ "$cluster_vpc" == "$vpc_id" ]]; then
      log "Deleting EKS cluster: $cluster"
      eksctl delete cluster "$cluster" --region "$region" --wait \
        || log "WARNING: failed to delete EKS cluster $cluster"
    fi
  done

  # Delete NAT Gateways
  local -a nat_gateways=()
  ids=$(aws ec2 describe-nat-gateways --region "$region" \
    --filter "Name=vpc-id,Values=$vpc_id" \
    --query "NatGateways[].NatGatewayId" --output text) || ids=""
  # read -d '' consumes the whole text and reports EOF as failure; the array
  # is still filled, hence the `|| true`.
  read -r -d '' -a nat_gateways <<<"$ids" || true
  if ((${#nat_gateways[@]} > 0)); then
    for id in "${nat_gateways[@]}"; do
      aws ec2 delete-nat-gateway --region "$region" --nat-gateway-id "$id" \
        || log "WARNING: failed to delete NAT gateway $id"
    done
    # NAT gateway deletion completes asynchronously and the gateway keeps its
    # subnet busy until then, so wait before touching subnets.
    # NOTE(review): Elastic IPs that were attached to these gateways are not
    # released here - confirm whether they need separate cleanup.
    aws ec2 wait nat-gateway-deleted --region "$region" --nat-gateway-ids "${nat_gateways[@]}" \
      || log "WARNING: gave up waiting for NAT gateways of $vpc_id to finish deleting"
  fi

  # Delete Internet Gateways
  # Each ID is handled on its own; the text output puts all IDs on one
  # tab-separated line, which must not be passed along as a single argument.
  ids=$(aws ec2 describe-internet-gateways --region "$region" \
    --filters "Name=attachment.vpc-id,Values=$vpc_id" \
    --query "InternetGateways[].InternetGatewayId" --output text) || ids=""
  for id in $ids; do
    if ! {
      aws ec2 detach-internet-gateway --region "$region" --internet-gateway-id "$id" --vpc-id "$vpc_id" \
        && aws ec2 delete-internet-gateway --region "$region" --internet-gateway-id "$id"
    }; then
      log "WARNING: failed to remove internet gateway $id"
    fi
  done

  # Delete subnets
  ids=$(aws ec2 describe-subnets --region "$region" \
    --filters "Name=vpc-id,Values=$vpc_id" \
    --query "Subnets[].SubnetId" --output text) || ids=""
  for id in $ids; do
    aws ec2 delete-subnet --region "$region" --subnet-id "$id" \
      || log "WARNING: failed to delete subnet $id"
  done

  # Delete security groups (except default)
  ids=$(aws ec2 describe-security-groups --region "$region" \
    --filters "Name=vpc-id,Values=$vpc_id" \
    --query "SecurityGroups[?GroupName!='default'].GroupId" --output text) || ids=""
  for id in $ids; do
    aws ec2 delete-security-group --region "$region" --group-id "$id" \
      || log "WARNING: failed to delete security group $id"
  done

  # Delete VPC - only report success when the call actually succeeded.
  if aws ec2 delete-vpc --region "$region" --vpc-id "$vpc_id"; then
    log "Deleted VPC: $vpc_id"
  else
    log "WARNING: could not delete VPC $vpc_id; it likely still has dependent resources"
  fi
  return 0
}
74+
75+
# Entry point: announce the run, verify each required CLI through
# check_is_installed (not defined in this file; presumably from
# lib/common.sh), run the sweep, and log completion.
main() {
  local required_tool

  log "Starting VPC sweeper (DRY_RUN=$DRY_RUN, MAX_AGE_HOURS=$MAX_AGE_HOURS)"

  for required_tool in aws jq eksctl; do
    check_is_installed "$required_tool"
  done

  cleanup_test_vpcs

  log "VPC sweeper completed"
}
86+
87+
# Script entry point: pass every command-line argument through to main.
main "$@"

0 commit comments

Comments
 (0)