Add AWS Burst CP deploy workflow #1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Deploy AWS Burst Control Plane | |
| on: | |
| push: | |
| branches: | |
| - main | |
| - feat/aws-poc-worker-support-clean-rebased | |
| paths: | |
| - "cmd/server/**" | |
| - "internal/**" | |
| - "web/**" | |
| - "deploy/vector/**" | |
| - "go.mod" | |
| - "go.sum" | |
| - ".github/workflows/deploy-aws-burst-cp.yml" | |
| workflow_dispatch: | |
| inputs: | |
| region: | |
| description: AWS region | |
| required: true | |
| default: us-east-2 | |
| cell_id: | |
| description: Cell ID | |
| required: true | |
| default: aws-us-east-2-burst-prod | |
| cp_domain: | |
| description: Public control-plane domain | |
| required: true | |
| default: cp-aws-us-east-2-burst-prod.opencomputer.dev | |
| env: | |
| AWS_REGION: ${{ inputs.region || vars.AWS_REGION || 'us-east-2' }} | |
| CELL_ID: ${{ inputs.cell_id || 'aws-us-east-2-burst-prod' }} | |
| CP_DOMAIN: ${{ inputs.cp_domain || 'cp-aws-us-east-2-burst-prod.opencomputer.dev' }} | |
| GOARCH: amd64 | |
| jobs: | |
| deploy: | |
| name: Deploy AWS Burst CP | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 30 | |
| permissions: | |
| id-token: write | |
| contents: read | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-go@v5 | |
| with: | |
| go-version: "1.23" | |
| - uses: actions/setup-node@v4 | |
| with: | |
| node-version: "20" | |
| - name: Configure AWS credentials | |
| uses: aws-actions/configure-aws-credentials@v4 | |
| with: | |
| role-to-assume: ${{ secrets.AWS_ROLE_ARN }} | |
| aws-region: ${{ env.AWS_REGION }} | |
| - name: Discover control plane | |
| run: | | |
| set -euo pipefail | |
| CP_INSTANCE_ID=$(aws ec2 describe-instances \ | |
| --region "$AWS_REGION" \ | |
| --filters \ | |
| "Name=tag:Name,Values=oc-cp-$CELL_ID" \ | |
| "Name=instance-state-name,Values=running" \ | |
| --query 'Reservations[0].Instances[0].InstanceId' \ | |
| --output text) | |
| if [ -z "$CP_INSTANCE_ID" ] || [ "$CP_INSTANCE_ID" = "None" ]; then | |
| echo "No running CP instance found for $CELL_ID" | |
| exit 1 | |
| fi | |
| CP_PUBLIC_IP=$(aws ec2 describe-instances \ | |
| --region "$AWS_REGION" \ | |
| --instance-ids "$CP_INSTANCE_ID" \ | |
| --query 'Reservations[0].Instances[0].PublicIpAddress' \ | |
| --output text) | |
| CP_AZ=$(aws ec2 describe-instances \ | |
| --region "$AWS_REGION" \ | |
| --instance-ids "$CP_INSTANCE_ID" \ | |
| --query 'Reservations[0].Instances[0].Placement.AvailabilityZone' \ | |
| --output text) | |
| CP_SG_ID=$(aws ec2 describe-security-groups \ | |
| --region "$AWS_REGION" \ | |
| --filters "Name=group-name,Values=oc-sg-cp-$CELL_ID" \ | |
| --query 'SecurityGroups[0].GroupId' \ | |
| --output text) | |
| if [ -z "$CP_PUBLIC_IP" ] || [ "$CP_PUBLIC_IP" = "None" ]; then | |
| echo "CP instance $CP_INSTANCE_ID has no public IP" | |
| exit 1 | |
| fi | |
| if [ -z "$CP_SG_ID" ] || [ "$CP_SG_ID" = "None" ]; then | |
| echo "CP security group not found for $CELL_ID" | |
| exit 1 | |
| fi | |
| echo "CP_INSTANCE_ID=$CP_INSTANCE_ID" >> "$GITHUB_ENV" | |
| echo "CP_PUBLIC_IP=$CP_PUBLIC_IP" >> "$GITHUB_ENV" | |
| echo "CP_AZ=$CP_AZ" >> "$GITHUB_ENV" | |
| echo "CP_SG_ID=$CP_SG_ID" >> "$GITHUB_ENV" | |
| - name: Build server binary | |
| run: | | |
| set -euo pipefail | |
| VERSION=$(git rev-parse --short HEAD) | |
| echo "VERSION=$VERSION" >> "$GITHUB_ENV" | |
| CGO_ENABLED=0 GOOS=linux GOARCH="$GOARCH" go build \ | |
| -ldflags "-X main.Version=$VERSION" \ | |
| -o bin/opensandbox-server ./cmd/server/ | |
| - name: Build web dashboard | |
| run: cd web && npm ci && npm run build | |
| - name: Package deploy artifacts | |
| run: | | |
| set -euo pipefail | |
| tar czf bin/web-dist.tar.gz -C web dist | |
| tar czf bin/vector-deploy.tar.gz -C deploy vector | |
| - name: Prepare SSH key | |
| run: | | |
| set -euo pipefail | |
| mkdir -p ~/.ssh | |
| printf '%s\n' "${{ secrets.AWS_CP_SSH_PRIVATE_KEY }}" > ~/.ssh/aws-burst-cp | |
| chmod 600 ~/.ssh/aws-burst-cp | |
| ssh-keygen -y -f ~/.ssh/aws-burst-cp > ~/.ssh/aws-burst-cp.pub | |
| - name: Authorize runner SSH | |
| run: | | |
| set -euo pipefail | |
| RUNNER_IP=$(curl -fsSL https://checkip.amazonaws.com | tr -d '[:space:]') | |
| echo "RUNNER_CIDR=${RUNNER_IP}/32" >> "$GITHUB_ENV" | |
| set +e | |
| aws ec2 authorize-security-group-ingress \ | |
| --region "$AWS_REGION" \ | |
| --group-id "$CP_SG_ID" \ | |
| --ip-permissions "IpProtocol=tcp,FromPort=22,ToPort=22,IpRanges=[{CidrIp=${RUNNER_IP}/32,Description=github-actions-cp-deploy-${GITHUB_RUN_ID}}]" | |
| STATUS=$? | |
| set -e | |
| if [ "$STATUS" -ne 0 ]; then | |
| echo "SSH ingress may already exist; continuing so deploy can attempt SSH." | |
| fi | |
| - name: Push SSH key with EC2 Instance Connect | |
| run: | | |
| set -euo pipefail | |
| aws ec2-instance-connect send-ssh-public-key \ | |
| --region "$AWS_REGION" \ | |
| --instance-id "$CP_INSTANCE_ID" \ | |
| --availability-zone "$CP_AZ" \ | |
| --instance-os-user ubuntu \ | |
| --ssh-public-key "file://$HOME/.ssh/aws-burst-cp.pub" | |
| - name: Wait for SSH | |
| run: | | |
| set -euo pipefail | |
| for i in $(seq 1 30); do | |
| if ssh -i ~/.ssh/aws-burst-cp \ | |
| -o StrictHostKeyChecking=no \ | |
| -o UserKnownHostsFile=/dev/null \ | |
| -o ConnectTimeout=5 \ | |
| ubuntu@"$CP_PUBLIC_IP" 'echo ssh-ready'; then | |
| exit 0 | |
| fi | |
| sleep 5 | |
| done | |
| echo "SSH did not become ready" | |
| exit 1 | |
| - name: Upload artifacts | |
| run: | | |
| set -euo pipefail | |
| scp -i ~/.ssh/aws-burst-cp \ | |
| -o StrictHostKeyChecking=no \ | |
| -o UserKnownHostsFile=/dev/null \ | |
| bin/opensandbox-server bin/web-dist.tar.gz bin/vector-deploy.tar.gz \ | |
| ubuntu@"$CP_PUBLIC_IP":/tmp/ | |
| - name: Install and restart control plane | |
| run: | | |
| set -euo pipefail | |
| ssh -i ~/.ssh/aws-burst-cp \ | |
| -o StrictHostKeyChecking=no \ | |
| -o UserKnownHostsFile=/dev/null \ | |
| ubuntu@"$CP_PUBLIC_IP" 'bash -s' <<'REMOTE' | |
| set -euo pipefail | |
| sudo mkdir -p /opt/opensandbox/web | |
| sudo install -m 0755 /tmp/opensandbox-server /usr/local/bin/opensandbox-server | |
| sudo tar xzf /tmp/web-dist.tar.gz -C /opt/opensandbox/web | |
| sudo tee /etc/systemd/system/opensandbox-server.service >/dev/null <<'UNIT' | |
| [Unit] | |
| Description=OpenComputer Control Plane | |
| After=network-online.target cloud-init.target | |
| Wants=network-online.target | |
| [Service] | |
| Type=simple | |
| WorkingDirectory=/opt/opensandbox | |
| EnvironmentFile=/etc/opensandbox/server.env | |
| ExecStart=/usr/local/bin/opensandbox-server | |
| Restart=always | |
| RestartSec=5 | |
| [Install] | |
| WantedBy=multi-user.target | |
| UNIT | |
| sudo systemctl daemon-reload | |
| sudo systemctl enable opensandbox-server | |
| sudo systemctl restart opensandbox-server | |
| mkdir -p /tmp/vector-deploy | |
| tar xzf /tmp/vector-deploy.tar.gz -C /tmp/vector-deploy | |
| sudo bash /tmp/vector-deploy/vector/install.sh control-plane || true | |
| rm -rf /tmp/vector-deploy /tmp/opensandbox-server /tmp/web-dist.tar.gz /tmp/vector-deploy.tar.gz | |
| sudo systemctl is-active opensandbox-server | |
| REMOTE | |
| - name: Health check | |
| run: | | |
| set -euo pipefail | |
| echo "Checking direct CP health..." | |
| for i in $(seq 1 30); do | |
| if curl -fsS --max-time 5 "http://${CP_PUBLIC_IP}:8080/health"; then | |
| echo | |
| break | |
| fi | |
| sleep 5 | |
| done | |
| echo "Checking public domain health..." | |
| for i in $(seq 1 30); do | |
| if curl -fsS --max-time 10 "https://${CP_DOMAIN}/health"; then | |
| echo | |
| exit 0 | |
| fi | |
| sleep 5 | |
| done | |
| echo "Public health check failed" | |
| exit 1 | |
| - name: Revoke runner SSH | |
| if: always() | |
| run: | | |
| set -euo pipefail | |
| if [ -z "${RUNNER_CIDR:-}" ] || [ -z "${CP_SG_ID:-}" ]; then | |
| echo "No runner SSH rule recorded; skipping revoke" | |
| exit 0 | |
| fi | |
| aws ec2 revoke-security-group-ingress \ | |
| --region "$AWS_REGION" \ | |
| --group-id "$CP_SG_ID" \ | |
| --protocol tcp \ | |
| --port 22 \ | |
| --cidr "$RUNNER_CIDR" || true | |
| - name: Summary | |
| if: always() | |
| run: | | |
| echo "## AWS Burst CP Deploy" >> "$GITHUB_STEP_SUMMARY" | |
| echo "- **Cell:** \`$CELL_ID\`" >> "$GITHUB_STEP_SUMMARY" | |
| echo "- **CP instance:** \`${CP_INSTANCE_ID:-unknown}\`" >> "$GITHUB_STEP_SUMMARY" | |
| echo "- **CP public IP:** \`${CP_PUBLIC_IP:-unknown}\`" >> "$GITHUB_STEP_SUMMARY" | |
| echo "- **Version:** \`${VERSION:-unknown}\`" >> "$GITHUB_STEP_SUMMARY" | |
| echo "- **Domain:** \`https://$CP_DOMAIN\`" >> "$GITHUB_STEP_SUMMARY" |