Skip to content

Fix credential refresh race during worker activation (#567) #60

Fix credential refresh race during worker activation (#567)

Fix credential refresh race during worker activation (#567) #60

name: Container Image Control Plane CD

# Runs on every push to main, and on demand via manual dispatch.
on:
  push:
    branches:
      - main
  workflow_dispatch:

# Registry hosts and image name shared by every job in this workflow.
env:
  ECR_REGISTRY: 795637471508.dkr.ecr.us-east-1.amazonaws.com
  GHCR_REGISTRY: ghcr.io
  IMAGE_NAME: duckgres-controlplane
# CD pipeline for cmd/duckgres-controlplane, built from Dockerfile.controlplane.
# One build per commit SHA (no DuckDB-version matrix: the control plane (CP) is
# version-agnostic by design, so one image fits all worker fleets). The result
# is a multi-arch manifest covering arm64 and amd64.
jobs:
  # Build, push, and smoke-test one single-platform image per matrix entry.
  # Each image is pushed to both registries as <sha>-<arch>; the `manifest`
  # job below combines the per-arch tags into multi-arch tags.
  build:
    name: Build controlplane ${{ matrix.platform }}
    if: github.repository == 'PostHog/duckgres'
    strategy:
      # Let the other architecture finish even if one fails.
      fail-fast: false
      matrix:
        include:
          - platform: linux/arm64
            runner: ubuntu-24.04-arm
          - platform: linux/amd64
            runner: ubuntu-24.04
    runs-on: ${{ matrix.runner }}
    permissions:
      # OIDC token for the role assumed in "Configure AWS credentials".
      id-token: write
      contents: read
      # Push to GHCR with GITHUB_TOKEN.
      packages: write
    steps:
      - name: Check out
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3.12.0
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708  # v5.1.1
        with:
          role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }}
          aws-region: us-east-1
      - name: Login to Amazon ECR
        uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076  # v2.0.1
      - name: Login to GHCR
        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772  # v3.4.0
        with:
          registry: ${{ env.GHCR_REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Strip the "linux/" prefix so the arch can be used in tags and cache
      # scopes (linux/arm64 -> arm64).
      - name: Prepare platform slug
        id: slug
        run: echo "arch=${PLATFORM#linux/}" >> "$GITHUB_OUTPUT"
        env:
          PLATFORM: ${{ matrix.platform }}
      - name: Build and push by digest
        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8  # v6.19.2
        with:
          context: .
          file: Dockerfile.controlplane
          push: true
          platforms: ${{ matrix.platform }}
          tags: |
            ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}-${{ steps.slug.outputs.arch }}
            ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${{ github.sha }}-${{ steps.slug.outputs.arch }}
          build-args: |
            VERSION=build-${{ github.sha }}
            COMMIT=${{ github.sha }}
            BUILD_TAGS=kubernetes
          cache-from: type=gha,scope=cp-${{ steps.slug.outputs.arch }}
          cache-to: type=gha,mode=max,scope=cp-${{ steps.slug.outputs.arch }}
      # Smoke test the freshly-pushed image. Same shape as the worker-cd
      # smoke step (see that step for the full rationale): pull from GHCR,
      # run the binary on the runner's native arch, assert `--version`
      # works, then boot the binary in process-backend control-plane mode
      # and confirm "Control plane listening" lands in the logs within 30s.
      # If smoke fails, the dependent `manifest` job is skipped, so the
      # unsuffixed multi-arch tag never gets produced.
      - name: Smoke test pushed image
        env:
          IMAGE: ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${{ github.sha }}-${{ steps.slug.outputs.arch }}
          EXPECTED_VERSION: build-${{ github.sha }}
        run: |
          set -euo pipefail
          docker pull "$IMAGE"

          echo "::group::--version"
          out=$(docker run --rm "$IMAGE" --version)
          echo "$out"
          if ! grep -qF "duckgres version $EXPECTED_VERSION" <<<"$out"; then
            echo "✗ --version output did not include 'duckgres version $EXPECTED_VERSION'"
            echo "::endgroup::"
            exit 1
          fi
          echo "✓ --version OK"
          echo "::endgroup::"

          echo "::group::boot smoke"
          # Register cleanup before starting the container so a failed start
          # still removes anything that was created.
          trap 'docker rm -f cp-smoke >/dev/null 2>&1 || true' EXIT
          docker run -d --name cp-smoke "$IMAGE" \
            --mode control-plane \
            --host 127.0.0.1 \
            --port 25432 \
            --socket-dir /tmp/sockets \
            --process-min-workers 0 \
            --process-max-workers 1

          # status is one of: ok | exited | timeout
          status=timeout
          for i in $(seq 1 30); do
            # Capture command output and match it with a here-string rather
            # than piping into `grep -q`: under `pipefail`, grep exiting on
            # the first match can SIGPIPE the producer and make the whole
            # pipeline report failure even though the pattern was found.
            running=$(docker ps --format '{{.Names}}' || true)
            if ! grep -qx cp-smoke <<<"$running"; then
              echo "✗ cp-smoke exited before listening:"
              docker logs cp-smoke 2>&1 | tail -80
              status=exited
              break
            fi
            logs=$(docker logs cp-smoke 2>&1 || true)
            if grep -q "Control plane listening" <<<"$logs"; then
              echo "✓ cp reached 'Control plane listening' after ${i}s"
              tail -20 <<<"$logs"
              status=ok
              break
            fi
            sleep 1
          done
          # Only report a timeout when the loop really ran out of attempts;
          # an early exit has already been reported above.
          if [ "$status" = "timeout" ]; then
            echo "✗ cp did not log 'Control plane listening' within 30s"
            docker logs cp-smoke 2>&1 | tail -80
          fi
          echo "::endgroup::"
          [ "$status" = "ok" ]

  # Combine the per-arch images pushed by `build` into multi-arch tags
  # (<sha> and latest) in both ECR and GHCR. Runs only if every matrix leg
  # of `build` succeeded, including its smoke test.
  manifest:
    name: Multi-arch manifest controlplane
    needs: build
    if: github.repository == 'PostHog/duckgres'
    runs-on: ubuntu-24.04
    permissions:
      # OIDC token for the role assumed in "Configure AWS credentials".
      id-token: write
      contents: read
      # Push to GHCR with GITHUB_TOKEN.
      packages: write
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3.12.0
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708  # v5.1.1
        with:
          role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }}
          aws-region: us-east-1
      - name: Login to Amazon ECR
        uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076  # v2.0.1
      - name: Login to GHCR
        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772  # v3.4.0
        with:
          registry: ${{ env.GHCR_REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # The script reads ECR_REGISTRY, GHCR_REGISTRY and IMAGE_NAME from the
      # workflow-level `env`, and the commit from the default GITHUB_SHA
      # variable, instead of splicing expressions into the shell source.
      # NOTE(review): `latest` is moved on every run, including manual
      # workflow_dispatch runs started from any ref; confirm that is intended.
      - name: Create and push ECR / GHCR manifests
        run: |
          set -euo pipefail
          for tag in "$GITHUB_SHA" latest; do
            docker buildx imagetools create \
              --tag "${ECR_REGISTRY}/${IMAGE_NAME}:${tag}" \
              "${ECR_REGISTRY}/${IMAGE_NAME}:${GITHUB_SHA}-arm64" \
              "${ECR_REGISTRY}/${IMAGE_NAME}:${GITHUB_SHA}-amd64"
            docker buildx imagetools create \
              --tag "${GHCR_REGISTRY}/posthog/${IMAGE_NAME}:${tag}" \
              "${GHCR_REGISTRY}/posthog/${IMAGE_NAME}:${GITHUB_SHA}-arm64" \
              "${GHCR_REGISTRY}/posthog/${IMAGE_NAME}:${GITHUB_SHA}-amd64"
          done