Fix runtime worker claim race (#565) #59
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # CD workflow that builds and publishes the duckgres-worker container image. | |
| name: Container Image Worker CD (per DuckDB version) | |
| # Runs on every push to main, and on demand via manual dispatch. | |
| on: | |
| push: | |
| branches: | |
| - main | |
| workflow_dispatch: | |
| # Workflow-wide values: the two target registries and the image repository | |
| # name that every tag in the jobs below is assembled from. | |
| env: | |
| ECR_REGISTRY: 795637471508.dkr.ecr.us-east-1.amazonaws.com | |
| GHCR_REGISTRY: ghcr.io | |
| IMAGE_NAME: duckgres-worker | |
| # Per-DuckDB-version matrix build for cmd/duckgres-worker. | |
| # | |
| # Each row produces one image (or multi-arch manifest) tagged | |
| # duckgres-worker:<sha>-duckdb<version>. The "default" row is additionally | |
| # tagged unsuffixed (as <sha> and latest) and triggers the Charts dispatch | |
| # (kept stable so the existing duckgres release continues to roll out as | |
| # before). Non-default rows publish | |
| # their suffixed images and stop there — operators flip a tenant's | |
| # `image` config-store column to point at a specific suffixed tag to | |
| # canary that DuckDB version for that tenant. | |
| # | |
| # To add a DuckDB version, add a row under matrix.duckdb in BOTH the | |
| # `build` and `manifest` jobs (the two lists must stay in sync). The | |
| # DUCKDB_GO_VERSION / DUCKDB_BINDINGS_VERSION pair maps to the | |
| # duckdb-go module versions; the encoding is | |
| # `v2.<major><minor:02d><patch:02d>.0` for go and | |
| # `v0.<major><minor:02d><patch:02d>.0` for bindings, | |
| # so DuckDB 1.5.1 → v2.10501.0 / v0.10501.0 and 1.5.2 → v2.10502.0 / | |
| # v0.10502.0. See scripts/ducklake_version_matrix.sh for the same | |
| # mapping in test code. | |
| jobs: | |
| # Builds, pushes, and smoke-tests one single-arch image per | |
| # (DuckDB version x platform) matrix cell. | |
| build: | |
| name: Build worker ${{ matrix.duckdb.version }} ${{ matrix.platform.platform }} | |
| # Skip the job when the workflow runs outside PostHog/duckgres (e.g. forks). | |
| if: github.repository == 'PostHog/duckgres' | |
| strategy: | |
| # Let the remaining cells run to completion when one cell fails. | |
| fail-fast: false | |
| matrix: | |
| # One row per DuckDB version. version/go/bindings/httpfs are forwarded | |
| # to the image build as build args; `default` is not read by this job. | |
| duckdb: | |
| - version: "1.5.2" | |
| go: "v2.10502.0" | |
| bindings: "v0.10502.0" | |
| httpfs: "v1.5.2-stoi-fix" | |
| default: true | |
| - version: "1.5.1" | |
| go: "v2.10501.0" | |
| bindings: "v0.10501.0" | |
| httpfs: "v1.5.1-stoi-fix" | |
| default: false | |
| # One row per target platform: `runner` selects the runs-on label and | |
| # `slug` becomes the per-arch tag suffix. | |
| platform: | |
| - platform: linux/arm64 | |
| runner: ubuntu-24.04-arm | |
| slug: arm64 | |
| - platform: linux/amd64 | |
| runner: ubuntu-24.04 | |
| slug: amd64 | |
| runs-on: ${{ matrix.platform.runner }} | |
| permissions: | |
| # NOTE(review): presumably required for the OIDC role assumption done by | |
| # the "Configure AWS credentials" step — confirm. | |
| id-token: write | |
| contents: read | |
| # Lets GITHUB_TOKEN push the image to GHCR. | |
| packages: write | |
| steps: | |
| # Actions are pinned to full commit SHAs; the trailing comment on each | |
| # `uses:` line records the release tag that SHA corresponds to. | |
| - name: Check out | |
| uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 | |
| # Log in to both registries: the build step below pushes to ECR and GHCR. | |
| - name: Configure AWS credentials | |
| uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 | |
| with: | |
| role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }} | |
| aws-region: us-east-1 | |
| - name: Login to Amazon ECR | |
| uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 | |
| - name: Login to GHCR | |
| uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 | |
| with: | |
| registry: ${{ env.GHCR_REGISTRY }} | |
| username: ${{ github.actor }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| # Build the worker image for this cell's platform and push it to both | |
| # registries under a per-arch tag: <sha>-duckdb<version>-<slug>. The | |
| # images are pushed by tag (not by digest); the `manifest` job later | |
| # combines the per-arch tags into multi-arch tags. | |
| - name: Build and push per-arch image | |
| uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 | |
| with: | |
| context: . | |
| file: Dockerfile.worker | |
| push: true | |
| platforms: ${{ matrix.platform.platform }} | |
| tags: | | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}-duckdb${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${{ github.sha }}-duckdb${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| # Build identity plus the DuckDB version pins taken from the matrix row. | |
| build-args: | | |
| VERSION=build-${{ github.sha }} | |
| COMMIT=${{ github.sha }} | |
| BUILD_TAGS=kubernetes | |
| DUCKDB_GO_VERSION=${{ matrix.duckdb.go }} | |
| DUCKDB_BINDINGS_VERSION=${{ matrix.duckdb.bindings }} | |
| DUCKDB_EXTENSION_VERSION=${{ matrix.duckdb.version }} | |
| HTTPFS_EXTENSION_TAG=${{ matrix.duckdb.httpfs }} | |
| # GitHub Actions layer cache, scoped per DuckDB version and arch so that | |
| # matrix cells do not share a cache scope. | |
| cache-from: type=gha,scope=worker-${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| cache-to: type=gha,mode=max,scope=worker-${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| # Smoke test the freshly-pushed image. We pull from GHCR (cheaper | |
| # than ECR) and run the binary on the runner's native arch, so no | |
| # qemu is needed. Two assertions: | |
| # 1. `--version` exits 0 and prints the expected build identity. | |
| # Catches stub-binary regressions like the pre-#521 exit-1 | |
| # stub that shipped to ECR for weeks before being noticed. | |
| # 2. The binary boots with the same arg shape the K8s pool | |
| # hardcodes (`--mode duckdb-service --duckdb-listen :8816`) | |
| # and reaches the "Starting DuckDB service" log line within | |
| # 30s. Catches flag.Parse regressions like the missing | |
| # `--mode` flag fixed in #522, and any boot-time linkage | |
| # failure that only manifests at runtime. | |
| # If smoke fails for any matrix cell, the dependent `manifest` | |
| # job is skipped (default `needs:` behavior), so the unsuffixed | |
| # multi-arch tag is never produced and downstream Charts dispatch | |
| # never picks up a broken image. | |
| - name: Smoke test pushed image | |
| env: | |
| IMAGE: ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${{ github.sha }}-duckdb${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| EXPECTED_VERSION: build-${{ github.sha }} | |
| run: | | |
| set -euo pipefail | |
| docker pull "$IMAGE" | |
| echo "::group::--version" | |
| out=$(docker run --rm "$IMAGE" --version) | |
| echo "$out" | |
| if ! grep -qF "duckgres version $EXPECTED_VERSION" <<<"$out"; then | |
| echo "✗ --version output did not include 'duckgres version $EXPECTED_VERSION'" | |
| exit 1 | |
| fi | |
| echo "✓ --version OK" | |
| echo "::endgroup::" | |
| # Worker's TCP listen path requires TLS — duckdbservice.Serve | |
| # always loads certs from cfg.TLSCertFile/KeyFile (default | |
| # ./certs/server.crt + .key) when listener.Network()=="tcp". | |
| # In prod the K8s pool mounts these via a Secret. For smoke | |
| # we generate an ephemeral self-signed pair and bind-mount | |
| # it; DUCKGRES_CERT / DUCKGRES_KEY env feed configresolve. | |
| # Without this, the binary boots far enough to log | |
| # "Starting DuckDB service" then dies in Serve(), which | |
| # the prior version of this step false-passed under (see | |
| # PR #528 follow-up). | |
| echo "::group::generate ephemeral TLS pair" | |
| CERT_DIR="$(mktemp -d)" | |
| # Install cleanup as soon as there is something to clean up. | |
| # Under `set -e`, a failing openssl or `docker run` below would | |
| # otherwise exit before any trap exists. Removing a container | |
| # that was never created is harmless (errors are ignored). | |
| trap 'docker rm -f worker-smoke >/dev/null 2>&1 || true; rm -rf "$CERT_DIR"' EXIT | |
| # `-nodes` (skip private key encryption) is required: Go's | |
| # tls.LoadX509KeyPair expects an unencrypted PEM. The cert | |
| # lives in CI for ~30s, is never published, never reused, | |
| # and protects nothing real — so the unencrypted key is | |
| # the desired property here, not a vulnerability. | |
| # nosemgrep: trailofbits.generic.openssl-insecure-flags.openssl-insecure-flags | |
| openssl req -x509 -newkey rsa:2048 -nodes \ | |
| -keyout "$CERT_DIR/server.key" \ | |
| -out "$CERT_DIR/server.crt" \ | |
| -days 1 -subj '/CN=worker-smoke' >/dev/null 2>&1 | |
| # mktemp -d defaults to 0700, which the container's | |
| # non-root duckgres UID can't traverse via the bind | |
| # mount → "permission denied" loading the cert. 0755 on | |
| # the dir + 0644 on the files lets any UID read. | |
| chmod 755 "$CERT_DIR" | |
| chmod 644 "$CERT_DIR"/server.crt "$CERT_DIR"/server.key | |
| echo "::endgroup::" | |
| echo "::group::boot smoke" | |
| docker run -d --name worker-smoke \ | |
| -v "$CERT_DIR:/etc/worker-smoke-tls:ro" \ | |
| -e DUCKGRES_CERT=/etc/worker-smoke-tls/server.crt \ | |
| -e DUCKGRES_KEY=/etc/worker-smoke-tls/server.key \ | |
| "$IMAGE" \ | |
| --mode duckdb-service \ | |
| --duckdb-listen :8816 | |
| # Poll once per second for up to 30s. `status` records which of | |
| # the four exit paths ended the loop (ok / error / exited / | |
| # timeout) so exactly one diagnostic is printed, even when a | |
| # failure is detected on the final poll. | |
| # The level=ERROR substring check defends against the race | |
| # where the binary logs "Starting DuckDB service" and then | |
| # crashes inside Serve() before docker ps notices — the | |
| # previous version of this step false-passed under that | |
| # exact pattern. | |
| status=timeout | |
| for i in $(seq 1 30); do | |
| logs=$(docker logs worker-smoke 2>&1) | |
| if grep -q "level=ERROR" <<<"$logs"; then | |
| echo "✗ worker logged level=ERROR before reaching ready state:" | |
| tail -80 <<<"$logs" | |
| status=error | |
| break | |
| fi | |
| if ! docker ps --format '{{.Names}}' | grep -qx worker-smoke; then | |
| echo "✗ worker-smoke exited before listening:" | |
| tail -80 <<<"$logs" | |
| status=exited | |
| break | |
| fi | |
| if grep -q "Starting DuckDB service" <<<"$logs"; then | |
| echo "✓ worker reached 'Starting DuckDB service' after ${i}s" | |
| tail -20 <<<"$logs" | |
| status=ok | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| # Only a loop that ran out of polls is a timeout; the error and | |
| # exited paths have already printed their own diagnostics. | |
| if [ "$status" = "timeout" ]; then | |
| echo "✗ worker did not log 'Starting DuckDB service' within 30s" | |
| docker logs worker-smoke 2>&1 | tail -80 | |
| fi | |
| echo "::endgroup::" | |
| # The step's exit code: success only on the ok path. | |
| [ "$status" = "ok" ] | |
| # Combines the per-arch images pushed by `build` into multi-arch tags, | |
| # one matrix cell per DuckDB version. | |
| manifest: | |
| name: Multi-arch manifest worker ${{ matrix.duckdb.version }} | |
| # Waits for the whole `build` job (every matrix cell, including the | |
| # smoke tests) before any manifest is created. | |
| needs: build | |
| # Skip the job when the workflow runs outside PostHog/duckgres (e.g. forks). | |
| if: github.repository == 'PostHog/duckgres' | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| # Versions listed here must match the rows in the build job's | |
| # matrix.duckdb; `default` selects the row that also receives the | |
| # unsuffixed <sha> and latest tags. | |
| duckdb: | |
| - version: "1.5.2" | |
| default: true | |
| - version: "1.5.1" | |
| default: false | |
| runs-on: ubuntu-24.04 | |
| permissions: | |
| id-token: write | |
| contents: read | |
| packages: write | |
| # No checkout step: the steps below only log in to the registries and | |
| # create manifests from already-pushed images. | |
| steps: | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 | |
| - name: Configure AWS credentials | |
| uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 | |
| with: | |
| role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }} | |
| aws-region: us-east-1 | |
| - name: Login to Amazon ECR | |
| uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 | |
| - name: Login to GHCR | |
| uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 | |
| with: | |
| registry: ${{ env.GHCR_REGISTRY }} | |
| username: ${{ github.actor }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| # Publish the multi-arch tag <sha>-duckdb<version> in each registry by | |
| # combining that registry's own -arm64 and -amd64 per-arch tags. | |
| - name: Create and push ECR / GHCR manifests for this version | |
| run: | | |
| set -euo pipefail | |
| TAG_BASE="${{ github.sha }}-duckdb${{ matrix.duckdb.version }}" | |
| # ECR: multi-arch tag from the ECR per-arch images. | |
| docker buildx imagetools create \ | |
| --tag ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE} \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-arm64 \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-amd64 | |
| # GHCR: multi-arch tag from the GHCR per-arch images. | |
| docker buildx imagetools create \ | |
| --tag ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${TAG_BASE} \ | |
| ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${TAG_BASE}-arm64 \ | |
| ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${TAG_BASE}-amd64 | |
| # For the default DuckDB version only, additionally publish the same | |
| # per-arch images under the unsuffixed tags <sha> and latest, in both | |
| # registries. Each registry's tag is assembled from that registry's own | |
| # per-arch images (the build job pushes identical images to both), which | |
| # matches the per-version manifest step and avoids cross-registry sources. | |
| - name: Tag default version as <sha> and latest (default rows only) | |
| if: matrix.duckdb.default | |
| run: | | |
| set -euo pipefail | |
| TAG_BASE="${{ github.sha }}-duckdb${{ matrix.duckdb.version }}" | |
| for tag in "${{ github.sha }}" "latest"; do | |
| # GHCR tag from the GHCR per-arch images. | |
| docker buildx imagetools create \ | |
| --tag ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${tag} \ | |
| ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${TAG_BASE}-arm64 \ | |
| ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${TAG_BASE}-amd64 | |
| # ECR tag from the ECR per-arch images. | |
| docker buildx imagetools create \ | |
| --tag ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${tag} \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-arm64 \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-amd64 | |
| done | |