fix(ducklake): retry the first-attach schema-init race instead of sur… #230
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continuous delivery for the duckgres-worker container image; one image is
# produced per supported DuckDB version.
name: Container Image Worker CD (per DuckDB version)

# Triggers: every push to main, plus manual dispatch.
on:
  push:
    branches: [main]
  workflow_dispatch:

# Registry coordinates shared by all jobs in this workflow.
env:
  ECR_REGISTRY: 795637471508.dkr.ecr.us-east-1.amazonaws.com
  GHCR_REGISTRY: ghcr.io
  IMAGE_NAME: duckgres-worker
| # Per-DuckDB-version matrix build for cmd/duckgres-worker. | |
| # | |
| # Each row produces one image (or multi-arch manifest) tagged | |
| # duckgres-worker:<sha>-duckdb<version>. The "default" row is | |
| # additionally published under the unsuffixed `<sha>` and `latest` tags | |
| # and triggers the Charts dispatch (kept stable so the existing duckgres | |
| # release continues to roll out as before). Non-default rows publish | |
| # their suffixed images and stop there — operators flip a tenant's | |
| # `image` config-store column to point at a specific suffixed tag to | |
| # canary that DuckDB version for that tenant. | |
| # | |
| # To add a DuckDB version, add a row under matrix.duckdb in BOTH the | |
| # build and manifest jobs. The DUCKDB_GO_VERSION / | |
| # DUCKDB_BINDINGS_VERSION pair maps to the duckdb-go module versions; | |
| # the encoding is `<major><minor:02d><patch:02d>.0` behind a `v2.` (go) | |
| # or `v0.` (bindings) prefix, so DuckDB 1.5.2 → v2.10502.0 / v0.10502.0 | |
| # and 1.5.3 → v2.10503.0 / v0.10503.0. See | |
| # scripts/ducklake_version_matrix.sh for the same mapping in test code. | |
| # | |
| # Each row must declare every field below — Dockerfile.worker asserts the | |
| # build-args are non-empty (`:?must be set`) so a forgotten key produces a | |
| # loud build failure rather than a silent fallback to the ARG default. | |
| # Exactly one row must set `default: true`; the validate-matrix job | |
| # enforces this invariant against BOTH the build and manifest matrices. | |
| jobs: | |
| validate-matrix: | |
| name: Validate matrix invariants | |
| if: github.repository == 'PostHog/duckgres' | |
| runs-on: ubuntu-24.04 | |
| steps: | |
| - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 | |
| - name: Exactly one default:true row in build + manifest matrices | |
| run: | | |
| set -euo pipefail | |
| WF=.github/workflows/container-image-worker-cd.yml | |
| for path in '.jobs.build.strategy.matrix.duckdb' '.jobs.manifest.strategy.matrix.duckdb'; do | |
| n=$(yq "$path | [.[] | select(.default == true)] | length" "$WF") | |
| if [ "$n" != "1" ]; then | |
| echo "ERROR: $path has $n rows with default:true (expected exactly 1)" >&2 | |
| yq "$path" "$WF" >&2 | |
| exit 1 | |
| fi | |
| echo "✓ $path has exactly 1 default:true row" | |
| done | |
| build: | |
| name: Build worker ${{ matrix.duckdb.version }} ${{ matrix.platform.platform }} | |
| needs: validate-matrix | |
| if: github.repository == 'PostHog/duckgres' | |
| # Don't let a flaky legacy/fallback row block the default version's | |
| # multi-arch manifest publish. Default-row failures still fail the | |
| # workflow (the unsuffixed `:<sha>`/`:latest` tags that downstream | |
| # Charts dispatch picks up must come from a green build). | |
| continue-on-error: ${{ !matrix.duckdb.default }} | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| duckdb: | |
| - version: "1.5.3" | |
| go: "v2.10503.0" | |
| bindings: "v0.10503.0" | |
| httpfs: "v1.5.3-stoi-fix" | |
| ducklake: "v1.0-posthog.4" | |
| # Stable repo hosts postgres_scanner for 1.5.3; nightly | |
| # does not. See scripts/ducklake_version_matrix.sh | |
| # commit history for the rationale on each row. | |
| pg_scanner_repo: "https://extensions.duckdb.org" | |
| default: true | |
| - version: "1.5.2" | |
| go: "v2.10502.0" | |
| bindings: "v0.10502.0" | |
| httpfs: "v1.5.2-stoi-fix" | |
| ducklake: "v1.0-posthog.2" | |
| # Nightly repo: preserves byte-identity with the | |
| # previously-published 1.5.2 worker image, which | |
| # bundled the nightly build for the DuckLake | |
| # metadata-pool reaper fix (PR #447). Stable v1.5.2 | |
| # postgres_scanner exists too but the binary differs; | |
| # keep this row aligned with what was last shipped so | |
| # it remains a true rollback target. | |
| pg_scanner_repo: "http://nightly-extensions.duckdb.org" | |
| default: false | |
| platform: | |
| - platform: linux/arm64 | |
| runner: ubuntu-24.04-arm | |
| slug: arm64 | |
| - platform: linux/amd64 | |
| runner: ubuntu-24.04 | |
| slug: amd64 | |
| runs-on: ${{ matrix.platform.runner }} | |
| permissions: | |
| id-token: write | |
| contents: read | |
| packages: write | |
| steps: | |
| - name: Check out | |
| uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 | |
| - name: Configure AWS credentials | |
| uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 | |
| with: | |
| role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }} | |
| aws-region: us-east-1 | |
| - name: Login to Amazon ECR | |
| uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 | |
| - name: Login to GHCR | |
| uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 | |
| with: | |
| registry: ${{ env.GHCR_REGISTRY }} | |
| username: ${{ github.actor }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Build and push by digest | |
| uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 | |
| with: | |
| context: . | |
| file: Dockerfile.worker | |
| push: true | |
| platforms: ${{ matrix.platform.platform }} | |
| tags: | | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}-duckdb${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${{ github.sha }}-duckdb${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| build-args: | | |
| VERSION=build-${{ github.sha }} | |
| COMMIT=${{ github.sha }} | |
| BUILD_TAGS=kubernetes | |
| DUCKDB_GO_VERSION=${{ matrix.duckdb.go }} | |
| DUCKDB_BINDINGS_VERSION=${{ matrix.duckdb.bindings }} | |
| DUCKDB_EXTENSION_VERSION=${{ matrix.duckdb.version }} | |
| HTTPFS_EXTENSION_TAG=${{ matrix.duckdb.httpfs }} | |
| DUCKLAKE_EXTENSION_TAG=${{ matrix.duckdb.ducklake }} | |
| POSTGRES_SCANNER_REPOSITORY=${{ matrix.duckdb.pg_scanner_repo }} | |
| cache-from: type=gha,scope=worker-${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| cache-to: type=gha,mode=max,scope=worker-${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| # Smoke test the freshly-pushed image. We pull from GHCR (cheaper | |
| # than ECR) and run the binary on the runner's native arch, so no | |
| # qemu is needed. Two assertions: | |
| # 1. `--version` exits 0 and prints the expected build identity. | |
| # Catches stub-binary regressions like the pre-#521 exit-1 | |
| # stub that shipped to ECR for weeks before being noticed. | |
| # 2. The binary boots with the same arg shape the K8s pool | |
| # hardcodes (`--mode duckdb-service --duckdb-listen :8816`) | |
| # and reaches the "Starting DuckDB service" log line within | |
| # 30s. Catches flag.Parse regressions like the missing | |
| # `--mode` flag fixed in #522, and any boot-time linkage | |
| # failure that only manifests at runtime. | |
| # If smoke fails for any matrix cell, the dependent `manifest` | |
| # job is skipped (default `needs:` behavior), so the unsuffixed | |
| # multi-arch tag is never produced and downstream Charts dispatch | |
| # never picks up a broken image. | |
| - name: Smoke test pushed image | |
| env: | |
| IMAGE: ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${{ github.sha }}-duckdb${{ matrix.duckdb.version }}-${{ matrix.platform.slug }} | |
| EXPECTED_VERSION: build-${{ github.sha }} | |
| run: | | |
| set -euo pipefail | |
| docker pull "$IMAGE" | |
| echo "::group::--version" | |
| out=$(docker run --rm "$IMAGE" --version) | |
| echo "$out" | |
| if ! grep -qF "duckgres version $EXPECTED_VERSION" <<<"$out"; then | |
| echo "✗ --version output did not include 'duckgres version $EXPECTED_VERSION'" | |
| exit 1 | |
| fi | |
| echo "✓ --version OK" | |
| echo "::endgroup::" | |
| # Worker's TCP listen path requires TLS — duckdbservice.Serve | |
| # always loads certs from cfg.TLSCertFile/KeyFile (default | |
| # ./certs/server.crt + .key) when listener.Network()=="tcp". | |
| # In prod the K8s pool mounts these via a Secret. For smoke | |
| # we generate an ephemeral self-signed pair and bind-mount | |
| # it; DUCKGRES_CERT / DUCKGRES_KEY env feed configresolve. | |
| # Without this, the binary boots far enough to log | |
| # "Starting DuckDB service" then dies in Serve(), which | |
| # the prior version of this step false-passed under (see | |
| # PR #528 follow-up). | |
| echo "::group::generate ephemeral TLS pair" | |
| CERT_DIR="$(mktemp -d)" | |
| # `-nodes` (skip private key encryption) is required: Go's | |
| # tls.LoadX509KeyPair expects an unencrypted PEM. The cert | |
| # lives in CI for ~30s, is never published, never reused, | |
| # and protects nothing real — so the unencrypted key is | |
| # the desired property here, not a vulnerability. | |
| # nosemgrep: trailofbits.generic.openssl-insecure-flags.openssl-insecure-flags | |
| openssl req -x509 -newkey rsa:2048 -nodes \ | |
| -keyout "$CERT_DIR/server.key" \ | |
| -out "$CERT_DIR/server.crt" \ | |
| -days 1 -subj '/CN=worker-smoke' >/dev/null 2>&1 | |
| # mktemp -d defaults to 0700, which the container's | |
| # non-root duckgres UID can't traverse via the bind | |
| # mount → "permission denied" loading the cert. 0755 on | |
| # the dir + 0644 on the files lets any UID read. | |
| chmod 755 "$CERT_DIR" | |
| chmod 644 "$CERT_DIR"/server.crt "$CERT_DIR"/server.key | |
| echo "::endgroup::" | |
| echo "::group::boot smoke" | |
| docker run -d --name worker-smoke \ | |
| -v "$CERT_DIR:/etc/worker-smoke-tls:ro" \ | |
| -e DUCKGRES_CERT=/etc/worker-smoke-tls/server.crt \ | |
| -e DUCKGRES_KEY=/etc/worker-smoke-tls/server.key \ | |
| "$IMAGE" \ | |
| --mode duckdb-service \ | |
| --duckdb-listen :8816 | |
| trap 'docker rm -f worker-smoke >/dev/null 2>&1 || true; rm -rf "$CERT_DIR"' EXIT | |
| # Three exit paths: ok, container-exited, 30s timeout. | |
| # The level=ERROR substring check defends against the race | |
| # where the binary logs "Starting DuckDB service" and then | |
| # crashes inside Serve() before docker ps notices — the | |
| # previous version of this step false-passed under that | |
| # exact pattern. | |
| status=fail | |
| for i in $(seq 1 30); do | |
| logs=$(docker logs worker-smoke 2>&1) | |
| if grep -q "level=ERROR" <<<"$logs"; then | |
| echo "✗ worker logged level=ERROR before reaching ready state:" | |
| tail -80 <<<"$logs" | |
| break | |
| fi | |
| if ! docker ps --format '{{.Names}}' | grep -qx worker-smoke; then | |
| echo "✗ worker-smoke exited before listening:" | |
| tail -80 <<<"$logs" | |
| break | |
| fi | |
| if grep -q "Starting DuckDB service" <<<"$logs"; then | |
| echo "✓ worker reached 'Starting DuckDB service' after ${i}s" | |
| tail -20 <<<"$logs" | |
| status=ok | |
| break | |
| fi | |
| sleep 1 | |
| done | |
| if [ "$status" != "ok" ] && [ "$i" = "30" ]; then | |
| echo "✗ worker did not log 'Starting DuckDB service' within 30s" | |
| docker logs worker-smoke 2>&1 | tail -80 | |
| fi | |
| echo "::endgroup::" | |
| [ "$status" = "ok" ] | |
| manifest: | |
| name: Multi-arch manifest worker ${{ matrix.duckdb.version }} | |
| needs: build | |
| if: github.repository == 'PostHog/duckgres' | |
| # Mirror the build job's tolerance: if the 1.5.2 manifest fails | |
| # (e.g. because that row's build was continue-on-errored), the | |
| # default 1.5.3 manifest still publishes and Charts dispatch | |
| # proceeds. | |
| continue-on-error: ${{ !matrix.duckdb.default }} | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| duckdb: | |
| - version: "1.5.3" | |
| default: true | |
| - version: "1.5.2" | |
| default: false | |
| runs-on: ubuntu-24.04 | |
| permissions: | |
| id-token: write | |
| contents: read | |
| packages: write | |
| steps: | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 | |
| - name: Configure AWS credentials | |
| uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 | |
| with: | |
| role-to-assume: ${{ secrets.AWS_ECR_PUBLISH_IAM_ROLE }} | |
| aws-region: us-east-1 | |
| - name: Login to Amazon ECR | |
| uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 | |
| - name: Login to GHCR | |
| uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 | |
| with: | |
| registry: ${{ env.GHCR_REGISTRY }} | |
| username: ${{ github.actor }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Create and push ECR / GHCR manifests for this version | |
| run: | | |
| set -euo pipefail | |
| TAG_BASE="${{ github.sha }}-duckdb${{ matrix.duckdb.version }}" | |
| docker buildx imagetools create \ | |
| --tag ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE} \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-arm64 \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-amd64 | |
| docker buildx imagetools create \ | |
| --tag ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${TAG_BASE} \ | |
| ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${TAG_BASE}-arm64 \ | |
| ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${TAG_BASE}-amd64 | |
| - name: Tag default version as <sha> and latest (default rows only) | |
| if: matrix.duckdb.default | |
| run: | | |
| set -euo pipefail | |
| TAG_BASE="${{ github.sha }}-duckdb${{ matrix.duckdb.version }}" | |
| for tag in "${{ github.sha }}" "latest"; do | |
| docker buildx imagetools create \ | |
| --tag ${{ env.GHCR_REGISTRY }}/posthog/${{ env.IMAGE_NAME }}:${tag} \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-arm64 \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-amd64 | |
| docker buildx imagetools create \ | |
| --tag ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${tag} \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-arm64 \ | |
| ${{ env.ECR_REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG_BASE}-amd64 | |
| done |