# Deploy app (DuckDB) — run #19
# Workflow file for this run:

# App-only deploy for warehouse-duckdb. Parallel to deploy.yml on the SQLite
# track.
#
# Push the DuckDB image to Fly and roll it onto the current machine. The
# .duckdb files on the volume are untouched — the new container boots into
# whatever's already in /data. refresh-data-duckdb.yml handles data updates;
# the two share the warehouse-duckdb-deploy concurrency group so they
# serialize.
#
# Bootstrap (one-time, like the SQLite app): on a brand-new app this workflow
# has no machine to roll onto. Create the first machine manually via
# `flyctl deploy -c fly.duckdb.toml --remote-only` after `flyctl apps create
# warehouse-duckdb` and IP allocation. From then on this workflow rolls
# images onto whatever machine has role=current (or the first machine, on
# the cycle right after manual bootstrap before refresh has run).
name: Deploy app (DuckDB)

on:
  push:
    branches:
      - main
      # TEMPORARY: the feature branch also triggers this workflow so the
      # image / serve-script can be iterated on before merging. Remove this
      # entry before merging.
      - duckdb-parallel-build
  workflow_dispatch:

# Same group name as refresh-data-duckdb.yml, so deploys and data refreshes
# serialize. A run that is already in progress is not cancelled by a newer
# one.
concurrency:
  group: warehouse-duckdb-deploy
  cancel-in-progress: false

# The workflow token only needs to read the repository; Fly access comes from
# the FLY_API_TOKEN secret supplied to the individual steps.
permissions:
  contents: read

# Fly app that every step in this workflow targets.
env:
  FLY_APP: warehouse-duckdb
jobs:
  # Build and push the DuckDB image, roll it onto the serving machine, then
  # verify the running rootfs and wait for datasette to come back up.
  deploy:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4

      - name: Install flyctl
        # Pinned to v1.6 commit SHA. `@master` would let an upstream
        # compromise run with our FLY_API_TOKEN.
        uses: superfly/flyctl-actions/setup-flyctl@ed8efb33836e8b2096c7fd3ba1c8afe303ebbff1  # v1.6

      - name: Build image
        id: build
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # Same pattern as deploy.yml on the SQLite track: build+push only,
          # then pin by manifest digest in `machine update`. Bypasses Fly's
          # tag→digest cache and never reconciles fly.toml services (so it
          # doesn't spawn sibling data-less machines).
          set -o pipefail
          TAG="build-$GITHUB_RUN_NUMBER"

          # Resolve the current tip of duckdb-deploy and pass it as a build
          # arg so the Dockerfile's pip install layer cache invalidates when
          # datasette changes (the @branch URL is text-identical across
          # deploys, so BuildKit reuses stale layers without this).
          #
          # The fully-qualified ref is requested because a bare
          # `duckdb-deploy` pattern also matches a same-named tag or any
          # `*/duckdb-deploy` branch, which would yield several lines. awk
          # keeps only the first line but still drains its input, so git is
          # never hit by SIGPIPE under pipefail. `|| true` stops errexit from
          # killing the step before the explicit message below is printed.
          DATASETTE_SHA=$(git ls-remote https://github.com/fgregg/datasette \
            refs/heads/duckdb-deploy | awk 'NR == 1 {print $1}') || true
          if [ -z "$DATASETTE_SHA" ]; then
            echo "Could not resolve duckdb-deploy commit SHA" >&2
            exit 1
          fi
          echo "Building against datasette duckdb-deploy @ $DATASETTE_SHA"

          flyctl deploy --build-only --remote-only --app "$FLY_APP" \
            --config fly.duckdb.toml \
            --image-label "$TAG" \
            --push \
            --build-arg "GIT_SHA=$GITHUB_SHA" \
            --build-arg "DATASETTE_REF=$DATASETTE_SHA" \
            2>&1 | tee /tmp/build.log

          # With errexit + pipefail a grep that matches nothing would abort
          # the step silently; `|| true` lets the empty-digest check below
          # report the problem instead.
          DIGEST=$(grep -oE "pushing manifest for [^ ]*@sha256:[0-9a-f]+" /tmp/build.log \
            | grep -oE "sha256:[0-9a-f]+" | tail -1) || true
          if [ -z "$DIGEST" ]; then
            echo "Could not extract manifest digest from build output." >&2
            exit 1
          fi
          echo "Resolved $TAG -> $DIGEST"

          # repo:tag@sha256:digest form is the documented workaround for
          # flyctl's double-digest-append bug on `machine update`.
          echo "image=registry.fly.io/$FLY_APP:$TAG@$DIGEST" >> "$GITHUB_OUTPUT"

      - name: Roll image onto the current machine
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
          # Handed over via the environment so the expression is never
          # pasted into the script text itself.
          IMAGE: ${{ steps.build.outputs.image }}
        run: |
          if [ -z "$IMAGE" ]; then
            echo "Build step produced no image reference." >&2
            exit 1
          fi

          # List machines once. A flyctl failure aborts the step here with
          # flyctl's own error rather than being reported as "no machine".
          MACHINES=$(flyctl machine list --app "$FLY_APP" --json)

          # Prefer role=current; fall back to the first machine for the
          # bootstrap window (right after manual creation, before refresh
          # has promoted anything to role=current).
          MID=$(jq -r '
            ([.[] | select(.config.metadata.role == "current")] | first | .id)
            // .[0].id
            // empty
          ' <<<"$MACHINES") || MID=""
          if [ -z "$MID" ] || [ "$MID" = "null" ]; then
            echo "No machine exists on $FLY_APP — bootstrap one manually first." >&2
            exit 1
          fi
          echo "Rolling image onto $MID"

          # Fly's registry can 404 the manifest for seconds-to-minutes after
          # `flyctl deploy --push` completes (push reports success before the
          # manifest is globally readable). Retry up to ~5 min.
          for i in $(seq 1 20); do
            if flyctl machine update "$MID" --app "$FLY_APP" --yes \
              --image "$IMAGE"; then
              # Export the machine ID so the following steps can target the
              # machine that was actually updated.
              echo "MID=$MID" >> "$GITHUB_ENV"
              exit 0
            fi
            echo "retry $i: machine update failed, sleeping 15s..." >&2
            sleep 15
          done
          echo "machine update kept failing for ~5 min" >&2
          exit 1

      - name: Verify running rootfs matches commit
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # Read /etc/build-sha from the running container (baked in by the
          # Dockerfile's ARG GIT_SHA). If it doesn't match GITHUB_SHA, Fly
          # handed us a different rootfs than we asked for — fail loudly.

          # Target the machine the previous step updated (MID comes from
          # GITHUB_ENV). If MID is unset, fall back to flyctl's own machine
          # selection.
          TARGET=()
          if [ -n "${MID:-}" ]; then
            TARGET=(--machine "$MID")
          fi

          RUNNING_SHA=""
          for i in 1 2 3 4 5; do
            RUNNING_SHA=$(flyctl ssh console --app "$FLY_APP" "${TARGET[@]}" \
              -C "cat /etc/build-sha" 2>/dev/null \
              | tr -d '\r\n' | grep -Eo '[0-9a-f]{40}' | head -1) || RUNNING_SHA=""
            if [ -n "$RUNNING_SHA" ]; then
              break
            fi
            echo "retry $i: ssh/build-sha read failed, sleeping..." >&2
            sleep $((i * 5))
          done

          # Distinguish "could not read the SHA at all" from a real mismatch.
          if [ -z "$RUNNING_SHA" ]; then
            echo "::error::Could not read /etc/build-sha from the running machine."
            exit 1
          fi
          if [ "$RUNNING_SHA" != "$GITHUB_SHA" ]; then
            echo "::error::Running rootfs SHA ($RUNNING_SHA) does not match GITHUB_SHA ($GITHUB_SHA)."
            exit 1
          fi
          echo "Verified: running rootfs SHA = $RUNNING_SHA"

      - name: Wait for datasette to bind :8080
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # machine update restarts the container. Verify-rootfs above
          # used hallpass (independent of datasette), so the workflow could
          # otherwise complete while datasette is still starting (~2 min over
          # 13 dbs on shared-cpu-1x), and visitors get 502s until it binds.

          # Same targeting as the verify step: wait on the machine that was
          # just updated when its ID is available.
          TARGET=()
          if [ -n "${MID:-}" ]; then
            TARGET=(--machine "$MID")
          fi

          # Remove any copy left by an earlier run before uploading a fresh
          # one, then run the uploaded script on the machine.
          flyctl ssh console --app "$FLY_APP" "${TARGET[@]}" \
            -C "rm -f /tmp/wait-for-datasette.sh"
          echo "put scripts/wait-for-datasette.sh /tmp/wait-for-datasette.sh" \
            | flyctl ssh sftp shell --app "$FLY_APP" "${TARGET[@]}"
          flyctl ssh console --app "$FLY_APP" "${TARGET[@]}" \
            -C "sh /tmp/wait-for-datasette.sh"