Skip to content

fly-deploy: clear /data/previous before decompress #17

fly-deploy: clear /data/previous before decompress

fly-deploy: clear /data/previous before decompress #17

Workflow file for this run

# Fly.io deploy pipeline — parallel to the Cloud Run pipeline in build.yml.
#
# Order matters: data goes on the volume *before* the image redeploys. That
# way the new container boots with both the new .db files and the new
# inspect-data.json already in /data, and datasette starts up with
# --inspect-file pointing at a real, current file. If we deployed the image
# first, the post-deploy health check would hit cold mmap with no inspect
# data — and /-/databases.json would never return in time.
#
# Runs on every push to the fly-volumes branch, daily at 07:00 UTC, and on
# manual dispatch. Once we're confident, we'll merge to main and retire
# build.yml.
name: Fly Deploy

on:
  push:
    branches:
      - fly-volumes
  schedule:
    - cron: "0 7 * * *"  # daily, after upstream data sources update
  workflow_dispatch:

# Least privilege for GITHUB_TOKEN: the job only needs to check out the repo.
# Fly and Cloudflare are reached with their own secrets, not GITHUB_TOKEN.
permissions:
  contents: read

# Every run mutates the same /data/incoming and /data/previous on the Fly
# volume, so two overlapping runs (e.g. a push landing during the scheduled
# run) would delete or swap files out from under each other. Serialize runs
# instead. Never cancel an in-progress run: killing it mid-upload or mid-swap
# could leave the volume half-written.
concurrency:
  group: fly-deploy
  cancel-in-progress: false

env:
  FLY_APP: warehouse

jobs:
  # Single job: build the databases on the runner, ship them to the Fly
  # volume, swap them in, redeploy the image, then purge the CDN cache.
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Free disk space
        run: |
          # Remove large preinstalled toolchains this build does not use.
          # Best-effort: a missing directory must not fail the job.
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \
            /opt/hostedtoolcache/CodeQL || true
          df -h

      - name: Install build dependencies
        run: |
          pip install -r requirements.txt
          pip install labor-union-parser

      - name: Build all databases
        run: make

      - name: Build inspect-data.json
        run: |
          # Pre-compute per-table row counts + schema hashes so datasette
          # doesn't have to do it at startup on every machine restart.
          # `datasette inspect` writes JSON to stdout.
          datasette inspect *.db > inspect-data.json
          ls -lh inspect-data.json

      - name: Compress databases for transfer
        run: |
          # SFTP through `flyctl` tops out at ~5 MB/s and does not compress
          # on the wire. Gzipping ~10 GB of .db files down to ~3-4 GB roughly
          # halves the upload time. pigz is preinstalled on GH ubuntu
          # runners; fall back to gzip if it isn't.
          if command -v pigz >/dev/null; then
            pigz -f *.db
          else
            gzip -f *.db
          fi
          ls -lh *.db.gz

      # NOTE(review): `@master` is a mutable ref. Consider pinning to a
      # release tag or commit SHA once one has been chosen and verified.
      - name: Install flyctl
        uses: superfly/flyctl-actions/setup-flyctl@master

      # DATA path — upload first, while the old image is still serving.
      - name: Ensure machine is running before SFTP
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # Idempotent: starts the machine if it's stopped, no-op otherwise.
          # Deliberately best-effort (`|| true`); the upload step that
          # follows is what fails the job if no machine is reachable.
          flyctl machine list --app "$FLY_APP" --json \
            | jq -r '.[].id' \
            | xargs -I{} flyctl machine start {} --app "$FLY_APP" || true
          sleep 10

      - name: Upload databases to Fly volume
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # `flyctl ssh console -C` does not interpret shell operators — it
          # passes the rest of the line as argv to the first binary. Wrap
          # multi-step commands in `sh -c`.
          flyctl ssh console --app "$FLY_APP" -C \
            "sh -c 'rm -rf /data/incoming && mkdir /data/incoming'"
          # Generate one `put` command per artifact and feed the batch to
          # the interactive SFTP shell on stdin.
          (
            for f in *.db.gz; do
              echo "put $f /data/incoming/$f"
            done
            echo "put inspect-data.json /data/incoming/inspect-data.json"
          ) | flyctl ssh sftp shell --app "$FLY_APP"

      - name: Decompress databases on the volume
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # Free up /data/previous (from the last successful swap) before
          # decompressing. Otherwise peak usage is live (10 GB) + previous
          # (10 GB) + decompressed incoming (10 GB) = 30 GB, which overflows
          # the 25 GB volume. swap-data.sh also clears previous, but that
          # runs *after* this step — too late.
          #
          # gunzip removes the .gz when it succeeds, so /data/incoming ends
          # up with the same shape swap-data.sh expects (plain .db files +
          # inspect-data.json).
          flyctl ssh console --app "$FLY_APP" -C \
            "sh -c 'rm -f /data/previous/*.db /data/previous/inspect-data.json && gunzip /data/incoming/*.db.gz && ls -la /data/incoming'"

      - name: Swap databases atomically
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # Moves /data/incoming/*.db + inspect-data.json into /data/.
          # Old container keeps serving from FDs open on the now-unlinked
          # /data/previous/*.db until the next step restarts it.
          flyctl ssh console --app "$FLY_APP" -C '/app/scripts/swap-data.sh'

      # CODE path — image redeploy, which also restarts every machine and
      # picks up the freshly-swapped /data + inspect-data.json.
      - name: Deploy app to Fly
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          flyctl deploy --remote-only --app "$FLY_APP" \
            --image-label "build-$GITHUB_RUN_NUMBER"

      - name: Purge Cloudflare cache
        env:
          CF_ZONE_ID: ${{ secrets.CF_ZONE_ID }}
          CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
        run: |
          # `curl -f` already fails the step on an HTTP error status; the
          # check below catches a 2xx response whose body reports failure.
          response=$(curl -fsS -X POST \
            "https://api.cloudflare.com/client/v4/zones/${CF_ZONE_ID}/purge_cache" \
            -H "Authorization: Bearer ${CF_API_TOKEN}" \
            -H "Content-Type: application/json" \
            --data '{"purge_everything":true}')
          echo "$response"
          # Parse the JSON instead of grepping for an exact byte sequence,
          # so the check does not depend on whitespace in the response.
          echo "$response" | jq -e '.success == true' >/dev/null \
            || { echo "Purge failed"; exit 1; }