fly-deploy: clear /data/previous before decompress #17
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Fly.io deploy pipeline — parallel to the Cloud Run pipeline in build.yml. | |
| # | |
| # Order matters: data goes on the volume *before* the image redeploys. That | |
| # way the new container boots with both the new .db files and the new | |
| # inspect-data.json already in /data, and datasette starts up with | |
| # --inspect-file pointing at a real, current file. If we deployed the image | |
| # first, the post-deploy health check would hit cold mmap with no inspect | |
| # data — and /-/databases.json would never return in time. | |
| # | |
| # Runs on every push to fly-volumes, on schedule, and on manual dispatch. | |
| # Once we're confident, we'll merge to main and retire build.yml. | |
| name: Fly Deploy | |
| on: | |
| push: | |
| branches: | |
| - fly-volumes | |
| schedule: | |
| - cron: "0 7 * * *" # daily, after upstream data sources update | |
| workflow_dispatch: | |
| env: | |
| FLY_APP: warehouse | |
| jobs: | |
| deploy: | |
| runs-on: ubuntu-latest | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-python@v5 | |
| with: | |
| python-version: "3.12" | |
| - name: Free disk space | |
| run: | | |
| sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \ | |
| /opt/hostedtoolcache/CodeQL || true | |
| df -h | |
| - name: Install build dependencies | |
| run: | | |
| pip install -r requirements.txt | |
| pip install labor-union-parser | |
| - name: Build all databases | |
| run: make | |
| - name: Build inspect-data.json | |
| run: | | |
| # Pre-compute per-table row counts + schema hashes so datasette | |
| # doesn't have to do it at startup on every machine restart. | |
| # `datasette inspect` writes JSON to stdout. | |
| datasette inspect *.db > inspect-data.json | |
| ls -lh inspect-data.json | |
| - name: Compress databases for transfer | |
| run: | | |
| # SFTP through `flyctl` tops out at ~5 MB/s and does not compress | |
| # on the wire. Gzipping ~10 GB of .db files down to ~3-4 GB roughly | |
| # halves the upload time. pigz is preinstalled on GH ubuntu | |
| # runners; fall back to gzip if it isn't. | |
| if command -v pigz >/dev/null; then | |
| pigz -f *.db | |
| else | |
| gzip -f *.db | |
| fi | |
| ls -lh *.db.gz | |
| - name: Install flyctl | |
| uses: superfly/flyctl-actions/setup-flyctl@master | |
| # DATA path — upload first, while the old image is still serving. | |
| - name: Ensure machine is running before SFTP | |
| env: | |
| FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} | |
| run: | | |
| # Idempotent: starts the machine if it's stopped, no-op otherwise. | |
| flyctl machine list --app "$FLY_APP" --json \ | |
| | jq -r '.[].id' \ | |
| | xargs -I{} flyctl machine start {} --app "$FLY_APP" || true | |
| sleep 10 | |
| - name: Upload databases to Fly volume | |
| env: | |
| FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} | |
| run: | | |
| # `flyctl ssh console -C` does not interpret shell operators — it | |
| # passes the rest of the line as argv to the first binary. Wrap | |
| # multi-step commands in `sh -c`. | |
| flyctl ssh console --app "$FLY_APP" -C \ | |
| "sh -c 'rm -rf /data/incoming && mkdir /data/incoming'" | |
| ( | |
| for f in *.db.gz; do | |
| echo "put $f /data/incoming/$f" | |
| done | |
| echo "put inspect-data.json /data/incoming/inspect-data.json" | |
| ) | flyctl ssh sftp shell --app "$FLY_APP" | |
| - name: Decompress databases on the volume | |
| env: | |
| FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} | |
| run: | | |
| # Free up /data/previous (from the last successful swap) before | |
| # decompressing. Otherwise peak usage is live (10 GB) + previous | |
| # (10 GB) + decompressed incoming (10 GB) = 30 GB, which overflows | |
| # the 25 GB volume. swap-data.sh also clears previous, but that | |
| # runs *after* this step — too late. | |
| # | |
| # gunzip removes the .gz when it succeeds, so /data/incoming ends | |
| # up with the same shape swap-data.sh expects (plain .db files + | |
| # inspect-data.json). | |
| flyctl ssh console --app "$FLY_APP" -C \ | |
| "sh -c 'rm -f /data/previous/*.db /data/previous/inspect-data.json && gunzip /data/incoming/*.db.gz && ls -la /data/incoming'" | |
| - name: Swap databases atomically | |
| env: | |
| FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} | |
| run: | | |
| # Moves /data/incoming/*.db + inspect-data.json into /data/. | |
| # Old container keeps serving from FDs open on the now-unlinked | |
| # /data/previous/*.db until the next step restarts it. | |
| flyctl ssh console --app "$FLY_APP" -C '/app/scripts/swap-data.sh' | |
| # CODE path — image redeploy, which also restarts every machine and | |
| # picks up the freshly-swapped /data + inspect-data.json. | |
| - name: Deploy app to Fly | |
| env: | |
| FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} | |
| run: | | |
| flyctl deploy --remote-only --app "$FLY_APP" \ | |
| --image-label "build-$GITHUB_RUN_NUMBER" | |
| - name: Purge Cloudflare cache | |
| env: | |
| CF_ZONE_ID: ${{ secrets.CF_ZONE_ID }} | |
| CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }} | |
| run: | | |
| response=$(curl -fsS -X POST \ | |
| "https://api.cloudflare.com/client/v4/zones/${CF_ZONE_ID}/purge_cache" \ | |
| -H "Authorization: Bearer ${CF_API_TOKEN}" \ | |
| -H "Content-Type: application/json" \ | |
| --data '{"purge_everything":true}') | |
| echo "$response" | |
| echo "$response" | grep -q '"success":true' || { echo "Purge failed"; exit 1; } |