warehouse/.github/workflows/deploy-duckdb.yml at e1f4d3eb9e43488b66c6b536321a2965fabf5d5d · labordata/warehouse · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# App-only deploy for warehouse-duckdb. Parallel to deploy.yml on the SQLite
# track.
#
# Push the DuckDB image to Fly and roll it onto the current machine. The
# .duckdb files on the volume are untouched — the new container boots into
# whatever's already in /data. refresh-data-duckdb.yml handles data updates;
# the two share the warehouse-duckdb-deploy concurrency group so they
# serialize.
#
# Bootstrap (one-time, like the SQLite app): on a brand-new app this workflow
# has no machine to roll onto. Create the first machine manually via
# `flyctl deploy -c fly.duckdb.toml --remote-only` after `flyctl apps create
# warehouse-duckdb` and IP allocation. From then on this workflow rolls
# images onto whatever machine has role=current (or the first machine, on
# the cycle right after manual bootstrap before refresh has run).

# Display name of this workflow.
name: Deploy app (DuckDB)

# Triggers: pushes to the branches listed below, plus manual runs
# (workflow_dispatch).
on:
  push:
    branches:
      - main
      # TEMPORARY: also fire on the feature branch so we can iterate on the
      # image / serve-script before merging. Remove before merging.
      - duckdb-parallel-build
  workflow_dispatch:

# Runs in this group never overlap, and a newly triggered run does not cancel
# one that is already in progress. The header comment notes that
# refresh-data-duckdb.yml uses the same group name.
concurrency:
  group: warehouse-duckdb-deploy
  cancel-in-progress: false

# The workflow token is limited to reading repository contents.
permissions:
  contents: read

# Fly app name; every flyctl call in the job reads it as $FLY_APP.
env:
  FLY_APP: warehouse-duckdb

jobs:
  # Single job: build the image, roll it onto the current machine, then verify
  # the running container.
  deploy:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4
        with:
          # No later step in this job uses the workflow token for git, so do
          # not leave it in .git/config. The checkout is the directory
          # `flyctl deploy` runs in below; whether .git is excluded from the
          # build context is not visible here, so keep the token out of it.
          persist-credentials: false

      - name: Install flyctl
        # Pinned to v1.6 commit SHA. `@master` would let an upstream
        # compromise run with our FLY_API_TOKEN.
        uses: superfly/flyctl-actions/setup-flyctl@ed8efb33836e8b2096c7fd3ba1c8afe303ebbff1  # v1.6

      - name: Build image
        id: build
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # Same pattern as deploy.yml on the SQLite track: build+push only,
          # then pin by manifest digest in `machine update`. Bypasses Fly's
          # tag→digest cache and never reconciles fly.toml services (so it
          # doesn't spawn sibling data-less machines).
          #
          # Output: steps.build.outputs.image, in repo:tag@sha256:digest form.
          #
          # pipefail makes the `flyctl deploy ... | tee` pipeline below fail
          # when flyctl fails, not only when tee does.
          set -o pipefail
          TAG="build-$GITHUB_RUN_NUMBER"
          # Resolve the current tip of duckdb-deploy and pass it as a build
          # arg so the Dockerfile's pip install layer cache invalidates when
          # datasette changes (the @branch URL is text-identical across
          # deploys, so BuildKit reuses stale layers without this).
          #
          # The fully-qualified ref plus `NR == 1` keeps this to exactly one
          # SHA even if a tag or a nested branch shares the name. `|| true`
          # keeps errexit+pipefail from aborting on the assignment, so the
          # explicit check below is what reports the failure.
          DATASETTE_SHA=$(git ls-remote https://github.com/fgregg/datasette refs/heads/duckdb-deploy \
                          | awk 'NR == 1 {print $1}') || true
          if [ -z "$DATASETTE_SHA" ]; then
            echo "Could not resolve duckdb-deploy commit SHA" >&2
            exit 1
          fi
          echo "Building against datasette duckdb-deploy @ $DATASETTE_SHA"
          flyctl deploy --build-only --remote-only --app "$FLY_APP" \
            --config fly.duckdb.toml \
            --image-label "$TAG" \
            --push \
            --build-arg "GIT_SHA=$GITHUB_SHA" \
            --build-arg "DATASETTE_REF=$DATASETTE_SHA" \
            2>&1 | tee /tmp/build.log
          # Take the last "pushing manifest" line from the build log. When
          # nothing matches, grep exits 1; under pipefail + errexit that
          # would kill the script on this assignment and the diagnostic
          # below would never print, hence `|| true`.
          DIGEST=$(grep -oE "pushing manifest for [^ ]*@sha256:[0-9a-f]+" /tmp/build.log \
                   | grep -oE "sha256:[0-9a-f]+" | tail -1) || true
          if [ -z "$DIGEST" ]; then
            echo "Could not extract manifest digest from build output." >&2
            exit 1
          fi
          echo "Resolved $TAG -> $DIGEST"
          # repo:tag@sha256:digest form is the documented workaround for
          # flyctl's double-digest-append bug on `machine update`.
          echo "image=registry.fly.io/$FLY_APP:$TAG@$DIGEST" >> "$GITHUB_OUTPUT"

      - name: Roll image onto the current machine
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
          # Passed through the environment instead of being interpolated
          # into the script text, so the shell only ever sees it as data.
          IMAGE: ${{ steps.build.outputs.image }}
        run: |
          # Picks the target machine, updates it to the image built above,
          # and exports its id as MID (via $GITHUB_ENV) for later steps.
          if [ -z "$IMAGE" ]; then
            echo "Build step produced no image reference." >&2
            exit 1
          fi
          # List machines once and fail explicitly if the listing itself
          # fails; otherwise an API/auth error would surface below as a
          # misleading "No machine exists".
          if ! MACHINES=$(flyctl machine list --app "$FLY_APP" --json); then
            echo "Could not list machines on $FLY_APP." >&2
            exit 1
          fi
          # Prefer role=current; fall back to the first machine for the
          # bootstrap window (right after manual creation, before refresh
          # has promoted anything to role=current). Prints nothing when
          # there is no machine at all.
          MID=$(printf '%s' "$MACHINES" | jq -r '
            (map(select(.config.metadata.role == "current")) | .[0].id)
              // .[0].id // empty
          ')
          if [ -z "$MID" ] || [ "$MID" = "null" ]; then
            echo "No machine exists on $FLY_APP — bootstrap one manually first." >&2
            exit 1
          fi
          echo "Rolling image onto $MID"
          # Fly's registry can 404 the manifest for seconds-to-minutes after
          # `flyctl deploy --push` completes (push reports success before the
          # manifest is globally readable). Retry up to ~5 min.
          for i in $(seq 1 20); do
            if flyctl machine update "$MID" --app "$FLY_APP" --yes \
                 --image "$IMAGE"; then
              echo "MID=$MID" >> "$GITHUB_ENV"
              exit 0
            fi
            echo "retry $i: machine update failed, sleeping 15s..." >&2
            sleep 15
          done
          echo "machine update kept failing for ~5 min" >&2
          exit 1

      - name: Verify running rootfs matches commit
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # Read /etc/build-sha from the running container (baked in by the
          # Dockerfile's ARG GIT_SHA). If it doesn't match GITHUB_SHA, Fly
          # handed us a different rootfs than we asked for — fail loudly.
          #
          # MID is written to $GITHUB_ENV by the roll step. Pin the ssh
          # session to that machine: without --machine, flyctl chooses a
          # machine of the app itself, which need not be the one that was
          # just updated when more than one exists.
          : "${MID:?MID was not exported by the roll step}"
          RUNNING_SHA=""
          for i in 1 2 3 4 5; do
            # Keep only the first 40-hex-digit run from the command output;
            # any failure in the pipeline is treated as "no SHA yet".
            RUNNING_SHA=$(flyctl ssh console --app "$FLY_APP" --machine "$MID" \
              -C "cat /etc/build-sha" 2>/dev/null \
              | tr -d '\r\n' | grep -Eo '[0-9a-f]{40}' | head -1) || RUNNING_SHA=""
            if [ -n "$RUNNING_SHA" ]; then
              break
            fi
            echo "retry $i: ssh/build-sha read failed, sleeping..." >&2
            # Linear backoff: 5s, 10s, ... 25s.
            sleep $((i * 5))
          done
          if [ "$RUNNING_SHA" != "$GITHUB_SHA" ]; then
            echo "::error::Running rootfs SHA ($RUNNING_SHA) does not match GITHUB_SHA ($GITHUB_SHA)."
            exit 1
          fi
          echo "Verified: running rootfs SHA = $RUNNING_SHA"

      - name: Wait for datasette to bind :8080
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        run: |
          # machine update restarts the container. Verify-rootfs above
          # used hallpass (independent of datasette), so the workflow could
          # otherwise complete while datasette is still starting (~2 min over
          # 13 dbs on shared-cpu-1x), and visitors get 502s until it binds.
          #
          # MID is written to $GITHUB_ENV by the roll step. Target that
          # machine explicitly so the wait runs on the container that was
          # just restarted rather than on whichever machine flyctl picks.
          : "${MID:?MID was not exported by the roll step}"
          # Remove any stale copy first, then upload the script from the
          # checkout and run it inside the machine.
          flyctl ssh console --app "$FLY_APP" --machine "$MID" \
            -C "rm -f /tmp/wait-for-datasette.sh"
          echo "put scripts/wait-for-datasette.sh /tmp/wait-for-datasette.sh" \
            | flyctl ssh sftp shell --app "$FLY_APP" --machine "$MID"
          flyctl ssh console --app "$FLY_APP" --machine "$MID" \
            -C "sh /tmp/wait-for-datasette.sh"