-
Notifications
You must be signed in to change notification settings - Fork 2.2k
288 lines (249 loc) · 9.44 KB
/
benchmarks.yaml
File metadata and controls
288 lines (249 loc) · 9.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# Performance benchmarks for Prefect: runs on pull requests touching
# benchmark-relevant paths and on every push to main.
name: Benchmarks
env:
  # Force colored output from Python tooling in CI logs.
  PY_COLORS: 1
# NOTE: `on` looks like a YAML 1.1 boolean to generic parsers; GitHub's
# loader handles it, so yamllint `truthy` warnings may be suppressed here.
on:
  pull_request:
    paths:
      - .github/workflows/benchmarks.yaml
      - .github/workflows/python-tests.yaml
      - benches/cli-bench.toml
      - "src/prefect/**/*.py"
      - pyproject.toml
      - uv.lock
      - Dockerfile
  push:
    branches:
      - main
# Read-only token: nothing in this workflow writes back to the repository.
permissions:
  contents: read
concurrency:
  # One active run per workflow+ref; superseded runs are cancelled only for
  # pull requests, so pushes to main always complete and refresh the
  # benchmark comparison cache.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
  # Macro benchmarks: runs the benchmark suite against a live local Prefect
  # server and compares results against the base branch via the actions cache.
  run-benchmarks:
    name: Benchmark
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v6
        with:
          persist-credentials: false
          # Full history with blob:none keeps the clone small while still
          # providing refs/history the tooling may need.
          fetch-depth: 0
          filter: blob:none
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
        with:
          python-version: "3.12"
          enable-cache: true
          cache-dependency-glob: "pyproject.toml"
      - name: Install the project
        run: uv sync --group benchmark --compile-bytecode --locked
      - name: Prepare benchmark comparisons
        # Note: We use a "cache" instead of artifacts because artifacts are not available
        # across workflow runs.
        id: bench-cache
        uses: actions/cache@v5
        with:
          path: ./.benchmarks
          # Pushes benchmark results for this branch and sha, this will always be a cache miss
          # and `restore-keys` will be used to get the benchmarks for comparison
          key: ${{ runner.os }}-${{ github.head_ref || 'main' }}-${{ github.sha }}
          # Pulls benchmark results for the base branch
          restore-keys: |
            ${{ runner.os }}-${{ github.base_ref }}-
            ${{ runner.os }}-main-
      - name: Start server
        # The server is backgrounded (`&`); the wait script polls the API
        # until it responds so benchmarks only start against a ready server.
        run: |
          PREFECT_HOME=$(pwd) uv run prefect server start&
          PREFECT_API_URL="http://127.0.0.1:4200/api" uv run ./scripts/wait-for-server.py
        # TODO: Replace `wait-for-server` with dedicated command
        # https://github.com/PrefectHQ/prefect/issues/6990
      - name: Run benchmarks
        env:
          HEAD_REF: ${{ github.head_ref }}
          GITHUB_SHA: ${{ github.sha }}
        # Includes comparison to previous benchmarks if available
        # Import benchmark is ignored because we run those
        # benchmarks via CodSpeed
        # NOTE(review): actions/cache sets `cache-hit` to 'true' only on an
        # exact primary-key match — which the key comment above says never
        # happens. The expression below therefore relies on the output's
        # truthiness after a restore-keys hit; confirm --benchmark-compare is
        # actually passed when a comparison cache is restored.
        run: |
          if [[ -z "$HEAD_REF" ]]; then
            # HEAD_REF is unset or empty, use 'main' with the SHA
            uniquename="main-$GITHUB_SHA"
          else
            # HEAD_REF is set, use the branch name directly
            uniquename="$HEAD_REF"
          fi
          # Allow alphanumeric, underscores, and dashes, and replace other
          # characters with an underscore
          sanitized_uniquename="${uniquename//[^a-zA-Z0-9_\-]/_}"
          PREFECT_API_URL="http://127.0.0.1:4200/api" \
            uv run python -m benches \
            --ignore=benches/bench_import.py \
            --timeout=180 \
            --benchmark-save="${sanitized_uniquename}" \
            ${{ steps.bench-cache.outputs.cache-hit && '--benchmark-compare' || '' }}
  # Change detection: decides whether the CLI startup benchmark shards need
  # to run for this pull request (only when CLI code or dependency/config
  # files changed).
  detect-cli-benchmark-changes:
    name: Detect CLI benchmark scope
    if: ${{ github.event_name == 'pull_request' }}
    runs-on: ubuntu-latest
    outputs:
      should_run: ${{ steps.filter.outputs.should_run }}
    steps:
      # No checkout needed: on pull_request events paths-filter lists the
      # changed files via the GitHub API.
      - name: Detect relevant changes with path filters
        id: filter
        uses: dorny/paths-filter@v3
        with:
          filters: |
            should_run:
              - "src/prefect/cli/**"
              - "benches/cli-bench.toml"
              - "pyproject.toml"
              - "uv.lock"
              - "Dockerfile"
              - ".github/workflows/benchmarks.yaml"
cli-benchmark-shards:
name: CLI startup benchmark shard ${{ matrix.shard_index }}
if: ${{ github.event_name == 'pull_request' && needs.detect-cli-benchmark-changes.outputs.should_run == 'true' }}
needs: [detect-cli-benchmark-changes]
runs-on: ubuntu-latest
timeout-minutes: 20
strategy:
fail-fast: false
matrix:
shard_index: [0, 1, 2, 3]
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
filter: blob:none
- name: Set up uv
uses: astral-sh/setup-uv@v7
with:
python-version: "3.12"
enable-cache: true
cache-dependency-glob: "pyproject.toml"
- name: Install hyperfine
run: |
sudo apt-get update
sudo apt-get install -y hyperfine
- name: Extract cli-bench config from head
id: full-config
run: |
git fetch origin ${{ github.event.pull_request.head.sha }}
config_path="$RUNNER_TEMP/cli-bench.full.toml"
git show ${{ github.event.pull_request.head.sha }}:benches/cli-bench.toml > "$config_path"
echo "path=$config_path" >> "$GITHUB_OUTPUT"
echo "Using benches/cli-bench.toml from ${{ github.event.pull_request.head.sha }}"
cat "$config_path"
- name: Build sharded benchmark config
env:
CLI_BENCH_SHARD_INDEX: ${{ matrix.shard_index }}
CLI_BENCH_SHARD_TOTAL: "4"
CLI_BENCH_CONFIG_SOURCE: ${{ steps.full-config.outputs.path }}
CLI_BENCH_CONFIG_SHARD: ${{ runner.temp }}/cli-bench.shard.toml
run: |
uv run python - <<'PY'
from __future__ import annotations
import json
import os
import tomllib
from pathlib import Path
shard_index = int(os.environ["CLI_BENCH_SHARD_INDEX"])
shard_total = int(os.environ["CLI_BENCH_SHARD_TOTAL"])
source_path = Path(os.environ["CLI_BENCH_CONFIG_SOURCE"])
output_path = Path(os.environ["CLI_BENCH_CONFIG_SHARD"])
data = tomllib.loads(source_path.read_text())
commands = data.get("commands", [])
selected = [
command
for index, command in enumerate(commands)
if index % shard_total == shard_index
]
if not selected:
raise SystemExit(
f"No commands selected for shard {shard_index}/{shard_total}"
)
lines: list[str] = [
"# generated shard config",
f"# source: {source_path}",
f"# shard: {shard_index}/{shard_total}",
"",
"[project]",
]
for key, value in data.get("project", {}).items():
lines.append(f"{key} = {json.dumps(value)}")
for command in selected:
lines.append("")
lines.append("[[commands]]")
for key, value in command.items():
lines.append(f"{key} = {json.dumps(value)}")
output_path.write_text("\n".join(lines) + "\n")
print(
f"Shard {shard_index}/{shard_total} selected {len(selected)} "
f"commands out of {len(commands)}"
)
print(output_path.read_text())
PY
- name: Prepare worktrees
run: |
git fetch origin ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}
git worktree add "$RUNNER_TEMP/base" ${{ github.event.pull_request.base.sha }}
git worktree add "$RUNNER_TEMP/head" ${{ github.event.pull_request.head.sha }}
- name: Install dependencies
run: uv sync --group cli-bench --locked
- name: Run base benchmarks
run: |
uv run --group cli-bench cli-bench \
--config "$RUNNER_TEMP/cli-bench.shard.toml" \
--project-root "$RUNNER_TEMP/base" \
run \
--runs 5 \
--category startup \
--output baseline.json
- name: Run head benchmarks
run: |
uv run --group cli-bench cli-bench \
--config "$RUNNER_TEMP/cli-bench.shard.toml" \
--project-root "$RUNNER_TEMP/head" \
run \
--runs 5 \
--category startup \
--output comparison.json
- name: Check shard for regressions
run: |
uv run --group cli-bench cli-bench compare \
baseline.json comparison.json \
--threshold 15 \
--fail-on-regression \
--summary-md cli-benchmark-compare.md \
--digest-json cli-benchmark-digest.json
- name: Upload benchmark shard
if: always()
uses: actions/upload-artifact@v7
with:
name: cli-benchmark-shard-${{ matrix.shard_index }}
path: |
baseline.json
comparison.json
cli-benchmark-compare.md
cli-benchmark-digest.json
cli-benchmarks:
name: CLI startup benchmarks
if: ${{ always() && github.event_name == 'pull_request' }}
needs: [detect-cli-benchmark-changes, cli-benchmark-shards]
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- name: Validate shard outcomes
run: |
echo "CLI benchmark scope check: ${{ needs.detect-cli-benchmark-changes.outputs.should_run }}"
if [[ "${{ needs.detect-cli-benchmark-changes.outputs.should_run }}" != "true" ]]; then
echo "No CLI/dependency changes detected; skipping CLI startup benchmark shards."
exit 0
fi
echo "Shard result: ${{ needs.cli-benchmark-shards.result }}"
if [[ "${{ needs.cli-benchmark-shards.result }}" != "success" ]]; then
echo "One or more CLI benchmark shards failed."
exit 1
fi
echo "All CLI benchmark shards passed."