-
Notifications
You must be signed in to change notification settings - Fork 428
148 lines (131 loc) · 5.05 KB
/
Copy pathdisk-benchmarks.yml
File metadata and controls
148 lines (131 loc) · 5.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
# DiskANN Benchmarks Workflow
#
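# This workflow runs macro benchmarks comparing the current branch against a baseline.
# It runs on pull requests targeting main that touch the benchmarked paths, and it can
# also be triggered manually with a baseline reference (branch, tag, or commit).
# When no baseline reference is supplied, the baseline is main.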
name: Disk Benchmarks

on:
  # Manual runs: the caller picks the ref the current branch is compared against.
  workflow_dispatch:
    inputs:
      baseline_ref:
        description: "A branch, commit SHA, or tag name to compare the current branch with"
        required: true
        default: "main"
        type: string

  # Pull requests targeting main, restricted to changes under the paths below
  # (including this workflow file itself).
  pull_request:
    branches: [main]
    paths:
      - "diskann/**"
      - "diskann-disk/**"
      - "diskann-linalg/**"
      - "diskann-providers/**"
      - "diskann-quantization/**"
      - "diskann-vector/**"
      - "diskann-wide/**"
      - "diskann-utils/**"
      - "diskann-platform/**"
      - "diskann-label-filter/**"
      - "diskann-benchmark/**"
      - "diskann-benchmark-runner/**"
      - ".github/workflows/disk-benchmarks.yml"
# A newly triggered run cancels the in-progress run of the same group.
# The group key is the workflow name plus the pull request number, falling
# back to the commit SHA when the event carries no pull request.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}"
# The workflow token only gets read access to repository contents.
permissions:
  contents: read

# Environment variables shared by every job and step.
env:
  RUST_BACKTRACE: 1
  # Directory holding the benchmark input configs, relative to a checkout root.
  PERF_INPUTS: diskann-benchmark/perf_test_inputs

# Every `run` step executes under bash unless a step overrides it.
defaults:
  run:
    shell: bash
jobs:
  # Macro benchmark: run the disk-index benchmark on the baseline checkout and
  # on the current branch, then validate the two result files against the
  # tolerances file. One job instance runs per dataset in the matrix.
  macro-benchmark:
    name: Macro Benchmark - ${{ matrix.dataset }}
    # The JobId label is built from the run id, run number, run attempt and
    # matrix job index, so it differs for every job instance.
    runs-on:
      - self-hosted
      - 1ES.Pool=diskann-github
      - ubuntu-latest
      - "JobId=macro-benchmark-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}-${{ strategy.job-index }}"
    # TODO: For lower-variance results, consider:
    #   - CPU pinning (taskset) for reduced variance
    #   - Dedicated hardware to avoid noisy neighbor effects
    timeout-minutes: 120
    strategy:
      # Let the other dataset finish even if one dataset fails.
      fail-fast: false
      matrix:
        include:
          - dataset: wikipedia-100K
            config: wikipedia-100K-disk-index.json
            archive: wikipedia-100K.tar.gz
            data_dir: wikipedia_cohere
          - dataset: openai-100K
            config: openai-100K-disk-index.json
            archive: openai-100K.tar.gz
            data_dir: OpenAIArXiv
    steps:
      # Kept inline because this must run before checkout, but local action.yml
      # files are only available after checkout.
      # NOTE: mkfs.ext4 creates a fresh filesystem on /dev/nvme0n1. Both
      # checkout directories are symlinks onto that mount, so sources, build
      # output and datasets all live on the NVMe device.
      - name: Mount high-speed NVMe SSD
        shell: bash
        run: |
          sudo mkdir -p /mnt/nvme
          sudo lsblk
          sudo mkfs.ext4 /dev/nvme0n1
          sudo mount /dev/nvme0n1 /mnt/nvme
          sudo chmod 777 /mnt/nvme
          mkdir -p /mnt/nvme/diskann_rust /mnt/nvme/baseline
          ln -s /mnt/nvme/diskann_rust diskann_rust
          ln -s /mnt/nvme/baseline baseline

      - name: Checkout current branch
        uses: actions/checkout@v4
        with:
          path: diskann_rust
          lfs: true

      # On pull_request events `inputs.baseline_ref` is empty, so the baseline
      # falls back to main.
      - name: Checkout baseline (${{ inputs.baseline_ref || 'main' }})
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.baseline_ref || 'main' }}
          path: baseline
          lfs: true

      # Local composite action taken from the current-branch checkout.
      - name: Setup benchmark environment
        uses: ./diskann_rust/.github/actions/setup-disk-benchmark
        with:
          dataset: ${{ matrix.dataset }}
          archive: ${{ matrix.archive }}
          extract-to: diskann_rust/target/tmp

      # The baseline run reads the same dataset from its own target/tmp.
      - name: Copy dataset to baseline
        run: |
          mkdir -p baseline/target/tmp
          cp -r diskann_rust/target/tmp/${{ matrix.data_dir }} baseline/target/tmp/

      # The baseline binary is built from the baseline checkout, but the input
      # config always comes from the current branch so both runs use the same
      # benchmark definition.
      - name: Run baseline benchmark
        working-directory: baseline
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file ../diskann_rust/${{ env.PERF_INPUTS }}/${{ matrix.config }} \
            --output-file target/tmp/${{ matrix.dataset }}_baseline.json

      - name: Run current branch benchmark
        working-directory: diskann_rust
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \
            --output-file target/tmp/${{ matrix.dataset }}_target.json

      # Invokes the benchmark tool's `check run` subcommand with the tolerances
      # file, the input config, and the baseline (--before) and current-branch
      # (--after) result files.
      - name: Validate benchmark results
        working-directory: diskann_rust
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            check run \
            --tolerances ${{ env.PERF_INPUTS }}/disk-index-tolerances.json \
            --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \
            --before ../baseline/target/tmp/${{ matrix.dataset }}_baseline.json \
            --after target/tmp/${{ matrix.dataset }}_target.json

      # Upload even if validation (or an earlier step) fails, but not when the
      # run was cancelled: `always()` would also fire on cancellation, which
      # happens routinely here because concurrency cancels superseded runs.
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: ${{ !cancelled() }}
        with:
          name: benchmark-results-${{ matrix.dataset }}
          path: |
            diskann_rust/target/tmp/${{ matrix.dataset }}_target.json
            baseline/target/tmp/${{ matrix.dataset }}_baseline.json
          retention-days: 30