DiskANN/.github/workflows/disk-benchmarks.yml at 19cf04a210f88779f8ec9c76d2beac17be2adb94 · microsoft/DiskANN · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

# DiskANN Benchmarks Workflow
#
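# This workflow runs macro benchmarks comparing the current branch against a baseline.
# It runs automatically for pull requests targeting main that touch the paths listed
# below, and it can also be triggered manually with an explicit baseline reference
# (branch, tag, or commit).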

name: Disk Benchmarks

on:
  # Automatic runs: pull requests targeting main that modify any of the
  # listed directories or this workflow file itself.
  pull_request:
    paths:
      - 'diskann/**'
      - 'diskann-disk/**'
      - 'diskann-linalg/**'
      - 'diskann-providers/**'
      - 'diskann-quantization/**'
      - 'diskann-vector/**'
      - 'diskann-wide/**'
      - 'diskann-utils/**'
      - 'diskann-platform/**'
      - 'diskann-label-filter/**'
      - 'diskann-benchmark/**'
      - 'diskann-benchmark-runner/**'
      - '.github/workflows/disk-benchmarks.yml'
    branches:
      - main
  # Manual runs: the caller picks the reference to benchmark against.
  workflow_dispatch:
    inputs:
      baseline_ref:
        type: string
        required: true
        default: 'main'
        description: 'A branch, commit SHA, or tag name to compare the current branch with'

# Runs are grouped per workflow and pull request number (or per commit SHA
# when the event carries no pull request); a newly triggered run in the same
# group cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}

# The workflow token only gets read access to repository contents.
permissions:
  contents: read

# Every `run` step uses bash unless the step sets its own `shell`.
defaults:
  run:
    shell: bash

env:
  # Directory, relative to a checkout root, that holds the benchmark input
  # configs and the tolerances file referenced by the steps below.
  PERF_INPUTS: diskann-benchmark/perf_test_inputs
  # Ask Rust to print a backtrace when a binary panics.
  RUST_BACKTRACE: '1'

jobs:
  # Macro benchmark: for each dataset in the matrix, benchmark a baseline
  # checkout and the current branch, then compare the two result files.
  macro-benchmark:
    name: Macro Benchmark - ${{ matrix.dataset }}
    timeout-minutes: 120
    # TODO: For production benchmarks, consider a runner with:
    # - NVMe storage for consistent I/O performance (the first step below
    #   already formats and mounts /dev/nvme0n1 on the self-hosted runner)
    # - CPU pinning (taskset) for reduced variance
    # - Dedicated hardware to avoid noisy neighbor effects
    runs-on:
      - self-hosted
      - 1ES.Pool=diskann-github
      - ubuntu-latest
      - "JobId=macro-benchmark-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}-${{ strategy.job-index }}"
    strategy:
      # A failure on one dataset does not cancel the other matrix entries.
      fail-fast: false
      matrix:
        include:
          # dataset:  label used in the job name, result files and artifact name
          # archive:  archive name handed to the setup action
          # data_dir: directory under target/tmp copied into the baseline checkout
          # config:   benchmark input file under PERF_INPUTS
          - dataset: wikipedia-100K
            archive: wikipedia-100K.tar.gz
            data_dir: wikipedia_cohere
            config: wikipedia-100K-disk-index.json
          - dataset: openai-100K
            archive: openai-100K.tar.gz
            data_dir: OpenAIArXiv
            config: openai-100K-disk-index.json

    steps:
      # Prepares NVMe-backed working directories for both checkouts.
      # Kept inline because this must run before checkout, but local action.yml
      # files are only available after checkout.
      #
      # The script formats /dev/nvme0n1 as ext4 (discarding whatever is on the
      # device), mounts it at /mnt/nvme, makes the mount point world-writable,
      # and symlinks ./diskann_rust and ./baseline to directories on that mount
      # so the checkout paths used by later steps resolve onto the NVMe volume.
      # `lsblk` only prints the block-device layout into the job log.
      #
      # NOTE(review): mkfs/mount and the plain `ln -s` calls would fail if the
      # device were already mounted or the links already existed; presumably
      # each job gets a fresh runner - confirm against the pool configuration.
      - name: Mount high-speed NVMe SSD
        shell: bash
        run: |
          sudo mkdir -p /mnt/nvme
          sudo lsblk
          sudo mkfs.ext4 /dev/nvme0n1
          sudo mount /dev/nvme0n1 /mnt/nvme
          sudo chmod 777 /mnt/nvme
          mkdir -p /mnt/nvme/diskann_rust /mnt/nvme/baseline
          ln -s /mnt/nvme/diskann_rust diskann_rust
          ln -s /mnt/nvme/baseline baseline

      # Check out the ref that triggered this run (no explicit `ref` is given),
      # including Git LFS objects, into the diskann_rust directory.
      - name: Checkout current branch
        uses: actions/checkout@v4
        with:
          lfs: true
          path: diskann_rust

      # Check out the baseline to compare against, including Git LFS objects,
      # into the baseline directory. The ref is resolved in this order:
      #   1. the `baseline_ref` input (manual workflow_dispatch runs),
      #   2. the pull request's target branch (pull_request runs),
      #   3. 'main' as a last resort.
      # Using the target branch instead of a hard-coded 'main' keeps the
      # comparison correct if the pull_request branch filter is ever widened.
      - name: Checkout baseline (${{ inputs.baseline_ref || github.base_ref || 'main' }})
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.baseline_ref || github.base_ref || 'main' }}
          path: baseline
          lfs: true

      # Runs the repository-local setup action taken from the current-branch
      # checkout. Judging by its inputs it presumably fetches and unpacks the
      # dataset archive under diskann_rust/target/tmp - confirm in the action.
      - name: Setup benchmark environment
        uses: ./diskann_rust/.github/actions/setup-disk-benchmark
        with:
          extract-to: diskann_rust/target/tmp
          archive: ${{ matrix.archive }}
          dataset: ${{ matrix.dataset }}

      # Give the baseline checkout its own copy of the dataset directory at the
      # same relative location (target/tmp/<data_dir>) as in the current-branch
      # checkout.
      - name: Copy dataset to baseline
        run: |
          mkdir -p baseline/target/tmp
          cp -r diskann_rust/target/tmp/${{ matrix.data_dir }} baseline/target/tmp/

      # Build (release, `disk-index` feature) and run the benchmark from the
      # baseline checkout. The input config is read from the current-branch
      # checkout (../diskann_rust), presumably so both runs use identical
      # inputs; results are written to <dataset>_baseline.json under the
      # baseline's target/tmp.
      - name: Run baseline benchmark
        working-directory: baseline
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file ../diskann_rust/${{ env.PERF_INPUTS }}/${{ matrix.config }} \
            --output-file target/tmp/${{ matrix.dataset }}_baseline.json

      # Build (release, `disk-index` feature) and run the same benchmark config
      # from the current-branch checkout; results are written to
      # <dataset>_target.json under its target/tmp.
      - name: Run current branch benchmark
        working-directory: diskann_rust
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \
            --output-file target/tmp/${{ matrix.dataset }}_target.json

      # Invoke the benchmark tool's `check run` subcommand from the
      # current-branch checkout, passing the tolerances file, the input config,
      # the baseline results (--before) and the current results (--after).
      # NOTE(review): presumably exits non-zero, failing the job, when the
      # results fall outside the tolerances - confirm in diskann-benchmark.
      - name: Validate benchmark results
        working-directory: diskann_rust
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            check run \
            --tolerances ${{ env.PERF_INPUTS }}/disk-index-tolerances.json \
            --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \
            --before ../baseline/target/tmp/${{ matrix.dataset }}_baseline.json \
            --after target/tmp/${{ matrix.dataset }}_target.json

      # Publish both result files as one artifact per dataset, kept for 30 days.
      # `!cancelled()` still runs the upload when an earlier step (such as the
      # validation) fails, but skips it when the run itself was cancelled - for
      # example by this workflow's cancel-in-progress concurrency setting -
      # whereas `always()` would keep executing on a cancelled run.
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: ${{ !cancelled() }}
        with:
          name: benchmark-results-${{ matrix.dataset }}
          path: |
            diskann_rust/target/tmp/${{ matrix.dataset }}_target.json
            baseline/target/tmp/${{ matrix.dataset }}_baseline.json
          retention-days: 30