Skip to content

Commit adb787a

Browse files
Merge branch 'main' into main
2 parents ce0a0c8 + 9cedeb9 commit adb787a

160 files changed

Lines changed: 14528 additions & 7997 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/build-pull-request.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ jobs:
3838
uses: actions/checkout@v4
3939
- name: Build and Test (cmake)
4040
uses: ./.github/actions/build_cmake
41+
- name: Build C++ demos
42+
run: |
43+
make -C build demo_diversity_result_handler
4144
linux-x86_64-AVX2-cmake:
4245
name: Linux x86_64 AVX2 (cmake)
4346
needs: linux-x86_64-cmake
@@ -71,6 +74,17 @@ jobs:
7174
uses: ./.github/actions/build_cmake
7275
with:
7376
opt_level: avx512_spr
77+
linux-x86_64-DD-cmake:
78+
name: Linux x86_64 Dynamic Dispatch (cmake)
79+
needs: linux-x86_64-cmake
80+
runs-on: faiss-aws-m7i.large
81+
steps:
82+
- name: Checkout
83+
uses: actions/checkout@v4
84+
- name: Build and Test (cmake)
85+
uses: ./.github/actions/build_cmake
86+
with:
87+
opt_level: dd
7488
linux-x86_64-GPU-cmake:
7589
name: Linux x86_64 GPU (cmake)
7690
needs: linux-x86_64-cmake
@@ -108,6 +122,19 @@ jobs:
108122
env:
109123
# Context: https://github.com/facebookresearch/faiss/wiki/Troubleshooting#surprising-faiss-openmp-and-openblas-interaction
110124
OPENBLAS_NUM_THREADS: '1'
125+
linux-arm64-DD-cmake:
126+
name: Linux arm64 Dynamic Dispatch (cmake)
127+
needs: linux-x86_64-cmake
128+
runs-on: faiss-aws-r8g.large
129+
steps:
130+
- name: Checkout
131+
uses: actions/checkout@v4
132+
- name: Build and Test (cmake)
133+
uses: ./.github/actions/build_cmake
134+
with:
135+
opt_level: dd
136+
env:
137+
OPENBLAS_NUM_THREADS: '1'
111138
linux-x86_64-conda:
112139
name: Linux x86_64 (conda)
113140
needs: linux-x86_64-cmake

.github/workflows/index-io-backward-compatibility.yml

Lines changed: 13 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -45,29 +45,21 @@ jobs:
4545
echo "Files created by CMake build:"
4646
ls -lh ${{ env.SHARED_DATA_DIR }}
4747
48-
# Step 2: Install conda faiss-cpu and read files
49-
- name: Clean cmake-built packages
48+
# Step 2: Install conda faiss-cpu in a clean environment and read files
49+
- name: Create conda read environment with faiss-cpu
5050
shell: bash
5151
run: |
5252
eval "$(conda shell.bash hook)"
53-
conda activate
54-
# Remove packages that conflict with faiss-cpu
55-
conda remove -y numpy scipy pytest gflags swig cmake make mkl mkl-devel || true
56-
57-
- name: Install faiss-cpu from pytorch channel
58-
shell: bash
59-
run: |
60-
eval "$(conda shell.bash hook)"
61-
conda activate
62-
conda list
63-
conda install -y -c pytorch faiss-cpu=1.13.2
53+
conda create -n faiss_conda_read -y python=3.11
54+
conda activate faiss_conda_read
55+
conda install -y -c pytorch -c conda-forge faiss-cpu=1.13.2
6456
conda list
6557
6658
- name: Run Conda reader (read Faiss index and verify)
6759
shell: bash
6860
run: |
6961
eval "$(conda shell.bash hook)"
70-
conda activate
62+
conda activate faiss_conda_read
7163
python tests/index_io_backward_compatibility/conda_reader.py ${{ env.SHARED_DATA_DIR }}
7264
7365
- name: Upload artifacts from cmake->conda test
@@ -90,20 +82,14 @@ jobs:
9082
fetch-depth: 0
9183
fetch-tags: true
9284

93-
# Step 1: Install conda faiss-cpu package and write files
94-
- name: Setup miniconda
95-
uses: conda-incubator/setup-miniconda@v3
96-
with:
97-
python-version: '3.11'
98-
miniforge-version: latest
99-
100-
- name: Install faiss-cpu from pytorch channel
85+
# Step 1: Install conda faiss-cpu in a clean environment and write files
86+
- name: Create conda write environment with faiss-cpu
10187
shell: bash
10288
run: |
10389
eval "$(conda shell.bash hook)"
104-
conda activate
105-
# Install pre-built faiss-cpu
106-
conda install -y -c pytorch faiss-cpu=1.13.2
90+
conda create -n faiss_conda_write -y python=3.11
91+
conda activate faiss_conda_write
92+
conda install -y -c pytorch -c conda-forge faiss-cpu=1.13.2
10793
conda list
10894
10995
- name: Create shared data directory
@@ -116,7 +102,7 @@ jobs:
116102
shell: bash
117103
run: |
118104
eval "$(conda shell.bash hook)"
119-
conda activate
105+
conda activate faiss_conda_write
120106
python tests/index_io_backward_compatibility/conda_writer.py ${{ env.SHARED_DATA_DIR }}
121107
122108
- name: Verify files were written
@@ -125,19 +111,10 @@ jobs:
125111
echo "Files created by Conda build:"
126112
ls -lh ${{ env.SHARED_DATA_DIR }}
127113
128-
# Step 2: Rebuild with CMake and read files
129-
- name: Clean conda artifacts
130-
shell: bash
131-
run: |
132-
# Uninstall conda-built faiss to avoid conflicts
133-
eval "$(conda shell.bash hook)"
134-
conda activate
135-
conda uninstall -y faiss-cpu || true
136-
114+
# Step 2: Build with CMake and read files
137115
- name: Build with CMake
138116
uses: ./.github/actions/build_cmake
139117
with:
140-
setup_conda: 'false'
141118
upload_artifacts: 'false'
142119

143120
- name: Run CMake reader (read Faiss index and verify)

INSTALL.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,29 +12,31 @@ To install the latest stable release:
1212

1313
``` shell
1414
# CPU-only version
15-
$ conda install -c pytorch faiss-cpu=1.13.2
15+
$ conda install -c pytorch -c conda-forge faiss-cpu=1.13.2
1616

1717
# GPU(+CPU) version
18-
$ conda install -c pytorch -c nvidia faiss-gpu=1.13.2
18+
$ conda install -c pytorch -c nvidia -c conda-forge faiss-gpu=1.13.2
1919

2020
# GPU(+CPU) version with NVIDIA cuVS
2121
$ conda install -c pytorch -c nvidia -c rapidsai -c conda-forge libnvjitlink faiss-gpu-cuvs=1.13.2
2222

2323
# GPU(+CPU) version using AMD ROCm not yet available
2424
```
2525

26-
For faiss-gpu, the nvidia channel is required for CUDA, which is not published in the main anaconda channel.
26+
The conda-forge channel is required for up-to-date dependencies (MKL on x86-64, OpenBLAS on ARM), which are not regularly updated in the default Anaconda channel.
27+
28+
For faiss-gpu, the nvidia channel is additionally required for CUDA, which is not published in the default Anaconda channel.
2729

2830
For faiss-gpu-cuvs, the rapidsai, conda-forge and nvidia channels are required.
2931

3032
Nightly pre-release packages can be installed as follows:
3133

3234
``` shell
3335
# CPU-only version
34-
$ conda install -c pytorch/label/nightly faiss-cpu
36+
$ conda install -c pytorch/label/nightly -c conda-forge faiss-cpu
3537

3638
# GPU(+CPU) version
37-
$ conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.13.2
39+
$ conda install -c pytorch/label/nightly -c nvidia -c conda-forge faiss-gpu=1.13.2
3840

3941
# GPU(+CPU) version with NVIDIA cuVS (package built with CUDA 12.6)
4042
$ conda install -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 'cuda-version=12.6'

benchs/CMakeLists.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,15 @@
33
# This source code is licensed under the MIT license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
6+
find_package(BLAS REQUIRED)
7+
find_package(LAPACK REQUIRED)
8+
find_package(OpenMP REQUIRED)
79

810
add_executable(bench_ivf_selector EXCLUDE_FROM_ALL bench_ivf_selector.cpp)
9-
target_link_libraries(bench_ivf_selector PRIVATE faiss)
11+
target_link_libraries(bench_ivf_selector PRIVATE faiss_avx512 ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} OpenMP::OpenMP_CXX)
12+
target_compile_options(bench_ivf_selector PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mpopcnt>)
1013

14+
add_executable(bench_result_handler_overhead EXCLUDE_FROM_ALL
15+
bench_result_handler_overhead.cpp)
16+
target_link_libraries(bench_result_handler_overhead PRIVATE faiss_avx512 ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} OpenMP::OpenMP_CXX)
17+
target_compile_options(bench_result_handler_overhead PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mpopcnt>)
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
#include "faiss_avx512_result_handler.h"
9+
10+
#include <faiss/IndexIVF.h>
11+
#include <faiss/index_factory.h>
12+
#include <faiss/utils/random.h>
13+
#include <faiss/utils/utils.h>
14+
#include <omp.h>
15+
16+
#include <cstdio>
17+
#include <memory>
18+
#include <vector>
19+
20+
using namespace faiss;
21+
22+
// Parameters
23+
constexpr int d = 64; // dimension
24+
constexpr size_t nb = 1000000; // database size
25+
constexpr size_t nt = 10000; // training size
26+
constexpr size_t nq = 100; // number of queries
27+
constexpr int nrun = 5; // number of timing runs
28+
29+
int main() {
30+
// Use single OpenMP thread
31+
32+
printf("Generating nt=%zu nb=%zu nq=%zu vectors of dimension %d\n",
33+
nt,
34+
nb,
35+
nq,
36+
d);
37+
std::vector<float> xt(nt * d), xb(nb * d), xq(nq * d);
38+
rand_smooth_vectors(nt, d, xt.data(), 1234);
39+
rand_smooth_vectors(nb, d, xb.data(), 4567);
40+
rand_smooth_vectors(nq, d, xq.data(), 7890);
41+
42+
// Build IVF1024,Flat index
43+
printf("Building IVF1024,Flat index...\n");
44+
std::unique_ptr<Index> index(index_factory(d, "IVF1024,Flat", METRIC_L2));
45+
46+
printf("Training index...\n");
47+
index->train(nt, xt.data());
48+
49+
printf("Adding %zu vectors to index...\n", nb);
50+
index->add(nb, xb.data());
51+
52+
// Set nprobe for IVF index
53+
IndexIVF* index_ivf = dynamic_cast<IndexIVF*>(index.get());
54+
if (index_ivf) {
55+
}
56+
omp_set_num_threads(1);
57+
58+
// Test with varying k values
59+
std::vector<size_t> k_values = {1, 10, 20, 50, 100, 200, 500, 1000};
60+
std::vector<size_t> nprobe_values = {1, 2, 4, 8, 16, 64};
61+
62+
printf("\nBenchmarking with %d OpenMP thread(s), %d runs per config\n",
63+
omp_get_max_threads(),
64+
nrun);
65+
printf("%-8s %15s %15s %10s\n",
66+
"k",
67+
"baseline(ms)",
68+
"avx512(ms)",
69+
"speedup");
70+
printf("------------------------------------------------------------\n");
71+
72+
for (size_t nprobe : nprobe_values) {
73+
index_ivf->nprobe = nprobe;
74+
printf("============ nprobe=%zu ===========\n", nprobe);
75+
for (size_t k : k_values) {
76+
std::vector<float> D_ref(nq * k);
77+
std::vector<idx_t> I_ref(nq * k);
78+
std::vector<float> D_avx(nq * k);
79+
std::vector<idx_t> I_avx(nq * k);
80+
81+
// Warmup
82+
index->search(nq, xq.data(), k, D_ref.data(), I_ref.data());
83+
84+
// Benchmark baseline search
85+
double t0 = getmillisecs();
86+
for (int run = 0; run < nrun; run++) {
87+
for (size_t q = 0; q < nq; q++) {
88+
index->search(
89+
1,
90+
xq.data() + q * d,
91+
k,
92+
D_ref.data() + q * k,
93+
I_ref.data() + q * k);
94+
}
95+
}
96+
double baseline_time = (getmillisecs() - t0) / nrun;
97+
98+
// Warmup AVX512 handler
99+
ReservoirResultHandlerAVX512 handler(k);
100+
for (size_t q = 0; q < nq; q++) {
101+
handler.begin();
102+
index->search1(xq.data() + q * d, handler);
103+
handler.end(D_avx.data() + q * k, I_avx.data() + q * k);
104+
}
105+
106+
// Benchmark AVX512 result handler
107+
t0 = getmillisecs();
108+
for (int run = 0; run < nrun; run++) {
109+
for (size_t q = 0; q < nq; q++) {
110+
handler.begin();
111+
index->search1(xq.data() + q * d, handler);
112+
handler.end(D_avx.data() + q * k, I_avx.data() + q * k);
113+
}
114+
}
115+
double avx512_time = (getmillisecs() - t0) / nrun;
116+
117+
double speedup = baseline_time / avx512_time;
118+
printf("%-8zu %15.3f %15.3f %10.2fx\n",
119+
k,
120+
baseline_time,
121+
avx512_time,
122+
speedup);
123+
}
124+
}
125+
126+
return 0;
127+
}

0 commit comments

Comments
 (0)