Skip to content

Commit 0b7fc87

Browse files
tarang-jain authored and facebook-github-bot committed
Upgrade cuVS Version to 26.02 (facebookresearch#4788)
Summary: Pull Request resolved: facebookresearch#4788. Reviewed By: yingufan. Differential Revision: D96931953. Pulled By: mnorris11.
1 parent 1e4d227 commit 0b7fc87

16 files changed

Lines changed: 145 additions & 72 deletions

File tree

.github/actions/build_cmake/action.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ runs:
7474
conda install -y -q cuda-toolkit=12.6 gxx_linux-64=12.4 -c "nvidia/label/cuda-12.6"
7575
# and CUDA from cuVS channel for cuVS builds
7676
elif [ "${{ inputs.cuvs }}" = "ON" ]; then
77-
conda install -y -q libcuvs=25.10 'cuda-version=12.6' cuda-toolkit=12.6 gxx_linux-64=13.4.0 -c rapidsai -c rapidsai-nightly -c conda-forge
77+
conda install -y -q libcuvs=26.02 'cuda-version=12.9' cuda-toolkit=12.9 sysroot_linux-64=2.34 -c rapidsai -c rapidsai-nightly -c conda-forge
7878
fi
7979
8080
# install SVS runtime for SVS builds
@@ -87,7 +87,7 @@ runs:
8787
: # skip torch install via conda, we need to install via pip to get
8888
# ROCm-enabled version until it's supported in conda by PyTorch
8989
elif [ "${{ inputs.gpu }}" = "ON" ]; then
90-
conda install -y -q "pytorch>=2.7" "pytorch-gpu>=2.7" -c pytorch -c "nvidia/label/12.6"
90+
conda install -y -q "pytorch>=2.7" "pytorch-gpu>=2.7" -c pytorch -c "nvidia/label/12.9"
9191
else
9292
# TestLowLevelIVF.IVFRQ hangs on pytorch>=2.7, so left it as <2.5 for now.
9393
conda install -y -q "pytorch<2.5" -c pytorch

INSTALL.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ pre-release nightly builds.
66

77
- The CPU-only faiss-cpu conda package is currently available on Linux (x86-64 and aarch64), OSX (arm64 only), and Windows (x86-64)
88
- faiss-gpu, containing both CPU and GPU indices, is available on Linux (x86-64 only) for CUDA 11.4 and 12.1
9-
- faiss-gpu-cuvs package containing GPU indices provided by [NVIDIA cuVS](https://github.com/rapidsai/cuvs/) version 25.10, is available on Linux (x86-64 only) for CUDA 12.4.
9+
- faiss-gpu-cuvs package containing GPU indices provided by [NVIDIA cuVS](https://github.com/rapidsai/cuvs/) version 26.02, is available on Linux (x86-64 only) for CUDA 12.4.
1010

1111
To install the latest stable release:
1212

@@ -72,7 +72,7 @@ The optional requirements are:
7272
- for AMD GPUs:
7373
- AMD ROCm,
7474
- for using NVIDIA cuVS implementations:
75-
- libcuvs=25.10
75+
- libcuvs=26.02
7676
- for the python bindings:
7777
- python 3,
7878
- numpy,
@@ -87,9 +87,9 @@ section of the wiki](https://github.com/facebookresearch/faiss/wiki/Troubleshoot
8787

8888
The libcuvs dependency should be installed via conda:
8989
```
90-
conda install -c rapidsai -c conda-forge -c nvidia libcuvs=25.10 'cuda-version=12.6'
90+
conda install -c rapidsai -c conda-forge -c nvidia libcuvs=26.02 'cuda-version=12.6'
9191
```
92-
For more ways to install cuVS 25.10, refer to the [RAPIDS Installation Guide](https://docs.rapids.ai/install).
92+
For more ways to install cuVS 26.02, refer to the [RAPIDS Installation Guide](https://docs.rapids.ai/install).
9393

9494
### Building with Intel(R) SVS
9595

cmake/thirdparty/fetch_rapids.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@
1515
# or implied. See the License for the specific language governing permissions and limitations under
1616
# the License.
1717
# =============================================================================
18-
set(RAPIDS_VERSION "25.10")
18+
set(RAPIDS_VERSION "26.02")
1919
set(rapids-cmake-version ${RAPIDS_VERSION})
2020

2121
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake)
22-
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake
22+
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/release/${RAPIDS_VERSION}/RAPIDS.cmake
2323
${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake)
2424
endif()
2525
include(${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake)

conda/faiss-gpu-cuvs/meta.yaml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
{% set version = environ.get('GIT_DESCRIBE_TAG').lstrip('v') %}
77
{% set suffix = "_nightly" if environ.get('PACKAGE_TYPE') == 'nightly' else "" %}
88
{% set number = GIT_DESCRIBE_NUMBER %}
9-
{% set cuda_constraints=">=12.6,<12.7" %}
10-
{% set libcublas_constraints=">=12.6,<12.7" %}
11-
{% set cudart_constraints=">=12.6,<12.7" %}
9+
{% set cuda_constraints=">=12.9,<13.0" %}
10+
{% set libcublas_constraints=">=12.9,<13.0" %}
11+
{% set cudart_constraints=">=12.9,<13.0" %}
1212

1313
package:
1414
name: faiss-pkg
@@ -60,7 +60,7 @@ outputs:
6060
- _openmp_mutex =4.5=2_kmp_llvm # [x86_64]
6161
- mkl >=2024.2.2 # [x86_64]
6262
- openblas =0.3.30 # [not x86_64]
63-
- libcuvs =25.10
63+
- libcuvs =26.02
6464
- cuda-version {{ cuda_constraints }}
6565
- libsvs-runtime =0.2.0 # [x86_64 and linux]
6666
run:
@@ -69,7 +69,7 @@ outputs:
6969
- openblas =0.3.30 # [not x86_64]
7070
- cuda-cudart {{ cuda_constraints }}
7171
- libcublas {{ libcublas_constraints }}
72-
- libcuvs =25.10
72+
- libcuvs =26.02
7373
- cuda-version {{ cuda_constraints }}
7474
- libnvjitlink
7575
- libsvs-runtime =0.2.0 # [x86_64 and linux]
@@ -91,10 +91,10 @@ outputs:
9191
string: "py{{ PY_VER }}_h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}{{ suffix }}"
9292
requirements:
9393
build:
94-
- {{ compiler('cxx') }} =12.4
95-
- sysroot_linux-64 =2.17 # [linux64]
94+
- {{ compiler('cxx') }} =14.2
95+
- sysroot_linux-64 =2.34 # [linux64]
9696
- swig =4.0
97-
- cmake >=3.26.4
97+
- cmake >=3.30.4
9898
- make =4.2 # [not win]
9999
- _openmp_mutex =4.5=2_kmp_llvm # [x86_64]
100100
- mkl >=2024.2.2 # [x86_64]

faiss/gpu/GpuDistance.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) {
238238
if (should_use_cuvs(args) && args.queriesRowMajor == args.vectorsRowMajor &&
239239
args.outIndicesType == IndicesDataType::I64 &&
240240
args.vectorType == DistanceDataType::F32 && args.k > 0) {
241-
cuvsDistanceType distance = metricFaissToCuvs(args.metric, false);
241+
auto distance = metricFaissToCuvs(args.metric, false);
242242

243243
auto resImpl = prov->getResources();
244244
auto res = resImpl.get();

faiss/gpu/GpuIndexCagra.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,14 +193,14 @@ struct GpuIndexCagraConfig : public GpuIndexConfig {
193193

194194
enum class search_algo {
195195
/// For large batch sizes.
196-
SINGLE_CTA,
196+
SINGLE_CTA = 0,
197197
/// For small batch sizes.
198-
MULTI_CTA,
199-
MULTI_KERNEL,
200-
AUTO
198+
MULTI_CTA = 1,
199+
MULTI_KERNEL = 2,
200+
AUTO = 100
201201
};
202202

203-
enum class hash_mode { HASH, SMALL, AUTO };
203+
enum class hash_mode { HASH = 0, SMALL = 1, AUTO = 100 };
204204

205205
struct SearchParametersCagra : SearchParameters {
206206
/// Maximum number of queries to search at the same time (batch size). Auto

faiss/gpu/GpuResources.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
#if defined USE_NVIDIA_CUVS
3535
#include <raft/core/device_resources.hpp>
36-
#include <rmm/mr/device/device_memory_resource.hpp>
36+
#include <rmm/mr/device_memory_resource.hpp>
3737
#endif
3838

3939
namespace faiss {

faiss/gpu/StandardGpuResources.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323

2424
#if defined USE_NVIDIA_CUVS
2525
#include <raft/core/device_resources.hpp>
26-
#include <rmm/mr/device/managed_memory_resource.hpp>
27-
#include <rmm/mr/device/per_device_resource.hpp>
28-
#include <rmm/mr/host/pinned_memory_resource.hpp>
26+
#include <rmm/mr/managed_memory_resource.hpp>
27+
#include <rmm/mr/per_device_resource.hpp>
28+
#include <rmm/mr/pinned_host_memory_resource.hpp>
2929
#include <memory>
3030
#endif
3131

@@ -93,7 +93,7 @@ StandardGpuResourcesImpl::StandardGpuResourcesImpl()
9393
:
9494
#if defined USE_NVIDIA_CUVS
9595
mmr_(new rmm::mr::managed_memory_resource),
96-
pmr_(new rmm::mr::pinned_memory_resource),
96+
pmr_(new rmm::mr::pinned_host_memory_resource),
9797
#endif
9898
pinnedMemAlloc_(nullptr),
9999
pinnedMemAllocSize_(0),
@@ -164,7 +164,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
164164

165165
if (pinnedMemAlloc_) {
166166
#if defined USE_NVIDIA_CUVS
167-
pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_);
167+
pmr_->deallocate_sync(pinnedMemAlloc_, pinnedMemAllocSize_);
168168
#else
169169
auto err = cudaFreeHost(pinnedMemAlloc_);
170170
FAISS_ASSERT_FMT(
@@ -350,7 +350,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
350350
// pinned memory allocation
351351
if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
352352
try {
353-
pinnedMemAlloc_ = pmr_->allocate(pinnedMemSize_);
353+
pinnedMemAlloc_ = pmr_->allocate_sync(pinnedMemSize_);
354354
} catch (const std::bad_alloc& rmm_ex) {
355355
FAISS_THROW_MSG("CUDA memory allocation error");
356356
}
@@ -549,7 +549,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
549549
rmm::mr::device_memory_resource* current_mr =
550550
rmm::mr::get_per_device_resource(
551551
rmm::cuda_device_id{adjReq.device});
552-
p = current_mr->allocate_async(adjReq.size, adjReq.stream);
552+
p = current_mr->allocate(adjReq.stream, adjReq.size);
553553
adjReq.mr = current_mr;
554554
} catch (const std::bad_alloc& rmm_ex) {
555555
FAISS_THROW_MSG("CUDA memory allocation error");
@@ -584,7 +584,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
584584
// TODO: change this to use the current device resource once RMM has
585585
// a way to retrieve a "guaranteed" managed memory resource for a
586586
// device.
587-
p = mmr_->allocate_async(adjReq.size, adjReq.stream);
587+
p = mmr_->allocate(adjReq.stream, adjReq.size);
588588
adjReq.mr = mmr_.get();
589589
} catch (const std::bad_alloc& rmm_ex) {
590590
FAISS_THROW_MSG("CUDA memory allocation error");
@@ -648,7 +648,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
648648
req.space == MemorySpace::Device ||
649649
req.space == MemorySpace::Unified) {
650650
#if defined USE_NVIDIA_CUVS
651-
req.mr->deallocate_async(p, req.size, req.stream);
651+
req.mr->deallocate(req.stream, p, req.size);
652652
#else
653653
auto err = cudaFree(p);
654654
FAISS_ASSERT_FMT(

faiss/gpu/StandardGpuResources.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525

2626
#if defined USE_NVIDIA_CUVS
2727
#include <raft/core/device_resources.hpp>
28-
#include <rmm/mr/host/pinned_memory_resource.hpp>
28+
#include <rmm/mr/device_memory_resource.hpp>
29+
#include <rmm/mr/pinned_host_memory_resource.hpp>
2930
#endif
3031

3132
#include <faiss/gpu/GpuResources.h>
@@ -172,8 +173,8 @@ class StandardGpuResourcesImpl : public GpuResources {
172173
// managed_memory_resource
173174
std::unique_ptr<rmm::mr::device_memory_resource> mmr_;
174175

175-
// pinned_memory_resource
176-
std::unique_ptr<rmm::mr::host_memory_resource> pmr_;
176+
// pinned_host_memory_resource
177+
std::unique_ptr<rmm::mr::pinned_host_memory_resource> pmr_;
177178
#endif
178179

179180
/// Pinned memory allocation for use with this GPU

faiss/gpu/impl/BinaryCuvsCagra.cu

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ BinaryCuvsCagra::BinaryCuvsCagra(
4949
IndicesOptions indicesOptions)
5050
: resources_(resources),
5151
dim_(dim),
52+
store_dataset_(store_dataset),
5253
graph_build_algo_(graph_build_algo),
53-
nn_descent_niter_(nn_descent_niter),
54-
store_dataset_(store_dataset) {
54+
nn_descent_niter_(nn_descent_niter) {
5555
FAISS_THROW_IF_NOT_MSG(
5656
indicesOptions == faiss::gpu::INDICES_64_BIT,
5757
"only INDICES_64_BIT is supported for cuVS CAGRA index");
@@ -161,6 +161,11 @@ void BinaryCuvsCagra::train(idx_t n, const uint8_t* x) {
161161
cuvs::neighbors::cagra::graph_build_params::iterative_search_params
162162
graph_build_params;
163163
index_params_.graph_build_params = graph_build_params;
164+
if (index_params_.graph_degree ==
165+
index_params_.intermediate_graph_degree) {
166+
index_params_.intermediate_graph_degree =
167+
1.5 * index_params_.graph_degree;
168+
}
164169
}
165170

166171
if (getDeviceForAddress(x) >= 0) {

0 commit comments

Comments
 (0)