I was running HNSWCagra Index range_search and comparing it with HNSWIndex. What I realized was when CagraIndex contains only base layer, then in that case number of results returned is 0. On doing further deep-dive what I can see was since, CagraIndex with base layer only doesn't have an entry point, the code exists with 0 results.
For normal topK search with base layer only, there is a special code for searching in the base layer only. Ref:
I created this below code to repro the bug. In the code I have 3 indices,
For index 3, we are not seeing any results.
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <iostream>
#include <random>
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexHNSW.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/HNSW.h>
/// Copy the base layer graph from src to a new IndexHNSWCagra (dst)
/// that only has level 0. Modeled after GpuIndexCagra::copyTo().
void copyBaseLevelOnly(
const faiss::IndexHNSWCagra& src,
faiss::IndexHNSWCagra& dst) {
auto n = src.ntotal;
auto d = src.d;
auto M = src.hnsw.nb_neighbors(0) / 2;
auto graph_degree = src.hnsw.nb_neighbors(0);
// Setup storage
if (dst.storage && dst.own_fields) {
delete dst.storage;
}
dst.storage = new faiss::IndexFlatL2(d);
dst.own_fields = true;
dst.d = d;
dst.metric_type = src.metric_type;
dst.is_trained = true;
dst.keep_max_size_level0 = true;
// Reset and reconfigure HNSW structure
dst.hnsw.reset();
dst.hnsw.assign_probas.clear();
dst.hnsw.cum_nneighbor_per_level.clear();
dst.hnsw.set_default_probas(M, 1.0 / log(M));
// Only build level 0 — no upper levels
dst.hnsw.prepare_level_tab(n, false);
// Copy vectors into storage
auto src_flat = dynamic_cast<faiss::IndexFlat*>(src.storage);
dst.storage->add(n, src_flat->get_xb());
dst.ntotal = n;
// Copy level 0 neighbors from src to dst
#pragma omp parallel for
for (faiss::idx_t i = 0; i < n; i++) {
size_t src_begin, src_end;
src.hnsw.neighbor_range(i, 0, &src_begin, &src_end);
size_t dst_begin, dst_end;
dst.hnsw.neighbor_range(i, 0, &dst_begin, &dst_end);
for (size_t j = 0; j < graph_degree && j < (dst_end - dst_begin); j++) {
dst.hnsw.neighbors[dst_begin + j] =
src.hnsw.neighbors[src_begin + j];
}
}
// Mark as base_level_only
dst.base_level_only = true;
}
int main() {
int d = 8; // dimension
int nb = 10; // number of vectors to index
int nq = 1; // number of queries
int k = 3; // number of nearest neighbors
int M = 4; // HNSW parameter
// Generate random vectors
std::mt19937 rng(42);
std::uniform_real_distribution<float> distrib(0.0f, 1.0f);
std::vector<float> xb(nb * d);
for (auto& v : xb) {
v = distrib(rng);
}
std::vector<float> xq(nq * d);
for (auto& v : xq) {
v = distrib(rng);
}
// Build index with full HNSW (base_level_only must be false to add)
faiss::IndexHNSWCagra index(d, M, faiss::METRIC_L2);
index.add(nb, xb.data());
printf("Index built: ntotal=%ld\n", index.ntotal);
// Enable base_level_only search
index.base_level_only = true;
index.num_base_level_search_entrypoints = 2;
{
std::vector<float> distances(nq * k);
std::vector<faiss::idx_t> labels(nq * k);
index.search(nq, xq.data(), k, distances.data(), labels.data());
printf("\nBase-level-only search results (k=%d):\n", k);
for (int i = 0; i < nq; i++) {
for (int j = 0; j < k; j++) {
printf(" neighbor %d: id=%ld, distance=%.4f\n",
j,
labels[i * k + j],
distances[i * k + j]);
}
}
}
// Range search with the same query vector (IndexHNSWCagra)
{
index.base_level_only = true;
float radius = 0.7f;
faiss::RangeSearchResult rsr(nq);
index.range_search(nq, xq.data(), radius, &rsr);
printf("\nIndexHNSWCagra range search results (radius=%.4f):\n", radius);
for (int i = 0; i < nq; i++) {
printf(" query %d: %ld neighbors found\n",
i,
rsr.lims[i + 1] - rsr.lims[i]);
for (size_t j = rsr.lims[i]; j < rsr.lims[i + 1]; j++) {
printf(" id=%ld, distance=%.4f\n",
rsr.labels[j],
rsr.distances[j]);
}
}
}
// IndexHNSW with same data
printf("\n========== IndexHNSW ==========\n");
faiss::IndexHNSWFlat index_hnsw(d, M, faiss::METRIC_L2);
index_hnsw.add(nb, xb.data());
printf("IndexHNSW built: ntotal=%ld\n", index_hnsw.ntotal);
// KNN search
{
std::vector<float> distances(nq * k);
std::vector<faiss::idx_t> labels(nq * k);
index_hnsw.search(nq, xq.data(), k, distances.data(), labels.data());
printf("\nIndexHNSW search results (k=%d):\n", k);
for (int i = 0; i < nq; i++) {
for (int j = 0; j < k; j++) {
printf(" neighbor %d: id=%ld, distance=%.4f\n",
j,
labels[i * k + j],
distances[i * k + j]);
}
}
}
// Range search
{
float radius = 0.7f;
faiss::RangeSearchResult rsr(nq);
index_hnsw.range_search(nq, xq.data(), radius, &rsr);
printf("\nIndexHNSW range search results (radius=%.4f):\n", radius);
for (int i = 0; i < nq; i++) {
printf(" query %d: %ld neighbors found\n",
i,
rsr.lims[i + 1] - rsr.lims[i]);
for (size_t j = rsr.lims[i]; j < rsr.lims[i + 1]; j++) {
printf(" id=%ld, distance=%.4f\n",
rsr.labels[j],
rsr.distances[j]);
}
}
}
// Copy base layer from IndexHNSWCagra to a new base-level-only index
printf("\n========== Copy Base Level Only ==========\n");
faiss::IndexHNSWCagra dst_index;
copyBaseLevelOnly(index, dst_index);
dst_index.num_base_level_search_entrypoints = 2;
printf("Copied index: ntotal=%ld, base_level_only=%d\n",
dst_index.ntotal,
dst_index.base_level_only);
{
std::vector<float> distances(nq * k);
std::vector<faiss::idx_t> labels(nq * k);
dst_index.search(nq, xq.data(), k, distances.data(), labels.data());
printf("\nCopied base-level-only search results (k=%d):\n", k);
for (int i = 0; i < nq; i++) {
for (int j = 0; j < k; j++) {
printf(" neighbor %d: id=%ld, distance=%.4f\n",
j,
labels[i * k + j],
distances[i * k + j]);
}
}
}
// Range search on copied index
{
dst_index.base_level_only = true;
float radius = 0.7f;
faiss::RangeSearchResult rsr(nq);
dst_index.range_search(nq, xq.data(), radius, &rsr);
printf("\nCopied index range search results (radius=%.4f):\n", radius);
for (int i = 0; i < nq; i++) {
printf(" query %d: %ld neighbors found\n",
i,
rsr.lims[i + 1] - rsr.lims[i]);
for (size_t j = rsr.lims[i]; j < rsr.lims[i + 1]; j++) {
printf(" id=%ld, distance=%.4f\n",
rsr.labels[j],
rsr.distances[j]);
}
}
}
std::cout << "Test completed successfully!" << std::endl;
return 0;
}
Description
I was running HNSWCagra Index range_search and comparing it with HNSWIndex. What I realized was when CagraIndex contains only base layer, then in that case number of results returned is 0. On doing further deep-dive what I can see was since, CagraIndex with base layer only doesn't have an entry point, the code exists with 0 results.
Code ref:
faiss/faiss/impl/HNSW.cpp
Lines 845 to 847 in df0dea6
For normal topK search with base layer only, there is a special code for searching in the base layer only. Ref:
faiss/faiss/IndexHNSW.cpp
Lines 966 to 977 in df0dea6
Reproduction Code
I created this below code to repro the bug. In the code I have 3 indices,
For index 3, we are not seeing any results.