Skip to content

[BUG] IndexHNSWCagra with Base layer only return 0 results for range_search #4927

@navneet1v

Description

@navneet1v

Description

I was running HNSWCagra Index range_search and comparing it with HNSWIndex. What I realized was when CagraIndex contains only base layer, then in that case number of results returned is 0. On doing further deep-dive what I can see was since, CagraIndex with base layer only doesn't have an entry point, the code exists with 0 results.

Code ref:

faiss/faiss/impl/HNSW.cpp

Lines 845 to 847 in df0dea6

if (entry_point == -1) {
return stats;
}

For normal topK search with base layer only, there is a special code for searching in the base layer only. Ref:

faiss/faiss/IndexHNSW.cpp

Lines 966 to 977 in df0dea6

search_level_0(
n,
x,
k,
nearest.data(),
nearest_d.data(),
distances,
labels,
1, // n_probes
1, // search_type
params);
}

Reproduction Code

I created this below code to repro the bug. In the code I have 3 indices,

  1. CagraIndex with all layers.
  2. HNSWIndex
  3. CagraIndex with base layer only copied from 1 using copyBaseLevelOnly function.

For index 3, we are not seeing any results.

/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <cstdio>
#include <iostream>
#include <random>
#include <vector>

#include <faiss/IndexFlat.h>
#include <faiss/IndexHNSW.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/HNSW.h>

/// Copy the base layer graph from src to a new IndexHNSWCagra (dst)
/// that only has level 0. Modeled after GpuIndexCagra::copyTo().
void copyBaseLevelOnly(
        const faiss::IndexHNSWCagra& src,
        faiss::IndexHNSWCagra& dst) {
    auto n = src.ntotal;
    auto d = src.d;
    auto M = src.hnsw.nb_neighbors(0) / 2;
    auto graph_degree = src.hnsw.nb_neighbors(0);

    // Setup storage
    if (dst.storage && dst.own_fields) {
        delete dst.storage;
    }
    dst.storage = new faiss::IndexFlatL2(d);
    dst.own_fields = true;
    dst.d = d;
    dst.metric_type = src.metric_type;
    dst.is_trained = true;
    dst.keep_max_size_level0 = true;

    // Reset and reconfigure HNSW structure
    dst.hnsw.reset();
    dst.hnsw.assign_probas.clear();
    dst.hnsw.cum_nneighbor_per_level.clear();
    dst.hnsw.set_default_probas(M, 1.0 / log(M));

    // Only build level 0 — no upper levels
    dst.hnsw.prepare_level_tab(n, false);

    // Copy vectors into storage
    auto src_flat = dynamic_cast<faiss::IndexFlat*>(src.storage);
    dst.storage->add(n, src_flat->get_xb());
    dst.ntotal = n;

    // Copy level 0 neighbors from src to dst
#pragma omp parallel for
    for (faiss::idx_t i = 0; i < n; i++) {
        size_t src_begin, src_end;
        src.hnsw.neighbor_range(i, 0, &src_begin, &src_end);

        size_t dst_begin, dst_end;
        dst.hnsw.neighbor_range(i, 0, &dst_begin, &dst_end);

        for (size_t j = 0; j < graph_degree && j < (dst_end - dst_begin); j++) {
            dst.hnsw.neighbors[dst_begin + j] =
                    src.hnsw.neighbors[src_begin + j];
        }
    }

    // Mark as base_level_only
    dst.base_level_only = true;
}

int main() {
    int d = 8;    // dimension
    int nb = 10;  // number of vectors to index
    int nq = 1;   // number of queries
    int k = 3;    // number of nearest neighbors
    int M = 4;    // HNSW parameter

    // Generate random vectors
    std::mt19937 rng(42);
    std::uniform_real_distribution<float> distrib(0.0f, 1.0f);

    std::vector<float> xb(nb * d);
    for (auto& v : xb) {
        v = distrib(rng);
    }

    std::vector<float> xq(nq * d);
    for (auto& v : xq) {
        v = distrib(rng);
    }

    // Build index with full HNSW (base_level_only must be false to add)
    faiss::IndexHNSWCagra index(d, M, faiss::METRIC_L2);
    index.add(nb, xb.data());

    printf("Index built: ntotal=%ld\n", index.ntotal);

    // Enable base_level_only search
    index.base_level_only = true;
    index.num_base_level_search_entrypoints = 2;

    {
        std::vector<float> distances(nq * k);
        std::vector<faiss::idx_t> labels(nq * k);

        index.search(nq, xq.data(), k, distances.data(), labels.data());

        printf("\nBase-level-only search results (k=%d):\n", k);
        for (int i = 0; i < nq; i++) {
            for (int j = 0; j < k; j++) {
                printf("  neighbor %d: id=%ld, distance=%.4f\n",
                       j,
                       labels[i * k + j],
                       distances[i * k + j]);
            }
        }
    }

    // Range search with the same query vector (IndexHNSWCagra)
    {
        index.base_level_only = true;
        float radius = 0.7f;
        faiss::RangeSearchResult rsr(nq);

        index.range_search(nq, xq.data(), radius, &rsr);

        printf("\nIndexHNSWCagra range search results (radius=%.4f):\n", radius);
        for (int i = 0; i < nq; i++) {
            printf("  query %d: %ld neighbors found\n",
                   i,
                   rsr.lims[i + 1] - rsr.lims[i]);
            for (size_t j = rsr.lims[i]; j < rsr.lims[i + 1]; j++) {
                printf("    id=%ld, distance=%.4f\n",
                       rsr.labels[j],
                       rsr.distances[j]);
            }
        }
    }

    // IndexHNSW with same data
    printf("\n========== IndexHNSW ==========\n");
    faiss::IndexHNSWFlat index_hnsw(d, M, faiss::METRIC_L2);
    index_hnsw.add(nb, xb.data());

    printf("IndexHNSW built: ntotal=%ld\n", index_hnsw.ntotal);

    // KNN search
    {
        std::vector<float> distances(nq * k);
        std::vector<faiss::idx_t> labels(nq * k);

        index_hnsw.search(nq, xq.data(), k, distances.data(), labels.data());

        printf("\nIndexHNSW search results (k=%d):\n", k);
        for (int i = 0; i < nq; i++) {
            for (int j = 0; j < k; j++) {
                printf("  neighbor %d: id=%ld, distance=%.4f\n",
                       j,
                       labels[i * k + j],
                       distances[i * k + j]);
            }
        }
    }

    // Range search
    {
        float radius = 0.7f;
        faiss::RangeSearchResult rsr(nq);

        index_hnsw.range_search(nq, xq.data(), radius, &rsr);

        printf("\nIndexHNSW range search results (radius=%.4f):\n", radius);
        for (int i = 0; i < nq; i++) {
            printf("  query %d: %ld neighbors found\n",
                   i,
                   rsr.lims[i + 1] - rsr.lims[i]);
            for (size_t j = rsr.lims[i]; j < rsr.lims[i + 1]; j++) {
                printf("    id=%ld, distance=%.4f\n",
                       rsr.labels[j],
                       rsr.distances[j]);
            }
        }
    }

    // Copy base layer from IndexHNSWCagra to a new base-level-only index
    printf("\n========== Copy Base Level Only ==========\n");
    faiss::IndexHNSWCagra dst_index;
    copyBaseLevelOnly(index, dst_index);
    dst_index.num_base_level_search_entrypoints = 2;

    printf("Copied index: ntotal=%ld, base_level_only=%d\n",
           dst_index.ntotal,
           dst_index.base_level_only);

    {
        std::vector<float> distances(nq * k);
        std::vector<faiss::idx_t> labels(nq * k);

        dst_index.search(nq, xq.data(), k, distances.data(), labels.data());

        printf("\nCopied base-level-only search results (k=%d):\n", k);
        for (int i = 0; i < nq; i++) {
            for (int j = 0; j < k; j++) {
                printf("  neighbor %d: id=%ld, distance=%.4f\n",
                       j,
                       labels[i * k + j],
                       distances[i * k + j]);
            }
        }
    }

    // Range search on copied index
    {
        dst_index.base_level_only = true;
        float radius = 0.7f;
        faiss::RangeSearchResult rsr(nq);

        dst_index.range_search(nq, xq.data(), radius, &rsr);

        printf("\nCopied index range search results (radius=%.4f):\n", radius);
        for (int i = 0; i < nq; i++) {
            printf("  query %d: %ld neighbors found\n",
                   i,
                   rsr.lims[i + 1] - rsr.lims[i]);
            for (size_t j = rsr.lims[i]; j < rsr.lims[i + 1]; j++) {
                printf("    id=%ld, distance=%.4f\n",
                       rsr.labels[j],
                       rsr.distances[j]);
            }
        }
    }
    std::cout << "Test completed successfully!" << std::endl;
    return 0;
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions