Skip to content

Commit 66853ee

Browse files
scsiguymeta-codesync[bot]
authored andcommitted
Fix NSG off-by-one neighbor ID check (#4804)
Summary: Pull Request resolved: #4804 Valid neighbor indexes are in the range of [0, ntotal). Fix range check in NSG::search_on_graph() that accepted an index value of ntotal. NOTE: This data structure uses -1 as a termninal sentinal. The call to nsg::Graph::get_neighgbors() above the for loop that includes the upper bound check stops when encountering a negative index. This means the check for negative values has already occurred and does not need to be repeated inside the loop. Reviewed By: junjieqi, sh00s Differential Revision: D93040548 fbshipit-source-id: e1bde990a459c86dfaebdd2f780375b7922fc5f5
1 parent ac3bf46 commit 66853ee

2 files changed

Lines changed: 65 additions & 1 deletion

File tree

faiss/impl/NSG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ void NSG::search_on_graph(
304304
size_t nneigh_for_n = graph.get_neighbors(n, neighbors.data());
305305
for (int m = 0; m < nneigh_for_n; m++) {
306306
int id = neighbors[m];
307-
if (id > ntotal || vt.get(id)) {
307+
if (id >= ntotal || vt.get(id)) {
308308
continue;
309309
}
310310
vt.set(id);

tests/test_io_corrupted.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
#include <gtest/gtest.h>
9+
10+
#include <cstring>
11+
#include <memory>
12+
#include <vector>
13+
14+
#include <faiss/Index.h>
15+
#include <faiss/IndexFlat.h>
16+
#include <faiss/IndexNSG.h>
17+
18+
#include <faiss/utils/random.h>
19+
20+
using namespace faiss;
21+
22+
TEST(IoCorrupted, NSGNeighborIdEqualsNtotal) {
23+
// Build an NSG index
24+
int d = 8, nb = 500;
25+
std::vector<float> xb(nb * d);
26+
faiss::rand_smooth_vectors(nb, d, xb.data(), 1234);
27+
28+
IndexNSGFlat index(d, 16);
29+
index.add(nb, xb.data());
30+
31+
// Verify that NSG::search_on_graph() skips corrupt neighbor entries
32+
// that would trigger access outside/above the bounds of the index by
33+
// setting the LAST valid neighbor of every node to ntotal.
34+
// get_neighbors returns neighbors until it hits a -1 sentinel,
35+
// so setting the last non-sentinel entry preserves graph connectivity
36+
// while ensuring the inner search loop encounters id == ntotal.
37+
auto* graph = index.nsg.final_graph.get();
38+
for (int i = 0; i < nb; i++) {
39+
// Find the last valid (non-sentinel) neighbor slot
40+
int last = -1;
41+
for (int j = 0; j < graph->K; j++) {
42+
if (graph->at(i, j) >= 0) {
43+
last = j;
44+
} else {
45+
break;
46+
}
47+
}
48+
if (last >= 0) {
49+
graph->at(i, last) = nb; // inject ntotal
50+
}
51+
}
52+
53+
std::vector<float> xq(d);
54+
faiss::rand_smooth_vectors(1, d, xq.data(), 5678);
55+
std::vector<idx_t> I(1);
56+
std::vector<float> D(1);
57+
58+
// Safely skipped without an exception or ASAN failure.
59+
EXPECT_NO_THROW(index.search(1, xq.data(), 1, D.data(), I.data()));
60+
61+
// Result must be a valid node ID
62+
EXPECT_GE(I[0], 0);
63+
EXPECT_LT(I[0], nb);
64+
}

0 commit comments

Comments
 (0)