Skip to content

Commit 40a8cc0

Browse files
Michael Norrismeta-codesync[bot]
authored andcommitted
Fix lints associated with early stop facilities for IVF (#5172)
Summary: Pull Request resolved: #5172 Follow up to D103044128 Fix some lint issues and one stats-tracking issue (and add test for it) in IVF early-stop code: - **ResultHandler.h**: `RangeResultHandler::add_result` now returns `true` when a result is actually added, fixing `nheap_updates` stat tracking through the `scan_codes_range` / `expanded_scanners.h` path. Previously always returned `false`, making `nheap_updates` a perpetual no-op for range search via the default scanner. This is inconsistent with `iterate_codes_range` and `IndexIVFSpectralHash_impl.h`, which track correctly. - **IndexIVF.cpp**: Move `effective_max_codes` declaration to its only use site, eliminating a dead-store lint warning. - **IndexIVFPQFastScan.cpp**: Remove unused `#include <faiss/utils/Heap.h>`. - **test_fast_scan_ivf.py**: Reformat trailing comment for readability. - **test_ivf_early_termination.cpp**: Add unit test verifying `RangeResultHandler::add_result` return value. Register test in BUCK (was CMake-only). Reviewed By: mdouze Differential Revision: D103438123 fbshipit-source-id: b8878975e1f280ff1590363541a100d3ae51ba0e
1 parent 1e69d5a commit 40a8cc0

5 files changed

Lines changed: 77 additions & 5 deletions

File tree

faiss/IndexIVF.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,6 @@ void IndexIVF::search_preassigned(
421421
const idx_t unlimited_list_size = std::numeric_limits<idx_t>::max();
422422
idx_t cur_max_codes = params ? params->max_codes : this->max_codes;
423423
const bool ensure_topk_full = params ? params->ensure_topk_full : false;
424-
idx_t effective_max_codes = cur_max_codes;
425424

426425
IDSelector* sel = params ? params->sel : nullptr;
427426
const IDSelectorRange* selr = dynamic_cast<const IDSelectorRange*>(sel);
@@ -466,7 +465,7 @@ void IndexIVF::search_preassigned(
466465
}
467466
// Budget used by the probe loop below. ensure_topk_full makes a small
468467
// max_codes budget large enough to give k post-filter candidates a chance.
469-
effective_max_codes =
468+
idx_t effective_max_codes =
470469
ensure_topk_full ? std::max(cur_max_codes, k) : cur_max_codes;
471470

472471
[[maybe_unused]] bool do_parallel = omp_get_max_threads() >= 2 &&

faiss/IndexIVFPQFastScan.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include <faiss/impl/FaissAssert.h>
1717
#include <faiss/impl/ResultHandler.h>
1818
#include <faiss/impl/simdlib/simdlib_dispatch.h>
19-
#include <faiss/utils/Heap.h>
2019
#include <faiss/utils/distances.h>
2120
#include <faiss/utils/distances_dispatch.h>
2221
#include <faiss/utils/extra_distances.h>

faiss/impl/ResultHandler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,7 @@ struct RangeResultHandler : ResultHandlerT<C> {
596596
bool add_result(T dis, TI idx) final {
597597
if (C::cmp(threshold, dis)) {
598598
qr->add(dis, idx);
599+
return true;
599600
}
600601
return false;
601602
}

tests/test_fast_scan_ivf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,8 +1074,8 @@ def test_fastscan_ensure_topk_full_fills_heap(self):
10741074
k = 10
10751075
params = faiss.SearchParametersIVF()
10761076
params.nprobe = 16
1077-
params.max_codes = k // 2 # tight: forces truncation without
1078-
# ensure_topk_full
1077+
# k // 2 max_codes forces truncation without ensure_topk_full
1078+
params.max_codes = k // 2
10791079
params.ensure_topk_full = False
10801080
_, I_tight = index.search(q, k, params=params)
10811081
params.ensure_topk_full = True

tests/test_ivf_early_termination.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <faiss/impl/AuxIndexStructures.h>
2929
#include <faiss/impl/FaissException.h>
3030
#include <faiss/impl/IDSelector.h>
31+
#include <faiss/impl/ResultHandler.h>
3132
#include <faiss/index_factory.h>
3233

3334
namespace {
@@ -504,6 +505,78 @@ TEST(IVFEarlyTermination, FastscanNheapUpdatesIsZeroToday) {
504505
"test and verify the value is correct.";
505506
}
506507

508+
// RangeResultHandler::add_result must return true when a result is added
509+
// (distance passes the threshold) so that callers in expanded_scanners.h
510+
// correctly track nheap_updates.
511+
TEST(IVFEarlyTermination, RangeResultHandlerAddResultReturnValue) {
512+
using C = faiss::CMax<float, faiss::idx_t>;
513+
514+
faiss::RangeSearchResult rsr(1);
515+
faiss::RangeSearchPartialResult pres(&rsr);
516+
auto& qr = pres.new_result(0);
517+
518+
faiss::RangeResultHandler<C> handler(&qr, /*threshold=*/5.0f);
519+
520+
// CMax::cmp(5.0, 3.0) is true: distance within radius, should add
521+
EXPECT_TRUE(handler.add_result(3.0f, 42));
522+
EXPECT_EQ(qr.nres, size_t(1));
523+
524+
// CMax::cmp(5.0, 5.0) is false: at boundary, should not add
525+
EXPECT_FALSE(handler.add_result(5.0f, 43));
526+
EXPECT_EQ(qr.nres, size_t(1));
527+
528+
// CMax::cmp(5.0, 10.0) is false: distance exceeds radius
529+
EXPECT_FALSE(handler.add_result(10.0f, 44));
530+
EXPECT_EQ(qr.nres, size_t(1));
531+
532+
// Second valid result
533+
EXPECT_TRUE(handler.add_result(1.0f, 45));
534+
EXPECT_EQ(qr.nres, size_t(2));
535+
536+
// Finalize and verify stored (dis, id) pairs
537+
pres.finalize();
538+
ASSERT_EQ(rsr.lims[0], size_t(0));
539+
ASSERT_EQ(rsr.lims[1], size_t(2));
540+
std::set<faiss::idx_t> ids(rsr.labels, rsr.labels + 2);
541+
EXPECT_EQ(ids, (std::set<faiss::idx_t>{42, 45}));
542+
std::set<float> dists(rsr.distances, rsr.distances + 2);
543+
EXPECT_EQ(dists, (std::set<float>{1.0f, 3.0f}));
544+
}
545+
546+
// Same test with CMin (inner product semantics): adds when threshold < dis.
547+
TEST(IVFEarlyTermination, RangeResultHandlerAddResultCMin) {
548+
using C = faiss::CMin<float, faiss::idx_t>;
549+
550+
faiss::RangeSearchResult rsr(1);
551+
faiss::RangeSearchPartialResult pres(&rsr);
552+
auto& qr = pres.new_result(0);
553+
554+
faiss::RangeResultHandler<C> handler(&qr, /*threshold=*/5.0f);
555+
556+
// CMin::cmp(5.0, 10.0) is true: dis above threshold, should add
557+
EXPECT_TRUE(handler.add_result(10.0f, 100));
558+
EXPECT_EQ(qr.nres, size_t(1));
559+
560+
// CMin::cmp(5.0, 5.0) is false: at boundary, should not add
561+
EXPECT_FALSE(handler.add_result(5.0f, 101));
562+
EXPECT_EQ(qr.nres, size_t(1));
563+
564+
// CMin::cmp(5.0, 3.0) is false: dis below threshold
565+
EXPECT_FALSE(handler.add_result(3.0f, 102));
566+
EXPECT_EQ(qr.nres, size_t(1));
567+
568+
// Second valid result
569+
EXPECT_TRUE(handler.add_result(7.0f, 103));
570+
EXPECT_EQ(qr.nres, size_t(2));
571+
572+
pres.finalize();
573+
ASSERT_EQ(rsr.lims[1], size_t(2));
574+
std::set<faiss::idx_t> ids(rsr.labels, rsr.labels + 2);
575+
EXPECT_EQ(ids, (std::set<faiss::idx_t>{100, 103}));
576+
std::set<float> dists(rsr.distances, rsr.distances + 2);
577+
EXPECT_EQ(dists, (std::set<float>{7.0f, 10.0f}));
578+
}
579+
507580
// FastScan early-stop options use the per-query implementations.
508581
TEST(IVFEarlyTermination, FastscanDefaultsWork) {
509582
auto xb = make_data(nb, 0xcafebabe);

0 commit comments

Comments
 (0)