Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions faiss/IndexFlat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ inline void flat_pano_search_core(
{
SingleResultHandler res(handler);

std::vector<float> query_cum_norms(index.n_levels + 1);
std::vector<float> query_cum_norms(index.pano.n_levels + 1);
std::vector<uint32_t> active_indices(index.batch_size);
std::vector<uint8_t> active_byteset(index.batch_size);
std::vector<float> exact_distances(index.batch_size);
Expand Down Expand Up @@ -698,7 +698,7 @@ void IndexFlatPanorama::add(idx_t n, const float* x) {
size_t num_batches = (ntotal + batch_size - 1) / batch_size;

codes.resize(num_batches * batch_size * code_size);
cum_sums.resize(num_batches * batch_size * (n_levels + 1));
cum_sums.resize(num_batches * batch_size * (pano.n_levels + 1));

const uint8_t* code = reinterpret_cast<const uint8_t*>(x);
pano.copy_codes_to_level_layout(codes.data(), offset, n, code);
Expand Down Expand Up @@ -771,7 +771,7 @@ size_t IndexFlatPanorama::remove_ids(const IDSelector& sel) {
ntotal = j;
size_t num_batches = (ntotal + batch_size - 1) / batch_size;
codes.resize(num_batches * batch_size * code_size);
cum_sums.resize(num_batches * batch_size * (n_levels + 1));
cum_sums.resize(num_batches * batch_size * (pano.n_levels + 1));
}
return nremove;
}
Expand Down Expand Up @@ -843,7 +843,7 @@ void IndexFlatPanorama::search_subset(
{
SingleResultHandler res(handler);

std::vector<float> query_cum_norms(n_levels + 1);
std::vector<float> query_cum_norms(pano.n_levels + 1);

// Panorama's optimized point-wise refinement (Algorithm 2):
// Batch-wise Panorama, as implemented in Panorama.h, incurs
Expand Down Expand Up @@ -881,7 +881,7 @@ void IndexFlatPanorama::search_subset(
continue;
}

size_t cum_sum_offset = (n_levels + 1) * idx;
size_t cum_sum_offset = (pano.n_levels + 1) * idx;
float cum_sum = cum_sums[cum_sum_offset];
float exact_distance = 0.0f;
if constexpr (!is_sim) {
Expand All @@ -897,7 +897,7 @@ void IndexFlatPanorama::search_subset(
local_stats.total_dims += d;

bool pruned = false;
for (size_t level = 0; level < n_levels; level++) {
for (size_t level = 0; level < pano.n_levels; level++) {
local_stats.total_dims_scanned +=
pano.level_width_floats;

Expand Down
5 changes: 4 additions & 1 deletion faiss/IndexHNSW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,10 @@ IndexHNSWFlat::IndexHNSWFlat(int d_in, int M, MetricType metric)
**************************************************************/

IndexHNSWFlatPanorama::IndexHNSWFlatPanorama()
: IndexHNSWFlat(), cum_sums(), pano(0, 1, 1), num_panorama_levels(0) {}
: IndexHNSWFlat(),
cum_sums(),
pano(sizeof(float), 1, 1),
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is needed to avoid a division by zero when computing `size_t n_real_levels = d / level_width_floats;`.

num_panorama_levels(0) {}

IndexHNSWFlatPanorama::IndexHNSWFlatPanorama(
int d_in,
Expand Down
20 changes: 16 additions & 4 deletions faiss/impl/Panorama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,20 @@ Panorama::Panorama(
void Panorama::set_derived_values() {
FAISS_THROW_IF_NOT_MSG(n_levels > 0, "Panorama: n_levels must be > 0");
this->d = code_size / sizeof(float);
FAISS_THROW_IF_NOT_MSG(n_levels <= d, "Panorama: n_levels must be <= d");
this->level_width_floats = ((d + n_levels - 1) / n_levels);
this->level_width = this->level_width_floats * sizeof(float);
size_t n_real_levels = d / level_width_floats;
if (d > n_real_levels * level_width_floats) {
n_real_levels++;
}
if (this->n_levels != n_real_levels) {
fprintf(stderr,
"WARNING truncating nlevels from %zu to %zu\n",
this->n_levels,
n_real_levels);
this->n_levels = n_real_levels;
}
}

/**
Expand Down Expand Up @@ -151,12 +163,12 @@ void Panorama::reconstruct(idx_t key, float* recons, const uint8_t* codes_base)

for (size_t level = 0; level < n_levels; level++) {
size_t level_offset = level * level_width * batch_size;
size_t actual_level_width =
std::min(level_width, code_size - level * level_width);
const uint8_t* src = codes_base + batch_offset + level_offset +
pos_in_batch * level_width;
pos_in_batch * actual_level_width;
uint8_t* dest = recons_buffer + level * level_width;
size_t copy_size =
std::min(level_width, code_size - level * level_width);
memcpy(dest, src, copy_size);
memcpy(dest, src, actual_level_width);
}
}

Expand Down
10 changes: 5 additions & 5 deletions tests/test_flat_panorama.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ def test_add_search_add_search(self):

def test_reconstruct(self):
"""Test reconstruct and reconstruct_n return original vectors"""
d, nb, nt, nq, nlevels = 128, 10000, 15000, 10, 8
d, nb, nt, nq, nlevels = 964, 1000, 15000, 10, 128
_, xb, _ = self.generate_data(d, nt, nb, nq, seed=2025)

for metric in self.METRICS:
Expand All @@ -558,7 +558,7 @@ def test_reconstruct(self):

def test_remove_ids_then_add(self):
"""Test removing vectors with remove_ids() then adding more vectors"""
d, nb, nt, nq, nlevels, k = 128, 500000, 0, 10, 9, 15
d, nb, nt, nq, nlevels, k = 964, 50000, 0, 10, 128, 15
_, xb, xq = self.generate_data(d, nt, nb, nq, seed=2026)

xb1 = xb[:nb // 2]
Expand Down Expand Up @@ -603,7 +603,7 @@ def test_remove_ids_then_add(self):

def test_merge_from(self):
"""Test merging indexes with merge_from()"""
d, nb, nt, nq, nlevels, k, batch_size = 128, 500000, 0, 10, 9, 15, 16
d, nb, nt, nq, nlevels, k, batch_size = 964, 50000, 0, 10, 128, 15, 16
_, xb, xq = self.generate_data(d, nt, nb, nq, seed=2027)

# Split data and create two separate indexes
Expand Down Expand Up @@ -637,7 +637,7 @@ def test_merge_from(self):

def test_permute_entries(self):
"""Test permuting entries with permute_entries()"""
d, nb, nt, nq, nlevels, k = 128, 500000, 0, 10, 8, 15
d, nb, nt, nq, nlevels, k = 964, 50000, 0, 20, 128, 10
_, xb, xq = self.generate_data(d, nt, nb, nq, seed=2028)

for metric in self.METRICS:
Expand All @@ -664,7 +664,7 @@ def test_permute_entries(self):

def test_serialization(self):
"""Test write/read Panorama indexes preserves search results"""
d, nb, nt, nq, nlevels, k = 128, 10000, 15000, 100, 8, 20
d, nb, nt, nq, nlevels, k = 964, 10000, 15000, 100, 128, 20
_, xb, xq = self.generate_data(d, nt, nb, nq, seed=2024)

for metric in self.METRICS:
Expand Down
32 changes: 29 additions & 3 deletions tests/test_hnsw_panorama.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,31 @@ def test_different_panorama_levels(self):
# More levels should still maintain reasonable recall
self.assertGreaterEqual(recall, 0.80)

def test_uneven_dimension_division(self):
    """Check HNSW Panorama recall when nlevels does not evenly divide d.

    For every (d, nlevels) pair, builds an IndexHNSWFlatPanorama, searches
    it, and requires recall against the ground truth to be at least 0.80.
    """
    nb, nt, nq, k = 1000, 700, 20, 5
    dim_level_pairs = [(65, 4), (63, 8), (100, 7), (960, 128), (964, 128)]

    for d, nlevels in dim_level_pairs:
        with self.subTest(d=d, nlevels=nlevels):
            _, xb, xq = self.generate_data(d, nt, nb, nq, seed=789)

            # Build the index under test.
            index = faiss.IndexHNSWFlatPanorama(d, 32, nlevels)
            index.hnsw.efSearch = 64
            index.add(xb)

            # Approximate neighbours first, then the reference neighbours.
            _, I = index.search(xq, k)
            _, gt_I = self.compute_ground_truth(xb, xq, k)

            recall = self.compute_recall(gt_I, I)
            failure_msg = (
                f"Recall too low for d={d}, nlevels={nlevels}: "
                f"{recall:.4f}"
            )
            self.assertGreaterEqual(recall, 0.80, failure_msg)

def test_consistency(self):
"""Test that search results are consistent across multiple searches."""
d = 64
Expand All @@ -180,16 +205,17 @@ def test_consistency(self):

def test_io(self):
"""Test serialization and deserialization."""
d = 64
d = 964
nb = 500
nt = 700
nq = 10
k = 5
nlevels = 128

# Generate data
_, xb, xq = self.generate_data(d, nt, nb, nq, seed=2024)

index = faiss.IndexHNSWFlatPanorama(d, 16, 8)
index = faiss.IndexHNSWFlatPanorama(d, 16, nlevels)
index.add(xb)

# Get search results before saving
Expand All @@ -204,7 +230,7 @@ def test_io(self):
self.assertIsInstance(loaded_index, faiss.IndexHNSWFlatPanorama)
self.assertEqual(loaded_index.d, d)
self.assertEqual(loaded_index.ntotal, nb)
self.assertEqual(loaded_index.num_panorama_levels, 8)
self.assertEqual(loaded_index.num_panorama_levels, nlevels)

# Search after loading
D_after, I_after = loaded_index.search(xq, k)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_ivf_flat_panorama.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def test_different_n_levels(self):

def test_uneven_dimension_division(self):
"""Test when n_levels doesn't evenly divide dimension"""
test_cases = [(65, 4), (63, 8), (100, 7)]
test_cases = [(65, 4), (63, 8), (100, 7), (960, 128), (964, 128)]

# TODO(aknayar): Test functions like get_single_code().

Expand Down
2 changes: 1 addition & 1 deletion tests/test_refine_panorama.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def test_different_n_levels(self):

def test_uneven_dimension_division(self):
"""Test when n_levels doesn't evenly divide dimension"""
test_cases = [(65, 4), (63, 8), (100, 7)]
test_cases = [(65, 4), (63, 8), (100, 7), (960, 128), (964, 128)]

for metric in self.METRICS:
for d, nlevels in test_cases:
Expand Down
Loading