Skip to content

Commit 85fc627

Browse files
scsiguymeta-codesync[bot]
authored and committed
Validate ProductQuantizer M*ksub during deserialization to prevent oversized allocations (#5187)
Summary:

Pull Request resolved: #5187

ProductQuantizer::set_derived_values only checks d % M == 0, which is satisfied by any M when d == 0. A corrupt or maliciously constructed serialized index can therefore carry an enormous M alongside an empty centroids vector and pass read_ProductQuantizer's existing centroids validation. Downstream allocations sized M * ksub — most notably the r_norms vector in initialize_IVFPQ_precomputed_table when an IVFPQ is precomputed at deserialization time — then exceed std::vector::max_size() and raise std::length_error. In code paths invoked from noexcept callers this aborts the process via std::terminate.

Two layers of protection:

1. read_ProductQuantizer now bounds M * ksub by the configurable deserialization vector byte limit, mirroring the existing d * ksub check that protects the centroids vector. This caps every PQ-derived M * ksub allocation (residual norms, search-time distance tables) at the same byte budget the caller has already chosen for vector deserialization, and rejects pathological M values up front with a FaissException rather than letting them propagate to allocation sites.

2. initialize_IVFPQ_precomputed_table now uses mul_no_overflow when computing table_size = pq.M * pq.ksub * nlist * sizeof(float). The existing precomputed_table_max_bytes guard depended on this product not wrapping size_t; with raw multiplication a sufficiently large M, ksub, or nlist silently wrapped to a small value, bypassed the guard, and proceeded to the std::vector::max_size() failure described above. Switching to mul_no_overflow makes the guard work correctly for any input that reaches it, and also defends the in-memory IVFPQ construction paths (IndexIVFPQ::train, IndexHNSW post-init, IndexIVFPQFastScan) without changing their behavior on well-formed inputs.

Reviewed By: mnorris11

Differential Revision: D104129118

fbshipit-source-id: c273aecd22572d734f535df98127dbb7a437ac4b
1 parent b7618fa commit 85fc627

3 files changed

Lines changed: 124 additions & 8 deletions

File tree

faiss/IndexIVFPQ.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,9 @@ void initialize_IVFPQ_precomputed_table(
404404
return;
405405
}
406406

407+
const size_t m_ksub =
408+
mul_no_overflow(pq.M, pq.ksub, "IVFPQ precomputed_table");
409+
407410
if (use_precomputed_table == 0) { // then choose the type of table
408411
if (!(quantizer->metric_type == METRIC_L2 && by_residual)) {
409412
if (verbose) {
@@ -418,7 +421,10 @@ void initialize_IVFPQ_precomputed_table(
418421
if (miq && pq.M % miq->pq.M == 0) {
419422
use_precomputed_table = 2;
420423
} else {
421-
size_t table_size = pq.M * pq.ksub * nlist * sizeof(float);
424+
size_t table_size = mul_no_overflow(
425+
mul_no_overflow(m_ksub, nlist, "IVFPQ precomputed_table"),
426+
sizeof(float),
427+
"IVFPQ precomputed_table");
422428
if (table_size > precomputed_table_max_bytes) {
423429
if (verbose) {
424430
printf("IndexIVFPQ::precompute_table: not precomputing table, "
@@ -438,7 +444,7 @@ void initialize_IVFPQ_precomputed_table(
438444
}
439445

440446
// squared norms of the PQ centroids
441-
std::vector<float> r_norms(pq.M * pq.ksub, NAN);
447+
std::vector<float> r_norms(m_ksub, NAN);
442448
for (size_t m = 0; m < pq.M; m++) {
443449
for (size_t j = 0; j < pq.ksub; j++) {
444450
r_norms[m * pq.ksub + j] =
@@ -447,15 +453,16 @@ void initialize_IVFPQ_precomputed_table(
447453
}
448454

449455
if (use_precomputed_table == 1) {
450-
precomputed_table.resize(nlist * pq.M * pq.ksub);
456+
precomputed_table.resize(
457+
mul_no_overflow(nlist, m_ksub, "IVFPQ precomputed_table"));
451458
std::vector<float> centroid(d);
452459

453460
for (size_t i = 0; i < nlist; i++) {
454461
quantizer->reconstruct(i, centroid.data());
455462

456-
float* tab = &precomputed_table[i * pq.M * pq.ksub];
463+
float* tab = &precomputed_table[i * m_ksub];
457464
pq.compute_inner_prod_table(centroid.data(), tab);
458-
fvec_madd_dispatch(pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
465+
fvec_madd_dispatch(m_ksub, r_norms.data(), 2.0, tab, tab);
459466
}
460467
} else if (use_precomputed_table == 2) {
461468
const MultiIndexQuantizer* miq =
@@ -464,7 +471,8 @@ void initialize_IVFPQ_precomputed_table(
464471
const ProductQuantizer& cpq = miq->pq;
465472
FAISS_THROW_IF_NOT(pq.M % cpq.M == 0);
466473

467-
precomputed_table.resize(cpq.ksub * pq.M * pq.ksub);
474+
precomputed_table.resize(
475+
mul_no_overflow(cpq.ksub, m_ksub, "IVFPQ precomputed_table"));
468476

469477
// reorder PQ centroid table
470478
std::vector<float> centroids(d * cpq.ksub, NAN);
@@ -481,8 +489,8 @@ void initialize_IVFPQ_precomputed_table(
481489
cpq.ksub, centroids.data(), precomputed_table.data());
482490

483491
for (size_t i = 0; i < cpq.ksub; i++) {
484-
float* tab = &precomputed_table[i * pq.M * pq.ksub];
485-
fvec_madd_dispatch(pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
492+
float* tab = &precomputed_table[i * m_ksub];
493+
fvec_madd_dispatch(m_ksub, r_norms.data(), 2.0, tab, tab);
486494
}
487495
}
488496
}

faiss/impl/index_read.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,13 @@ void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f) {
712712
FAISS_THROW_IF_NOT_MSG(
713713
n < get_deserialization_vector_byte_limit() / sizeof(float),
714714
"PQ centroids allocation would exceed deserialization byte limit");
715+
// Per-subquantizer tables (e.g. IVFPQ residual norms, search-time
716+
// distance tables) are sized M * ksub.
717+
size_t m_ksub = mul_no_overflow(pq->M, ksub, "PQ M*ksub");
718+
FAISS_THROW_IF_NOT_MSG(
719+
m_ksub <
720+
get_deserialization_vector_byte_limit() / sizeof(float),
721+
"PQ M*ksub allocation would exceed deserialization byte limit");
715722
}
716723
pq->set_derived_values();
717724
READVECTOR(pq->centroids);

tests/test_read_index_deserialize.cpp

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3798,6 +3798,107 @@ TEST(ReadIndexDeserialize, IndexLatticeCodeSizeTooLarge) {
37983798
expect_read_throws_with(buf, "code_size");
37993799
}
38003800

3801+
// -----------------------------------------------------------------------
3802+
// Test: ProductQuantizer with M*ksub exceeding the deserialization byte
3803+
// limit is rejected. ProductQuantizer::set_derived_values only validates
3804+
// d % M == 0, which is satisfied by any M when d == 0, so a crafted PQ
3805+
// header could carry an enormous M alongside an empty centroids vector.
3806+
// Without the read-side bound, downstream allocations sized M*ksub (e.g.
3807+
// IVFPQ residual norms in initialize_IVFPQ_precomputed_table) exceed
3808+
// std::vector::max_size() and throw std::length_error (fatal under noexcept).
3809+
// -----------------------------------------------------------------------
3810+
TEST(ReadIndexDeserialize, ProductQuantizerMKsubExceedsByteLimit) {
3811+
const size_t old_limit = get_deserialization_vector_byte_limit();
3812+
set_deserialization_vector_byte_limit(1 << 20); // 1 MB
3813+
3814+
std::vector<uint8_t> buf;
3815+
push_fourcc(buf, "IxPq");
3816+
push_index_header(buf, /*d=*/0, /*ntotal=*/0);
3817+
// PQ: d=0 (so 0 % M == 0 holds), M=1<<20, nbits=8 -> ksub=256
3818+
// M * ksub = 2^28 floats = 1 GB, well above the 1 MB byte limit.
3819+
push_pq(buf,
3820+
/*d=*/0,
3821+
/*M=*/(size_t{1} << 20),
3822+
/*nbits=*/8,
3823+
/*centroids=*/{});
3824+
3825+
expect_read_throws_with(buf, "M*ksub");
3826+
3827+
set_deserialization_vector_byte_limit(old_limit);
3828+
}
3829+
3830+
// -----------------------------------------------------------------------
3831+
// Test: initialize_IVFPQ_precomputed_table rejects parameter combinations
3832+
// whose nlist * pq.M * pq.ksub * sizeof(float) computation overflows
3833+
// size_t. Without overflow-checked multiplication, the wrapped value
3834+
// silently bypasses the precomputed_table_max_bytes guard and the
3835+
// subsequent r_norms allocation aborts the process.
3836+
// -----------------------------------------------------------------------
3837+
TEST(ReadIndexDeserialize, InitializeIVFPQPrecomputedTableOverflowRejected) {
3838+
// Construct a PQ whose M*ksub = 2^40 * 2^24 = 2^64 does not fit in a
3839+
// 64-bit size_t, so the checked M*ksub product already overflows before
3840+
// the nlist factor (taken from the quantizer's ntotal) is multiplied in.
3841+
ProductQuantizer pq;
3842+
pq.d = 0;
3843+
pq.M = size_t{1} << 40;
3844+
pq.nbits = 24;
3845+
pq.ksub = size_t{1} << pq.nbits;
3846+
pq.dsub = 0;
3847+
pq.code_size = 0;
3848+
// Skip set_derived_values (which would resize centroids); we are
3849+
// exercising initialize_IVFPQ_precomputed_table's arithmetic guard, not
3850+
// PQ training.
3851+
3852+
IndexFlatL2 quantizer(/*d_in=*/0);
3853+
quantizer.ntotal = size_t{1} << 20; // nlist
3854+
AlignedTable<float> precomputed_table;
3855+
int use_precomputed_table = 0;
3856+
3857+
EXPECT_THROW(
3858+
initialize_IVFPQ_precomputed_table(
3859+
use_precomputed_table,
3860+
&quantizer,
3861+
pq,
3862+
precomputed_table,
3863+
/*by_residual=*/true,
3864+
/*verbose=*/false),
3865+
faiss::FaissException);
3866+
}
3867+
3868+
// -----------------------------------------------------------------------
3869+
// Test: initialize_IVFPQ_precomputed_table rejects an overflowing M*ksub
3870+
// even when the caller has pre-set use_precomputed_table to 1, which
3871+
// bypasses the in-function size-class branch. Protects in-memory IVFPQ
3872+
// callers (training, IndexHNSW, IndexIVFPQFastScan) and the
3873+
// IndexIVFIndependentQuantizer deserialization path that reads
3874+
// use_precomputed_table directly from the stream.
3875+
// -----------------------------------------------------------------------
3876+
TEST(ReadIndexDeserialize,
3877+
InitializeIVFPQPrecomputedTableUserSetFlagOverflowRejected) {
3878+
ProductQuantizer pq;
3879+
pq.d = 0;
3880+
pq.M = size_t{1} << 40;
3881+
pq.nbits = 24;
3882+
pq.ksub = size_t{1} << pq.nbits;
3883+
pq.dsub = 0;
3884+
pq.code_size = 0;
3885+
3886+
IndexFlatL2 quantizer(/*d_in=*/0);
3887+
quantizer.ntotal = 1; // nlist
3888+
AlignedTable<float> precomputed_table;
3889+
int use_precomputed_table = 1; // caller pre-selects table type 1
3890+
3891+
EXPECT_THROW(
3892+
initialize_IVFPQ_precomputed_table(
3893+
use_precomputed_table,
3894+
&quantizer,
3895+
pq,
3896+
precomputed_table,
3897+
/*by_residual=*/true,
3898+
/*verbose=*/false),
3899+
faiss::FaissException);
3900+
}
3901+
38013902
// ============================================================
38023903
// SVS fourcc rejection / deserialization safety (Group F: T262015608)
38033904
// ============================================================

0 commit comments

Comments
 (0)