Add QT_0bit to ScalarQuantizer for centroid-only IVF distance (facebookresearch#5079)

Michael Norris · facebook-github-bot · commit 6b9279c2f220 · 2026-04-10T21:43:29.000-07:00
Summary:

Adds a new ScalarQuantizer::QT_0bit type that encodes 0 bits per component, enabling IndexIVFScalarQuantizer to operate in centroid-only distance mode (code_size=0). In this mode, distance_to_code() returns the coarse distance from the quantizer, no per-vector data is stored, and reconstruction returns the centroid vector.

This is useful for IVF configurations where only query-to-centroid distances are needed (e.g., Unicorn's SQ0 use case).

Changes:
- Add QT_0bit enum to ScalarQuantizer::QuantizerType
- Add IVFCoarseDistanceScanner that returns coarse_dis from set_list()
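- Handle code_size=0 in encode/decode/reconstruct/add paths
- decode_vectors() now adds the centroid back when by_residual is set (applies to every quantizer type; list_nos is then required)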
- Add 'SQ0' to index_factory
- Force by_residual=false for QT_0bit
- Guard memcpy in ArrayInvertedLists for code_size=0
- Handle QT_0bit in index_read.cpp validation switch
- Add tests for L2, IP, and index_factory
---

Differential Revision: D100348052
diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp
@@ -133,6 +133,15 @@ IndexIVFScalarQuantizer::IndexIVFScalarQuantizer(
         invlists->code_size = code_size;
     }
     is_trained = false;
+    if (qtype == ScalarQuantizer::QT_0bit) {
+        // QT_0bit stores no per-vector code, so residual encoding is forced
+        // off. Qualify with this-> so the member is written even if a
+        // constructor parameter of the same name shadows it.
+        this->by_residual = false;
+        // NOTE(review): only the scalar quantizer has nothing to train here;
+        // confirm the coarse quantizer is still trained before add().
+        is_trained = true;
+    }
 }
 
 IndexIVFScalarQuantizer::IndexIVFScalarQuantizer() : IndexIVF() {
@@ -156,6 +160,23 @@ void IndexIVFScalarQuantizer::encode_vectors(
         const idx_t* list_nos,
         uint8_t* codes,
         bool include_listnos) const {
+    if (sq.code_size == 0) {
+        // QT_0bit: there is no per-vector payload, so the only bytes that can
+        // be produced are the optional coarse (list number) codes.
+        if (include_listnos) {
+            size_t coarse_size = coarse_code_size();
+            // Zero every slot first, as the regular path below does, so that
+            // vectors without a list (list_no < 0) get defined bytes.
+            memset(codes, 0, coarse_size * n);
+            for (idx_t i = 0; i < n; i++) {
+                int64_t list_no = list_nos[i];
+                if (list_no >= 0) {
+                    encode_listno(list_no, codes + i * coarse_size);
+                }
+            }
+        }
+        return;
+    }
     std::unique_ptr<ScalarQuantizer::SQuantizer> squant(sq.select_quantizer());
     size_t coarse_size = include_listnos ? coarse_code_size() : 0;
     memset(codes, 0, (code_size + coarse_size) * n);
@@ -186,14 +203,57 @@ void IndexIVFScalarQuantizer::encode_vectors(
 void IndexIVFScalarQuantizer::decode_vectors(
         idx_t n,
         const uint8_t* codes,
-        const idx_t*,
+        const idx_t* list_nos,
         float* x) const {
+    // Decodes n codes into x (d floats per vector). list_nos, when non-null,
+    // gives the inverted list of each code; it is needed to return centroids
+    // in QT_0bit mode and to add the centroid back when by_residual is set
+    // (in which case a null list_nos is rejected).
+    if (sq.code_size == 0) {
+        // QT_0bit: no per-vector code exists, so the reconstruction is the
+        // centroid of the owning list; without list_nos the output is zeros.
+        if (list_nos) {
+            for (idx_t i = 0; i < n; i++) {
+                quantizer->reconstruct(list_nos[i], x + i * d);
+            }
+        } else {
+            memset(x, 0, sizeof(float) * d * n);
+        }
+        return;
+    }
     FAISS_THROW_IF_NOT(is_trained);
-    return sq.decode(codes, x, n);
+    sq.decode(codes, x, n);
+    if (by_residual) {
+        // The decoded values are residuals: add each list centroid back.
+        FAISS_THROW_IF_NOT_MSG(
+                list_nos, "decode_vectors with by_residual requires list_nos");
+#pragma omp parallel if (n > 1000)
+        {
+            // One scratch buffer per thread rather than one heap allocation
+            // per decoded vector.
+            std::vector<float> centroid(d);
+#pragma omp for
+            for (idx_t i = 0; i < n; i++) {
+                quantizer->reconstruct(list_nos[i], centroid.data());
+                for (size_t j = 0; j < static_cast<size_t>(d); j++) {
+                    x[i * d + j] += centroid[j];
+                }
+            }
+        }
+    }
 }
 
 void IndexIVFScalarQuantizer::sa_decode(idx_t n, const uint8_t* codes, float* x)
         const {
+    if (sq.code_size == 0) {
+        size_t coarse_size = coarse_code_size();
+        for (idx_t i = 0; i < n; i++) {
+            const uint8_t* code = codes + i * coarse_size;
+            int64_t list_no = decode_listno(code);
+            quantizer->reconstruct(list_no, x + i * d);
+        }
+        return;
+    }
     std::unique_ptr<ScalarQuantizer::SQuantizer> squant(sq.select_quantizer());
     size_t coarse_size = coarse_code_size();
 
@@ -224,6 +273,26 @@ void IndexIVFScalarQuantizer::add_core(
         const idx_t* coarse_idx,
         void* inverted_list_context) {
     FAISS_THROW_IF_NOT(is_trained);
+    if (sq.code_size == 0) {
+        // QT_0bit: only the ids are stored. add_entry() still takes a code
+        // pointer, so it is handed a dummy byte (code_size is 0).
+        uint8_t dummy_code = 0;
+        DirectMapAdd dm_add(direct_map, n, xids);
+        for (idx_t i = 0; i < n; i++) {
+            int64_t list_no = coarse_idx[i];
+            if (list_no >= 0) {
+                int64_t id = xids ? xids[i] : ntotal + i;
+                // Forward the caller's context instead of dropping it.
+                size_t ofs = invlists->add_entry(
+                        list_no, id, &dummy_code, inverted_list_context);
+                dm_add.add(i, list_no, ofs);
+            } else {
+                dm_add.add(i, -1, 0);
+            }
+        }
+        ntotal += n;
+        return;
+    }
 
     std::unique_ptr<ScalarQuantizer::SQuantizer> squant(sq.select_quantizer());
 
@@ -277,6 +343,11 @@ void IndexIVFScalarQuantizer::reconstruct_from_offset(
         int64_t list_no,
         int64_t offset,
         float* recons) const {
+    if (sq.code_size == 0) {
+        // QT_0bit: reconstruct from centroid
+        quantizer->reconstruct(list_no, recons);
+        return;
+    }
     const uint8_t* code = invlists->get_single_code(list_no, offset);
 
     if (by_residual) {
diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp
@@ -62,6 +62,10 @@ void ScalarQuantizer::set_derived_sizes() {
             code_size = d * 2;
             bits = 16;
             break;
+        case QT_0bit:
+            code_size = 0;
+            bits = 0;
+            break;
         default:
             break;
     }
@@ -71,6 +75,10 @@ void ScalarQuantizer::train(size_t n, const float* x) {
     using scalar_quantizer::train_NonUniform;
     using scalar_quantizer::train_Uniform;
 
+    if (qtype == QT_0bit) {
+        return; // nothing to train for centroid-only mode
+    }
+
     int bit_per_dim = qtype == QT_4bit_uniform ? 4
             : qtype == QT_4bit                 ? 4
             : qtype == QT_6bit                 ? 6
@@ -128,6 +136,9 @@ ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
 
 void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
         const {
+    if (code_size == 0) {
+        return; // QT_0bit: nothing to encode
+    }
     std::unique_ptr<SQuantizer> squant(select_quantizer());
 
     memset(codes, 0, code_size * n);
@@ -138,6 +149,10 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
 }
 
 void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const {
+    if (code_size == 0) {
+        memset(x, 0, sizeof(float) * d * n);
+        return; // QT_0bit: no per-vector data, zero-fill
+    }
     std::unique_ptr<SQuantizer> squant(select_quantizer());
 
 #pragma omp parallel for
diff --git a/faiss/impl/ScalarQuantizer.h b/faiss/impl/ScalarQuantizer.h
@@ -33,6 +33,7 @@ struct ScalarQuantizer : Quantizer {
         QT_bf16,
         QT_8bit_direct_signed, ///< fast indexing of signed int8s ranging from
                                ///< [-128 to 127]
+        QT_0bit, ///< 0 bits per component, centroid-only distance (for IVF)
         QT_count
     };
 
diff --git a/faiss/impl/index_read.cpp b/faiss/impl/index_read.cpp
@@ -903,6 +903,7 @@ void read_ScalarQuantizer(
             case ScalarQuantizer::QT_bf16:
             case ScalarQuantizer::QT_8bit_direct:
             case ScalarQuantizer::QT_8bit_direct_signed:
+            case ScalarQuantizer::QT_0bit:
             case ScalarQuantizer::QT_count:
                 expected = 0;
                 break;
diff --git a/faiss/impl/scalar_quantizer/scanners.h b/faiss/impl/scalar_quantizer/scanners.h
@@ -159,6 +159,32 @@ InvertedListScanner* sq_select_InvertedListScanner(
         const IDSelector* sel,
         bool by_residual);
 
+/// Inverted-list scanner for QT_0bit (centroid-only mode): every code of a
+/// list is scored with the coarse distance handed to set_list().
+struct IVFCoarseDistanceScanner : InvertedListScanner {
+    float coarse_dis = 0; ///< coarse distance given to the last set_list()
+
+    IVFCoarseDistanceScanner(
+            bool is_similarity,
+            bool store_pairs,
+            const IDSelector* sel)
+            : InvertedListScanner(store_pairs, sel) {
+        keep_max = is_similarity;
+        code_size = 0; // nothing is stored per vector
+    }
+
+    void set_query(const float* /*query_vector*/) override {}
+
+    void set_list(idx_t list_no_in, float coarse_dis_in) override {
+        list_no = list_no_in;
+        coarse_dis = coarse_dis_in;
+    }
+
+    float distance_to_code(const uint8_t* /*code*/) const override {
+        return coarse_dis;
+    }
+};
+
 } // namespace scalar_quantizer
 
 } // namespace faiss
diff --git a/faiss/impl/scalar_quantizer/sq-dispatch.h b/faiss/impl/scalar_quantizer/sq-dispatch.h
@@ -85,6 +85,9 @@ ScalarQuantizer::SQuantizer* sq_select_quantizer<THE_LEVEL_TO_DISPATCH>(
             return new Quantizer8bitDirect<SL>(d, trained);
         case ScalarQuantizer::QT_8bit_direct_signed:
             return new Quantizer8bitDirectSigned<SL>(d, trained);
+        case ScalarQuantizer::QT_0bit:
+            FAISS_THROW_MSG(
+                    "QT_0bit does not support standalone quantization, use IndexIVFScalarQuantizer");
         default:
             FAISS_THROW_MSG("unknown qtype");
     }
@@ -175,6 +178,9 @@ SQDistanceComputer* select_distance_computer_body(
         case ScalarQuantizer::QT_8bit_direct_signed:
             return new DCTemplate<Quantizer8bitDirectSigned<SL2>, Sim, SL2>(
                     d, trained);
+        case ScalarQuantizer::QT_0bit:
+            FAISS_THROW_MSG(
+                    "QT_0bit does not support standalone distance computation, use IndexIVFScalarQuantizer");
         default:
             FAISS_THROW_MSG("unknown qtype");
     }
@@ -309,6 +315,9 @@ InvertedListScanner* sq_select_InvertedListScanner<THE_LEVEL_TO_DISPATCH>(
                         Quantizer8bitDirectSigned<SL2>,
                         Similarity,
                         SL2>>();
+            case ScalarQuantizer::QT_0bit:
+                return new IVFCoarseDistanceScanner(
+                        Similarity::metric_type != METRIC_L2, store_pairs, sel);
             default:
                 FAISS_THROW_MSG("unknown qtype");
         }
diff --git a/faiss/index_factory.cpp b/faiss/index_factory.cpp
@@ -154,9 +154,10 @@ std::map<std::string, ScalarQuantizer::QuantizerType> sq_types = {
         {"SQbf16", ScalarQuantizer::QT_bf16},
         {"SQ8_direct_signed", ScalarQuantizer::QT_8bit_direct_signed},
         {"SQ8_direct", ScalarQuantizer::QT_8bit_direct},
+        {"SQ0", ScalarQuantizer::QT_0bit},
 };
 const std::string sq_pattern =
-        "(SQ4|SQ8|SQ6|SQfp16|SQbf16|SQ8_direct_signed|SQ8_direct)";
+        "(SQ0|SQ4|SQ8|SQ6|SQfp16|SQbf16|SQ8_direct_signed|SQ8_direct)";
 
 std::map<std::string, AdditiveQuantizer::Search_type_t> aq_search_type = {
         {"_Nfloat", AdditiveQuantizer::ST_norm_float},
diff --git a/faiss/invlists/InvertedLists.cpp b/faiss/invlists/InvertedLists.cpp
@@ -289,7 +289,9 @@ size_t ArrayInvertedLists::add_entries(
     ids[list_no].resize(o + n_entry);
     memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
     codes[list_no].resize((o + n_entry) * code_size);
-    memcpy(&codes[list_no][o * code_size], code, code_size * n_entry);
+    if (code_size > 0) {
+        memcpy(&codes[list_no][o * code_size], code, code_size * n_entry);
+    }
     return o;
 }
 
@@ -328,7 +330,11 @@ void ArrayInvertedLists::update_entries(
     assert(list_no < nlist);
     assert(n_entry + offset <= ids[list_no].size());
     memcpy(&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
-    memcpy(&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
+    if (code_size > 0) {
+        memcpy(&codes[list_no][offset * code_size],
+               codes_in,
+               code_size * n_entry);
+    }
 }
 
 void ArrayInvertedLists::permute_invlists(const idx_t* map) {
diff --git a/tests/test_scalar_quantizer.cpp b/tests/test_scalar_quantizer.cpp
@@ -7,9 +7,17 @@
 
 #include <gtest/gtest.h>
 
+#include <algorithm>
+#include <cmath>
+#include <memory>
+#include <random>
 #include <vector>
 
+#include <faiss/IndexFlat.h>
+#include <faiss/IndexIVFFlat.h>
+#include <faiss/IndexScalarQuantizer.h>
 #include <faiss/impl/ScalarQuantizer.h>
+#include <faiss/index_factory.h>
 
 TEST(ScalarQuantizer, RSQuantilesClamping) {
     int d = 8;
@@ -95,3 +101,107 @@ TEST(ScalarQuantizer, RSQuantilesSmallDataset) {
 
     ASSERT_NO_THROW(sq.train(n, x.data()));
 }
+
+TEST(TestSQ0bit, CoarseOnlySearch) {
+    // QT_0bit with L2: reported distances must be query-to-centroid ones.
+    int d = 64;
+    int nlist = 8;
+    int nb = 1000;
+    int nq = 10;
+    int k = 5;
+
+    std::mt19937 rng(123); // seeded and portable, unlike drand48()
+    std::uniform_real_distribution<float> unif(0.0f, 1.0f);
+    std::vector<float> xb(nb * d), xq(nq * d);
+    std::generate(xb.begin(), xb.end(), [&] { return unif(rng); });
+    std::generate(xq.begin(), xq.end(), [&] { return unif(rng); });
+
+    faiss::IndexFlatL2 quantizer(d);
+    faiss::IndexIVFScalarQuantizer index(
+            &quantizer,
+            d,
+            nlist,
+            faiss::ScalarQuantizer::QT_0bit,
+            faiss::METRIC_L2,
+            false);
+    EXPECT_EQ(index.code_size, 0);
+    EXPECT_FALSE(index.by_residual);
+
+    index.train(nb, xb.data());
+    index.add(nb, xb.data());
+    EXPECT_EQ(index.ntotal, nb);
+
+    index.nprobe = nlist; // probe every list
+    std::vector<float> distances(nq * k);
+    std::vector<faiss::idx_t> labels(nq * k);
+    index.search(nq, xq.data(), k, distances.data(), labels.data());
+
+    // Verify we got results
+    for (int q = 0; q < nq; q++) {
+        EXPECT_GE(labels[q * k], 0);
+    }
+
+    // Each top-1 distance must equal one of that query's coarse distances.
+    std::vector<float> coarse_dis(nq * nlist);
+    std::vector<faiss::idx_t> coarse_ids(nq * nlist);
+    quantizer.search(
+            nq, xq.data(), nlist, coarse_dis.data(), coarse_ids.data());
+
+    for (int q = 0; q < nq; q++) {
+        float ivf_dis = distances[q * k];
+        bool found = false;
+        for (int j = 0; j < nlist; j++) {
+            if (std::abs(ivf_dis - coarse_dis[q * nlist + j]) < 1e-5) {
+                found = true;
+                break;
+            }
+        }
+        EXPECT_TRUE(found) << "IVF distance " << ivf_dis
+                           << " not found in coarse distances for query " << q;
+    }
+}
+
+TEST(TestSQ0bit, IndexFactory) {
+    // "IVF8,SQ0" must yield an IndexIVFScalarQuantizer in QT_0bit mode.
+    std::unique_ptr<faiss::Index> index(faiss::index_factory(32, "IVF8,SQ0"));
+    ASSERT_NE(index, nullptr);
+    auto* ivfsq = dynamic_cast<faiss::IndexIVFScalarQuantizer*>(index.get());
+    ASSERT_NE(ivfsq, nullptr); // fatal: ivfsq is dereferenced below
+    EXPECT_EQ(ivfsq->sq.qtype, faiss::ScalarQuantizer::QT_0bit);
+    EXPECT_EQ(ivfsq->code_size, 0);
+}
+
+TEST(TestSQ0bit, InnerProduct) {
+    int d = 64;
+    int nlist = 4;
+    int nb = 500;
+    int nq = 5;
+    int k = 3;
+
+    std::mt19937 rng(456); // seeded and portable, unlike drand48()
+    std::uniform_real_distribution<float> unif(0.0f, 1.0f);
+    std::vector<float> xb(nb * d), xq(nq * d);
+    std::generate(xb.begin(), xb.end(), [&] { return unif(rng); });
+    std::generate(xq.begin(), xq.end(), [&] { return unif(rng); });
+
+    faiss::IndexFlatIP quantizer(d);
+    faiss::IndexIVFScalarQuantizer index(
+            &quantizer,
+            d,
+            nlist,
+            faiss::ScalarQuantizer::QT_0bit,
+            faiss::METRIC_INNER_PRODUCT,
+            false);
+
+    index.train(nb, xb.data());
+    index.add(nb, xb.data());
+    index.nprobe = nlist;
+    std::vector<float> distances(nq * k);
+    std::vector<faiss::idx_t> labels(nq * k);
+    index.search(nq, xq.data(), k, distances.data(), labels.data());
+
+    // Smoke check for the inner-product path: every query has a top result.
+    for (int q = 0; q < nq; q++) {
+        EXPECT_GE(labels[q * k], 0);
+    }
+}