Fix d_out check bug and add descriptive error messages to VectorTransform assertions (#5047)

alibeklfc · meta-codesync[bot] · commit 84262f09d4d0 · 2026-04-06T13:29:46.000-07:00
Summary: Pull Request resolved: #5047 ## Summary This diff fixes a bug and improves error message quality in `VectorTransform.cpp`. ### Bug Fix (line 153) `VectorTransform::check_identical()` had a copy-paste bug where `d_in` was checked twice and `d_out` was never checked: ```cpp // Before (buggy): FAISS_THROW_IF_NOT(other.d_in == d_in && other.d_in == d_in); // After (fixed): FAISS_THROW_IF_NOT_MSG( other.d_in == d_in && other.d_out == d_out, "input and output dimensions must match"); ``` This meant two VectorTransforms with matching `d_in` but different `d_out` would incorrectly pass the identity check. This could lead to subtle bugs when comparing or serializing transform chains (e.g., in `IndexPreTransform`). ### Error Message Improvements All 28 bare `FAISS_THROW_IF_NOT()` calls in `VectorTransform.cpp` have been converted to `FAISS_THROW_IF_NOT_MSG()` with clear, actionable error messages. Previously, assertion failures would only show the raw C++ condition (e.g., `"Error: 'p > 0' failed"`), which is unhelpful for users. Now each assertion provides semantic context: - **Dynamic cast failures**: `"failed to cast to HadamardRotation"` instead of `"hr"` - **Dimension mismatches**: `"input and output dimensions must match when PCA is disabled"` instead of `"din == dout"` - **Training state errors**: `"CenteringTransform has not been trained"` instead of `"is_trained"` - **LAPACK errors**: `"LAPACK dgesvd workspace query failed"` instead of `"info == 0"` - **Parameter validation**: `"map entries must be -1 (unused) or valid input dimension indices"` instead of raw condition ### Affected classes - `VectorTransform` (base class) - `LinearTransform` - `HadamardRotation` - `PCAMatrix` - `ITQMatrix` - `ITQTransform` - `OPQMatrix` - `NormalizationTransform` - `CenteringTransform` - `RemapDimensionsTransform` ### Design decisions - Used `FAISS_THROW_IF_NOT_MSG` (not `FAISS_THROW_IF_NOT_FMT`) since all messages are static strings — no runtime formatting needed, keeping zero overhead. - Error messages follow existing Faiss patterns seen in `index_read.cpp` and other files. - Each message describes the semantic meaning of the condition, not just the code. Reviewed By: mnorris11 Differential Revision: D99674067 fbshipit-source-id: cf0fe9a8a7f047013011683d76221682d97beb6c
diff --git a/faiss/VectorTransform.cpp b/faiss/VectorTransform.cpp
@@ -150,7 +150,9 @@ void VectorTransform::reverse_transform(idx_t, const float*, float*) const {
 }
 
 void VectorTransform::check_identical(const VectorTransform& other) const {
-    FAISS_THROW_IF_NOT(other.d_in == d_in && other.d_in == d_in);
+    FAISS_THROW_IF_NOT_MSG(
+            other.d_in == d_in && other.d_out == d_out,
+            "transforms must have matching d_in and d_out");
 }
 
 /*********************************************
@@ -303,7 +305,9 @@ void LinearTransform::print_if_verbose(
     if (!verbose)
         return;
     printf("matrix %s: %d*%d [\n", name, n, d);
-    FAISS_THROW_IF_NOT(mat.size() >= static_cast<size_t>(n) * d);
+    FAISS_THROW_IF_NOT_MSG(
+            mat.size() >= static_cast<size_t>(n) * d,
+            "matrix size is too small for the given dimensions");
     for (int i = 0; i < n; i++) {
         for (int j = 0; j < d; j++) {
             printf("%10.5g ", mat[i * d + j]);
@@ -316,8 +320,10 @@ void LinearTransform::print_if_verbose(
 void LinearTransform::check_identical(const VectorTransform& other_in) const {
     VectorTransform::check_identical(other_in);
     auto other = dynamic_cast<const LinearTransform*>(&other_in);
-    FAISS_THROW_IF_NOT(other);
-    FAISS_THROW_IF_NOT(other->A == A && other->b == b);
+    FAISS_THROW_IF_NOT_MSG(other, "failed to cast to LinearTransform");
+    FAISS_THROW_IF_NOT_MSG(
+            other->A == A && other->b == b,
+            "LinearTransform matrix A and bias vector b must match");
 }
 
 /*********************************************
@@ -390,7 +396,8 @@ static void generate_signs(
         std::vector<float>& s1,
         std::vector<float>& s2,
         std::vector<float>& s3) {
-    FAISS_THROW_IF_NOT(p > 0);
+    FAISS_THROW_IF_NOT_MSG(
+            p > 0, "number of Hadamard factors p must be positive");
     SplitMix64RandomGenerator rng(seed);
     s1.resize(p);
     s2.resize(p);
@@ -426,9 +433,15 @@ void HadamardRotation::apply_noalloc(idx_t n, const float* x, float* xt) const {
 
     size_t d = d_in;
     size_t p = d_out;
-    FAISS_THROW_IF_NOT(signs1.size() == p);
-    FAISS_THROW_IF_NOT(signs2.size() == p);
-    FAISS_THROW_IF_NOT(signs3.size() == p);
+    FAISS_THROW_IF_NOT_MSG(
+            signs1.size() == p,
+            "sign-flip vector 1 size must match output dimension");
+    FAISS_THROW_IF_NOT_MSG(
+            signs2.size() == p,
+            "sign-flip vector 2 size must match output dimension");
+    FAISS_THROW_IF_NOT_MSG(
+            signs3.size() == p,
+            "sign-flip vector 3 size must match output dimension");
 
     // Each unnormalized FWHT scales norms by sqrt(p).
     // Three rounds scale by p^(3/2). Normalize once at the end.
@@ -468,10 +481,14 @@ void HadamardRotation::apply_noalloc(idx_t n, const float* x, float* xt) const {
 
 void HadamardRotation::check_identical(const VectorTransform& other) const {
     auto* hr = dynamic_cast<const HadamardRotation*>(&other);
-    FAISS_THROW_IF_NOT(hr);
-    FAISS_THROW_IF_NOT(d_in == hr->d_in);
-    FAISS_THROW_IF_NOT(d_out == hr->d_out);
-    FAISS_THROW_IF_NOT(seed == hr->seed);
+    FAISS_THROW_IF_NOT_MSG(hr, "failed to cast to HadamardRotation");
+    FAISS_THROW_IF_NOT_MSG(
+            d_in == hr->d_in, "HadamardRotation input dimensions must match");
+    FAISS_THROW_IF_NOT_MSG(
+            d_out == hr->d_out,
+            "HadamardRotation output dimensions must match");
+    FAISS_THROW_IF_NOT_MSG(
+            seed == hr->seed, "HadamardRotation seeds must match");
 }
 
 /*********************************************
@@ -731,7 +748,9 @@ void PCAMatrix::train(idx_t n, const float* x_in) {
 }
 
 void PCAMatrix::copy_from(const PCAMatrix& other) {
-    FAISS_THROW_IF_NOT(other.is_trained);
+    FAISS_THROW_IF_NOT_MSG(
+            other.is_trained,
+            "source PCAMatrix must be trained before copying");
     mean = other.mean;
     eigenvalues = other.eigenvalues;
     PCAMat = other.PCAMat;
@@ -761,7 +780,9 @@ void PCAMatrix::prepare_Ab() {
         }
 
         if (balanced_bins != 0) {
-            FAISS_THROW_IF_NOT(d_out % balanced_bins == 0);
+            FAISS_THROW_IF_NOT_MSG(
+                    d_out % balanced_bins == 0,
+                    "output dimension must be divisible by balanced_bins");
             int dsub = d_out / balanced_bins;
             std::vector<float> Ain;
             std::swap(A, Ain);
@@ -945,7 +966,8 @@ void ITQMatrix::train(idx_t n, const float* xf) {
                     &lwork,
                     &info);
 
-            FAISS_THROW_IF_NOT(info == 0);
+            FAISS_THROW_IF_NOT_MSG(
+                    info == 0, "LAPACK dgesvd workspace query failed");
             lwork = size_t(lwork1);
             std::vector<double> work(lwork);
             dgesvd_("A",
@@ -1001,14 +1023,17 @@ ITQTransform::ITQTransform(int din, int dout, bool do_pca_in)
           itq(dout),
           pca_then_itq(din, dout, false) {
     if (!do_pca_in) {
-        FAISS_THROW_IF_NOT(din == dout);
+        FAISS_THROW_IF_NOT_MSG(
+                din == dout,
+                "input and output dimensions must match when PCA is disabled");
     }
     max_train_per_dim = 10;
     is_trained = false;
 }
 
 void ITQTransform::train(idx_t n, const float* x_in) {
-    FAISS_THROW_IF_NOT(!is_trained);
+    FAISS_THROW_IF_NOT_MSG(
+            !is_trained, "ITQTransform has already been trained");
 
     size_t max_train_points = std::max(d_in * max_train_per_dim, 32768);
     const float* x =
@@ -1100,9 +1125,10 @@ void ITQTransform::apply_noalloc(idx_t n, const float* x, float* xt) const {
 void ITQTransform::check_identical(const VectorTransform& other_in) const {
     VectorTransform::check_identical(other_in);
     auto other = dynamic_cast<const ITQTransform*>(&other_in);
-    FAISS_THROW_IF_NOT(other);
+    FAISS_THROW_IF_NOT_MSG(other, "failed to cast to ITQTransform");
     pca_then_itq.check_identical(other->pca_then_itq);
-    FAISS_THROW_IF_NOT(other->mean == mean);
+    FAISS_THROW_IF_NOT_MSG(
+            other->mean == mean, "ITQTransform mean vectors must match");
 }
 
 /*********************************************
@@ -1184,7 +1210,8 @@ void OPQMatrix::train(idx_t n, const float* x_in) {
         // we use only the d * d2 upper part of the matrix
         A.resize(d * d2);
     } else {
-        FAISS_THROW_IF_NOT(A.size() == d * d2);
+        FAISS_THROW_IF_NOT_MSG(
+                A.size() == d * d2, "rotation matrix A has incorrect size");
         rotation = A.data();
     }
 
@@ -1360,8 +1387,9 @@ void NormalizationTransform::check_identical(
         const VectorTransform& other_in) const {
     VectorTransform::check_identical(other_in);
     auto other = dynamic_cast<const NormalizationTransform*>(&other_in);
-    FAISS_THROW_IF_NOT(other);
-    FAISS_THROW_IF_NOT(other->norm == norm);
+    FAISS_THROW_IF_NOT_MSG(other, "failed to cast to NormalizationTransform");
+    FAISS_THROW_IF_NOT_MSG(
+            other->norm == norm, "normalization type must match");
 }
 
 /*********************************************
@@ -1389,7 +1417,8 @@ void CenteringTransform::train(idx_t n, const float* x) {
 
 void CenteringTransform::apply_noalloc(idx_t n, const float* x, float* xt)
         const {
-    FAISS_THROW_IF_NOT(is_trained);
+    FAISS_THROW_IF_NOT_MSG(
+            is_trained, "CenteringTransform has not been trained");
 
     for (idx_t i = 0; i < n; i++) {
         for (int j = 0; j < d_in; j++) {
@@ -1400,7 +1429,8 @@ void CenteringTransform::apply_noalloc(idx_t n, const float* x, float* xt)
 
 void CenteringTransform::reverse_transform(idx_t n, const float* xt, float* x)
         const {
-    FAISS_THROW_IF_NOT(is_trained);
+    FAISS_THROW_IF_NOT_MSG(
+            is_trained, "CenteringTransform has not been trained");
 
     for (idx_t i = 0; i < n; i++) {
         for (int j = 0; j < d_in; j++) {
@@ -1413,8 +1443,9 @@ void CenteringTransform::check_identical(
         const VectorTransform& other_in) const {
     VectorTransform::check_identical(other_in);
     auto other = dynamic_cast<const CenteringTransform*>(&other_in);
-    FAISS_THROW_IF_NOT(other);
-    FAISS_THROW_IF_NOT(other->mean == mean);
+    FAISS_THROW_IF_NOT_MSG(other, "failed to cast to CenteringTransform");
+    FAISS_THROW_IF_NOT_MSG(
+            other->mean == mean, "CenteringTransform mean vectors must match");
 }
 
 /*********************************************
@@ -1429,7 +1460,9 @@ RemapDimensionsTransform::RemapDimensionsTransform(
     map.resize(dout);
     for (int i = 0; i < dout; i++) {
         map[i] = map_in[i];
-        FAISS_THROW_IF_NOT(map[i] == -1 || (map[i] >= 0 && map[i] < din));
+        FAISS_THROW_IF_NOT_MSG(
+                map[i] == -1 || (map[i] >= 0 && map[i] < din),
+                "map entries must be -1 (unused) or valid input dimension indices");
     }
 }
 
@@ -1486,6 +1519,7 @@ void RemapDimensionsTransform::check_identical(
         const VectorTransform& other_in) const {
     VectorTransform::check_identical(other_in);
     auto other = dynamic_cast<const RemapDimensionsTransform*>(&other_in);
-    FAISS_THROW_IF_NOT(other);
-    FAISS_THROW_IF_NOT(other->map == map);
+    FAISS_THROW_IF_NOT_MSG(other, "failed to cast to RemapDimensionsTransform");
+    FAISS_THROW_IF_NOT_MSG(
+            other->map == map, "RemapDimensionsTransform maps must match");
 }