diff --git a/faiss/IVFlib.cpp b/faiss/IVFlib.cpp
index a39fb83f23..03a9755125 100644
--- a/faiss/IVFlib.cpp
+++ b/faiss/IVFlib.cpp
@@ -601,10 +601,14 @@ void handle_ivf(
                     sharded_centroids[i].data());
         }
         char fname[256];
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         snprintf(fname, 256, filename_template.c_str(), i);
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
         faiss::write_index(sharded_index, fname);
         delete sharded_index;
     }
@@ -655,10 +659,14 @@ void handle_binary_ivf(
                     sharded_centroids[i].data());
         }
         char fname[256];
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         snprintf(fname, 256, filename_template.c_str(), i);
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
         faiss::write_index_binary(sharded_index, fname);
         delete sharded_index;
     }
diff --git a/faiss/IndexIVFFlat.h b/faiss/IndexIVFFlat.h
index 711a663296..ae74794e5a 100644
--- a/faiss/IndexIVFFlat.h
+++ b/faiss/IndexIVFFlat.h
@@ -91,7 +91,7 @@ struct IVFFlatScanner : InvertedListScanner {
             size_t list_size,
             const uint8_t* codes,
             const idx_t* ids,
-            ResultHandler& handler) const;
+            ResultHandler& handler) const override;
 };
 
 struct IndexIVFFlatDedup : IndexIVFFlat {
diff --git a/faiss/IndexIVFSpectralHash.cpp b/faiss/IndexIVFSpectralHash.cpp
index fc35675214..f0e53d36ea 100644
--- a/faiss/IndexIVFSpectralHash.cpp
+++ b/faiss/IndexIVFSpectralHash.cpp
@@ -38,7 +38,8 @@ IndexIVFSpectralHash::IndexIVFSpectralHash(
                   own_invlists_in),
           nbit(nbit_in),
           period(period_in) {
-    auto rr = std::make_unique<RandomRotationMatrix>(d_in, nbit_in);
+    auto rr = std::make_unique<RandomRotationMatrix>(
+            static_cast<int>(d_in), nbit_in);
     rr->init(1234);
     vt = rr.release();
     own_fields = true;
diff --git a/faiss/IndexRaBitQFastScan.h b/faiss/IndexRaBitQFastScan.h
index 988a5c26b7..eeda760e4c 100644
--- a/faiss/IndexRaBitQFastScan.h
+++ b/faiss/IndexRaBitQFastScan.h
@@ -279,11 +279,11 @@ struct RaBitQHeapHandler
         rabitq_stats.n_multibit_evaluations += local_multibit_evaluations;
     }
 
-    void begin(const float* norms) {
+    void begin(const float* norms) override {
         normalizers = norms;
     }
 
-    void end() {
+    void end() override {
 #pragma omp parallel for if (nq > 100)
         for (int64_t q = 0; q < static_cast<int64_t>(nq); q++) {
             float* heap_dis = heap_distances + q * k;
diff --git a/faiss/impl/PolysemousTraining.cpp b/faiss/impl/PolysemousTraining.cpp
index 9d61a6452c..512ecadf76 100644
--- a/faiss/impl/PolysemousTraining.cpp
+++ b/faiss/impl/PolysemousTraining.cpp
@@ -823,10 +823,14 @@ void PolysemousTraining::optimize_reproduce_distances(
 
         if (log_pattern.size()) {
             char fname[256];
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
             snprintf(fname, 256, log_pattern.c_str(), m);
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
             printf("opening log file %s\n", fname);
             optim.logfile = fopen(fname, "w");
             FAISS_THROW_IF_NOT_MSG(optim.logfile, "could not open logfile");
@@ -938,10 +942,14 @@ void PolysemousTraining::optimize_ranking(
 
         if (log_pattern.size()) {
             char fname[256];
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
             snprintf(fname, 256, log_pattern.c_str(), m);
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
             printf("opening log file %s\n", fname);
             optim.logfile = fopen(fname, "w");
             FAISS_THROW_IF_NOT_FMT(
diff --git a/faiss/impl/simdlib/simdlib_avx2.h b/faiss/impl/simdlib/simdlib_avx2.h
index d89bd3f832..734c4095aa 100644
--- a/faiss/impl/simdlib/simdlib_avx2.h
+++ b/faiss/impl/simdlib/simdlib_avx2.h
@@ -141,12 +141,17 @@ struct simd16uint16_tpl<SIMDLevel::AVX2> : simd256bit_tpl<SIMDLevel::AVX2> {
         storeu((void*)bytes);
         char res[1000];
         char* ptr = res;
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         for (int idx = 0; idx < 16; idx++) {
-            ptr += sprintf(ptr, fmt, bytes[idx]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[idx]);
         }
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
         // strip last ,
         ptr[-1] = 0;
         return std::string(res);
@@ -462,12 +467,17 @@ struct simd32uint8_tpl<SIMDLevel::AVX2> : simd256bit_tpl<SIMDLevel::AVX2> {
         storeu((void*)bytes);
         char res[1000];
         char* ptr = res;
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         for (int idx = 0; idx < 32; idx++) {
-            ptr += sprintf(ptr, fmt, bytes[idx]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[idx]);
         }
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
         // strip last ,
         ptr[-1] = 0;
         return std::string(res);
@@ -601,12 +611,17 @@ struct simd8uint32_tpl<SIMDLevel::AVX2> : simd256bit_tpl<SIMDLevel::AVX2> {
         storeu((void*)bytes);
         char res[1000];
         char* ptr = res;
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         for (int idx = 0; idx < 8; idx++) {
-            ptr += sprintf(ptr, fmt, bytes[idx]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[idx]);
         }
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
         // strip last ,
         ptr[-1] = 0;
         return std::string(res);
@@ -727,7 +742,8 @@ struct simd8float32_tpl<SIMDLevel::AVX2> : simd256bit_tpl<SIMDLevel::AVX2> {
         char res[1000];
         char* ptr = res;
         for (int idx = 0; idx < 8; idx++) {
-            ptr += sprintf(ptr, "%g,", tab[idx]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), "%g,", tab[idx]);
         }
         // strip last ,
         ptr[-1] = 0;
diff --git a/faiss/impl/simdlib/simdlib_avx512.h b/faiss/impl/simdlib/simdlib_avx512.h
index c92190c727..1968b8ae23 100644
--- a/faiss/impl/simdlib/simdlib_avx512.h
+++ b/faiss/impl/simdlib/simdlib_avx512.h
@@ -127,9 +127,17 @@ struct simd32uint16_tpl<SIMDLevel::AVX512> : simd512bit_tpl<SIMDLevel::AVX512> {
         storeu((void*)bytes);
         char res[2000];
         char* ptr = res;
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         for (int i = 0; i < 32; i++) {
-            ptr += sprintf(ptr, fmt, bytes[i]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[i]);
         }
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
         // strip last ,
         ptr[-1] = 0;
         return std::string(res);
@@ -263,9 +271,17 @@ struct simd64uint8_tpl<SIMDLevel::AVX512> : simd512bit_tpl<SIMDLevel::AVX512> {
         storeu((void*)bytes);
         char res[2000];
         char* ptr = res;
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         for (int i = 0; i < 64; i++) {
-            ptr += sprintf(ptr, fmt, bytes[i]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[i]);
         }
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
         // strip last ,
         ptr[-1] = 0;
         return std::string(res);
@@ -374,7 +390,8 @@ struct simd16float32_tpl<SIMDLevel::AVX512>
         char res[1000];
         char* ptr = res;
         for (int i = 0; i < 16; i++) {
-            ptr += sprintf(ptr, "%g,", tab[i]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), "%g,", tab[i]);
         }
         ptr[-1] = 0;
         return std::string(res);
diff --git a/faiss/impl/simdlib/simdlib_emulated.h b/faiss/impl/simdlib/simdlib_emulated.h
index 69fbaf80b3..6d56d97097 100644
--- a/faiss/impl/simdlib/simdlib_emulated.h
+++ b/faiss/impl/simdlib/simdlib_emulated.h
@@ -129,12 +129,17 @@ struct simd16uint16_tpl<SIMDLevel::NONE> : simd256bit_tpl<SIMDLevel::NONE> {
 
     std::string elements_to_string(const char* fmt) const {
         char res[1000], *ptr = res;
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         for (int i = 0; i < 16; i++) {
-            ptr += sprintf(ptr, fmt, u16[i]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), fmt, u16[i]);
         }
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
         // strip last ,
         ptr[-1] = 0;
         return std::string(res);
@@ -507,12 +512,16 @@ struct simd32uint8_tpl<SIMDLevel::NONE> : simd256bit_tpl<SIMDLevel::NONE> {
 
     std::string elements_to_string(const char* fmt) const {
         char res[1000], *ptr = res;
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         for (int i = 0; i < 32; i++) {
-            ptr += sprintf(ptr, fmt, u8[i]);
+            ptr += snprintf(ptr, (size_t)(res + sizeof(res) - ptr), fmt, u8[i]);
         }
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
         // strip last ,
         ptr[-1] = 0;
         return std::string(res);
@@ -707,12 +716,17 @@ struct simd8uint32_tpl<SIMDLevel::NONE> : simd256bit_tpl<SIMDLevel::NONE> {
 
     std::string elements_to_string(const char* fmt) const {
         char res[1000], *ptr = res;
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
         for (int i = 0; i < 8; i++) {
-            ptr += sprintf(ptr, fmt, u32[i]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), fmt, u32[i]);
         }
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
+#endif
         // strip last ,
         ptr[-1] = 0;
         return std::string(res);
@@ -863,7 +877,8 @@ struct simd8float32_tpl<SIMDLevel::NONE> : simd256bit_tpl<SIMDLevel::NONE> {
     std::string tostring() const {
         char res[1000], *ptr = res;
         for (int i = 0; i < 8; i++) {
-            ptr += sprintf(ptr, "%g,", f32[i]);
+            ptr += snprintf(
+                    ptr, (size_t)(res + sizeof(res) - ptr), "%g,", f32[i]);
         }
         // strip last ,
         ptr[-1] = 0;
diff --git a/faiss/python/faiss_example_external_module.swig b/faiss/python/faiss_example_external_module.swig
index e1a21e9731..4eec0e4a1c 100644
--- a/faiss/python/faiss_example_external_module.swig
+++ b/faiss/python/faiss_example_external_module.swig
@@ -77,7 +77,7 @@ struct IDSelectorModulo : faiss::IDSelector {
 
     IDSelectorModulo(int mod): mod(mod) {}
 
-    bool is_member(faiss::idx_t id) const {
+    bool is_member(faiss::idx_t id) const override {
         return id % mod == 0;
     }
 
diff --git a/faiss/utils/distances.h b/faiss/utils/distances.h
index f44772366b..7dc8896454 100644
--- a/faiss/utils/distances.h
+++ b/faiss/utils/distances.h
@@ -600,4 +600,67 @@ int fvec_madd_and_argmin(
         const float* b,
         float* c);
 
+/* Explicit specialization declarations for all SIMD-templated distance
+   functions.  C++ [temp.expl.spec] requires an explicit specialization to be
+   declared before its first use that would cause an implicit instantiation,
+   in every translation unit where such a use occurs.  GCC/Clang are lenient
+   about this, but MSVC is not — without these declarations the linker emits
+   LNK2001 for the specializations defined in the _avx2 translation units. */
+
+// clang-format off
+#define FAISS_DECLARE_DISTANCES_SPECIALIZATIONS(SL)                            \
+    template <> float fvec_L2sqr<SL>(                                          \
+            const float* x, const float* y, size_t d);                         \
+    template <> float fvec_inner_product<SL>(                                  \
+            const float* x, const float* y, size_t d);                         \
+    template <> float fvec_L1<SL>(                                             \
+            const float* x, const float* y, size_t d);                         \
+    template <> float fvec_Linf<SL>(                                           \
+            const float* x, const float* y, size_t d);                         \
+    template <> void fvec_inner_product_batch_4<SL>(                           \
+            const float* x, const float* y0, const float* y1,                  \
+            const float* y2, const float* y3, const size_t d,                  \
+            float& dis0, float& dis1, float& dis2, float& dis3);              \
+    template <> void fvec_L2sqr_batch_4<SL>(                                   \
+            const float* x, const float* y0, const float* y1,                  \
+            const float* y2, const float* y3, const size_t d,                  \
+            float& dis0, float& dis1, float& dis2, float& dis3);              \
+    template <> void fvec_inner_products_ny<SL>(                               \
+            float* ip, const float* x, const float* y,                         \
+            size_t d, size_t ny);                                              \
+    template <> void fvec_L2sqr_ny<SL>(                                        \
+            float* dis, const float* x, const float* y,                        \
+            size_t d, size_t ny);                                              \
+    template <> void fvec_L2sqr_ny_transposed<SL>(                             \
+            float* dis, const float* x, const float* y,                        \
+            const float* y_sqlen, size_t d, size_t d_offset, size_t ny);       \
+    template <> size_t fvec_L2sqr_ny_nearest<SL>(                              \
+            float* distances_tmp_buffer, const float* x,                       \
+            const float* y, size_t d, size_t ny);                              \
+    template <> size_t fvec_L2sqr_ny_nearest_y_transposed<SL>(                 \
+            float* distances_tmp_buffer, const float* x,                       \
+            const float* y, const float* y_sqlen,                              \
+            size_t d, size_t d_offset, size_t ny);                             \
+    template <> float fvec_norm_L2sqr<SL>(const float* x, size_t d);           \
+    template <> void fvec_add<SL>(                                             \
+            size_t d, const float* a, const float* b, float* c);               \
+    template <> void fvec_add<SL>(                                             \
+            size_t d, const float* a, float b, float* c);                      \
+    template <> void fvec_sub<SL>(                                             \
+            size_t d, const float* a, const float* b, float* c);               \
+    template <> void compute_PQ_dis_tables_dsub2<SL>(                          \
+            size_t d, size_t ksub, const float* centroids,                     \
+            size_t nx, const float* x, bool is_inner_product,                  \
+            float* dis_tables);                                                \
+    template <> void fvec_madd<SL>(                                            \
+            size_t n, const float* a, float bf, const float* b, float* c);     \
+    template <> int fvec_madd_and_argmin<SL>(                                  \
+            size_t n, const float* a, float bf, const float* b, float* c);
+
+FAISS_DECLARE_DISTANCES_SPECIALIZATIONS(SIMDLevel::NONE)
+FAISS_DECLARE_DISTANCES_SPECIALIZATIONS(SIMDLevel::AVX2)
+
+#undef FAISS_DECLARE_DISTANCES_SPECIALIZATIONS
+// clang-format on
+
 } // namespace faiss
diff --git a/faiss/utils/partitioning.cpp b/faiss/utils/partitioning.cpp
index 7c2decfe9d..fd2c6d7703 100644
--- a/faiss/utils/partitioning.cpp
+++ b/faiss/utils/partitioning.cpp
@@ -8,6 +8,7 @@
 #include <faiss/utils/partitioning.h>
 
 #include <cassert>
+#include <cinttypes>
 #include <cmath>
 
 #include <faiss/impl/FaissAssert.h>
@@ -432,7 +433,7 @@ uint16_t simd_partition_fuzzy_with_bounds(
     // lower bound inclusive, upper exclusive
     size_t s0 = s0i, s1 = s1i + 1;
 
-    IFV printf("bounds: %ld %ld\n", s0, s1 - 1);
+    IFV printf("bounds: %zu %zu\n", s0, s1 - 1);
 
     int thresh;
     size_t n_eq = 0, n_lt = 0;
@@ -444,7 +445,7 @@ uint16_t simd_partition_fuzzy_with_bounds(
         count_lt_and_eq<C>(vals, n, thresh, n_lt, n_eq);
 
         IFV printf(
-                "   [%ld %ld] thresh=%d n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n",
+                "   [%zu %zu] thresh=%d n_lt=%zu n_eq=%zu, q=%zu:%zu/%zu\n",
                 s0,
                 s1,
                 thresh,
@@ -481,7 +482,8 @@ uint16_t simd_partition_fuzzy_with_bounds(
     // number of equal values to keep
     int64_t n_eq_1 = q - n_lt;
 
-    IFV printf("shrink: thresh=%d q=%ld n_eq_1=%ld\n", thresh, q, n_eq_1);
+    IFV printf(
+            "shrink: thresh=%d q=%zu n_eq_1=%" PRId64 "\n", thresh, q, n_eq_1);
     if (n_eq_1 < 0) { // happens when > q elements are at lower bound
         assert(s0 + 1 == s1);
         q = q_min;
@@ -491,14 +493,15 @@ uint16_t simd_partition_fuzzy_with_bounds(
             thresh++;
         }
         n_eq_1 = q;
-        IFV printf("  override: thresh=%d n_eq_1=%ld\n", thresh, n_eq_1);
+        IFV printf(
+                "  override: thresh=%d n_eq_1=%" PRId64 "\n", thresh, n_eq_1);
     } else {
         assert(n_eq_1 <= n_eq);
     }
 
     size_t wp = simd_compress_array<C>(vals, ids, n, thresh, n_eq_1);
 
-    IFV printf("wp=%ld\n", wp);
+    IFV printf("wp=%zu\n", wp);
     assert(wp == q);
     if (q_out) {
         *q_out = q;
diff --git a/faiss/utils/simd_impl/distances_sse-inl.h b/faiss/utils/simd_impl/distances_sse-inl.h
index 5b69dc48be..38b748dc04 100644
--- a/faiss/utils/simd_impl/distances_sse-inl.h
+++ b/faiss/utils/simd_impl/distances_sse-inl.h
@@ -360,7 +360,8 @@ inline int fvec_madd_and_argmin_sse(
         float bf,
         const float* b,
         float* c) {
-    if ((n & 3) == 0 && ((((long)a) | ((long)b) | ((long)c)) & 15) == 0) {
+    if ((n & 3) == 0 &&
+        ((((uintptr_t)a) | ((uintptr_t)b) | ((uintptr_t)c)) & 15) == 0) {
         return fvec_madd_and_argmin_sse_ref(n, a, bf, b, c);
     } else {
         return fvec_madd_and_argmin<SIMDLevel::NONE>(n, a, bf, b, c);