diff --git a/faiss/impl/ClusteringInitialization.cpp b/faiss/impl/ClusteringInitialization.cpp index 35cd325bb1..04f2bdde8e 100644 --- a/faiss/impl/ClusteringInitialization.cpp +++ b/faiss/impl/ClusteringInitialization.cpp @@ -221,27 +221,27 @@ void ClusteringInitialization::init_kmeans_plus_plus( std::vector cumsum(n); // Select remaining centroids using D² sampling - for (size_t c = result.first_new_centroid_idx; c < k; c++) { - // Compute cumulative sum - cumsum[0] = min_distances[0]; - for (size_t i = 1; i < n; i++) { - cumsum[i] = cumsum[i - 1] + min_distances[i]; - } + with_simd_level([&]() { + for (size_t c = result.first_new_centroid_idx; c < k; c++) { + // Compute cumulative sum + cumsum[0] = min_distances[0]; + for (size_t i = 1; i < n; i++) { + cumsum[i] = cumsum[i - 1] + min_distances[i]; + } - // Sample using precomputed cumsum - size_t next_idx = sample_from_cumsum(cumsum, rng); + // Sample using precomputed cumsum + size_t next_idx = sample_from_cumsum(cumsum, rng); - float* new_centroid = centroids + c * d; - std::memcpy(new_centroid, x + next_idx * d, d * sizeof(float)); + float* new_centroid = centroids + c * d; + std::memcpy(new_centroid, x + next_idx * d, d * sizeof(float)); - // Update min distances incrementally - with_simd_level([&]() { + // Update min distances incrementally for (size_t i = 0; i < n; i++) { double dist = fvec_L2sqr(x + i * d, new_centroid, d); min_distances[i] = std::min(min_distances[i], dist); } - }); - } + } + }); } void ClusteringInitialization::init_afkmc2( diff --git a/faiss/utils/NeuralNet.cpp b/faiss/utils/NeuralNet.cpp index cf5df9cf0c..74663e2189 100644 --- a/faiss/utils/NeuralNet.cpp +++ b/faiss/utils/NeuralNet.cpp @@ -268,12 +268,12 @@ nn::Int32Tensor2D QINCoStep::encode( res = residuals->data(); } - for (size_t i = 0; i < n; i++) { - const float* q = x.data() + i * d; - const float* db = zqs_r.data() + i * K * d; - float dis_min = HUGE_VALF; - int64_t idx = -1; - with_simd_level([&]() { + with_simd_level([&]() { + for (size_t i = 0; i < n; i++) { + const float* q = x.data() + i * d; + const float* db = zqs_r.data() + i * K * d; + float dis_min = HUGE_VALF; + int64_t idx = -1; for (size_t j = 0; j < static_cast(K); j++) { float dis = fvec_L2sqr(q, db, d); if (dis < dis_min) { @@ -282,17 +282,17 @@ nn::Int32Tensor2D QINCoStep::encode( } db += d; } - }); - codes.v[i] = idx; - if (res) { - const float* xhat_row = xhat.data() + i * d; - const float* xhat_next_row = zqs_r.data() + (i * K + idx) * d; - for (size_t j = 0; j < static_cast(d); j++) { - res[j] = xhat_next_row[j] - xhat_row[j]; + codes.v[i] = idx; + if (res) { + const float* xhat_row = xhat.data() + i * d; + const float* xhat_next_row = zqs_r.data() + (i * K + idx) * d; + for (size_t j = 0; j < static_cast(d); j++) { + res[j] = xhat_next_row[j] - xhat_row[j]; + } + res += d; } - res += d; } - } + }); return codes; } diff --git a/faiss/utils/distances.cpp b/faiss/utils/distances.cpp index e5c22118a4..abfc7ced22 100644 --- a/faiss/utils/distances.cpp +++ b/faiss/utils/distances.cpp @@ -838,16 +838,16 @@ void knn_inner_products_by_idx( ld_ids = ny; } + with_simd_level([&]() { #pragma omp parallel for if (nx > 100) - for (int64_t i = 0; i < static_cast(nx); i++) { - const float* x_ = x + i * d; - const int64_t* idsi = ids + i * ld_ids; - size_t j; - float* __restrict simi = res_vals + i * k; - int64_t* __restrict idxi = res_ids + i * k; - minheap_heapify(k, simi, idxi); + for (int64_t i = 0; i < static_cast(nx); i++) { + const float* x_ = x + i * d; + const int64_t* idsi = ids + i * ld_ids; + size_t j; + float* __restrict simi = res_vals + i * k; + int64_t* __restrict idxi = res_ids + i * k; + minheap_heapify(k, simi, idxi); - with_simd_level([&]() { for (j = 0; j < nsubset; j++) { if (idsi[j] < 0 || static_cast(idsi[j]) >= ny) { break; @@ -858,9 +858,9 @@ void knn_inner_products_by_idx( minheap_replace_top(k, simi, idxi, ip, idsi[j]); } } - }); - minheap_reorder(k, simi, idxi); - } + minheap_reorder(k, simi, idxi); + } + }); } void knn_L2sqr_by_idx( @@ -878,14 +878,14 @@ void knn_L2sqr_by_idx( if (ld_ids < 0) { ld_ids = ny; } + with_simd_level([&]() { #pragma omp parallel for if (nx > 100) - for (int64_t i = 0; i < static_cast(nx); i++) { - const float* x_ = x + i * d; - const int64_t* __restrict idsi = ids + i * ld_ids; - float* __restrict simi = res_vals + i * k; - int64_t* __restrict idxi = res_ids + i * k; - maxheap_heapify(k, simi, idxi); - with_simd_level([&]() { + for (int64_t i = 0; i < static_cast(nx); i++) { + const float* x_ = x + i * d; + const int64_t* __restrict idsi = ids + i * ld_ids; + float* __restrict simi = res_vals + i * k; + int64_t* __restrict idxi = res_ids + i * k; + maxheap_heapify(k, simi, idxi); for (size_t j = 0; j < nsubset; j++) { if (idsi[j] < 0 || static_cast(idsi[j]) >= ny) { break; @@ -896,9 +896,9 @@ void knn_L2sqr_by_idx( maxheap_replace_top(k, simi, idxi, disij, idsi[j]); } } - }); - maxheap_reorder(k, simi, idxi); - } + maxheap_reorder(k, simi, idxi); + } + }); } void pairwise_L2sqr(