Skip to content

Commit b49833b

Browse files
authored
Refactor/add scalar dist function and setup cmake with detailed cpu flag (alibaba#238)
* fix: add scalar * fix: add avx512fp16 * fix: cmake config
1 parent e5ba11b commit b49833b

File tree

64 files changed

+2911
-2226
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+2911
-2226
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
2121
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)
2222

2323
if(NOT ANDROID AND AUTO_DETECT_ARCH AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386|x64")
24-
setup_compiler_march_for_x86(MATH_MARCH_FLAG_SSE MATH_MARCH_FLAG_AVX2 MATH_MARCH_FLAG_AVX512)
25-
message(STATUS "best compiler march, sse: " ${MATH_MARCH_FLAG_SSE} ", avx2: " ${MATH_MARCH_FLAG_AVX2} ", avx512: " ${MATH_MARCH_FLAG_AVX512})
24+
setup_compiler_march_for_x86(MATH_MARCH_FLAG_SSE MATH_MARCH_FLAG_AVX2 MATH_MARCH_FLAG_AVX512 MATH_MARCH_FLAG_AVX512FP16)
25+
message(STATUS "best compiler march, sse: " ${MATH_MARCH_FLAG_SSE} ", avx2: " ${MATH_MARCH_FLAG_AVX2} ", avx512: " ${MATH_MARCH_FLAG_AVX512} ", avx512fp16: " ${MATH_MARCH_FLAG_AVX512FP16})
2626
endif()
2727

2828
include_directories(${PROJECT_ROOT_DIR}/src/include)

cmake/option.cmake

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ option(ENABLE_OPENMP "Enable OpenMP support" OFF)
3535

3636
set(ARCH_OPTIONS
3737
ENABLE_NEHALEM ENABLE_SANDYBRIDGE ENABLE_HASWELL ENABLE_BROADWELL ENABLE_SKYLAKE
38-
ENABLE_SKYLAKE_AVX512 ENABLE_ICELAKE ENABLE_SAPPHIRERAPIDS ENABLE_EMERALDRAPIDS
39-
ENABLE_GRANITERAPIDS ENABLE_ZEN1 ENABLE_ZEN2 ENABLE_ZEN3
38+
ENABLE_SKYLAKE_AVX512 ENABLE_ICELAKE ENABLE_SAPPHIRERAPIDS ENABLE_EMERALDRAPIDS ENABLE_GRANITERAPIDS
39+
ENABLE_ZEN1 ENABLE_ZEN2 ENABLE_ZEN3
4040
ENABLE_ARMV8A ENABLE_ARMV8.1A ENABLE_ARMV8.2A ENABLE_ARMV8.3A ENABLE_ARMV8.4A
4141
ENABLE_ARMV8.5A ENABLE_ARMV8.6A
4242
ENABLE_NATIVE
@@ -103,30 +103,34 @@ function(_setup_x86_march)
103103
endif()
104104
endfunction()
105105

106-
function(setup_compiler_march_for_x86 VAR_NAME_SSE VAR_NAME_AVX2 VAR_NAME_AVX512)
106+
function(setup_compiler_march_for_x86 VAR_NAME_SSE VAR_NAME_AVX2 VAR_NAME_AVX512 VAR_NAME_AVX512FP16)
107107
#sse
108108
set(${VAR_NAME_SSE} "-march=corei7" PARENT_SCOPE)
109109

110110
#avx 2
111111
set(${VAR_NAME_AVX2} "-march=core-avx2" PARENT_SCOPE)
112112

113113
#avx512
114-
set(_x86_flags
115-
"graniterapids" "emeraldrapids" "sapphirerapids"
116-
"icelake-server" "skylake-avx512"
117-
)
118-
foreach(_arch IN LISTS _x86_flags)
119-
check_c_compiler_flag("-march=${_arch}" _COMP_SUPP_${_arch})
120-
if(_COMP_SUPP_${_arch})
121-
set(${VAR_NAME_AVX512} "-march=${_arch}" PARENT_SCOPE)
122-
return()
114+
set(_x86_flags_avx512 "icelake-server" "skylake-avx512" "core-avx2" "x86-64")
115+
foreach(_arch_avx512 IN LISTS _x86_flags_avx512)
116+
check_c_compiler_flag("-march=${_arch_avx512}" _COMP_SUPP_${_arch_avx512})
117+
if(_COMP_SUPP_${_arch_avx512})
118+
set(${VAR_NAME_AVX512} "-march=${_arch_avx512}" PARENT_SCOPE)
119+
break()
123120
endif()
124121
endforeach()
125122

126-
127-
set(${VAR_NAME_AVX512} "-march=core-avx2" PARENT_SCOPE)
128-
message(WARNING "No known avx512 microarchitecture flag found. Set up as core-avx2")
129-
123+
#avx512fp16
124+
set(_x86_flags_avx512fp16
125+
"sapphirerapids" "icelake-server" "skylake-avx512" "core-avx2" "x86-64"
126+
)
127+
foreach(_arch_avx512fp16 IN LISTS _x86_flags_avx512fp16)
128+
check_c_compiler_flag("-march=${_arch_avx512fp16}" _COMP_SUPP_${_arch_avx512fp16})
129+
if(_COMP_SUPP_${_arch_avx512fp16})
130+
set(${VAR_NAME_AVX512FP16} "-march=${_arch_avx512fp16}" PARENT_SCOPE)
131+
break()
132+
endif()
133+
endforeach()
130134
endfunction()
131135

132136
if(MSVC)

src/ailego/CMakeLists.txt

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ endif()
2020

2121
if(NOT ANDROID AND AUTO_DETECT_ARCH)
2222
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386|x64")
23-
setup_compiler_march_for_x86(MATH_MARCH_FLAG_SSE MATH_MARCH_FLAG_AVX2 MATH_MARCH_FLAG_AVX512)
24-
message(STATUS "best compiler march, sse: " ${MATH_MARCH_FLAG_SSE} ", avx2: " ${MATH_MARCH_FLAG_AVX2} ", avx512: " ${MATH_MARCH_FLAG_AVX512})
23+
setup_compiler_march_for_x86(MATH_MARCH_FLAG_SSE MATH_MARCH_FLAG_AVX2 MATH_MARCH_FLAG_AVX512 MATH_MARCH_FLAG_AVX512FP16)
24+
message(STATUS "best compiler march, sse: " ${MATH_MARCH_FLAG_SSE} ", avx2: " ${MATH_MARCH_FLAG_AVX2} ", avx512: " ${MATH_MARCH_FLAG_AVX512} ", avx512fp16: " ${MATH_MARCH_FLAG_AVX512FP16})
2525

2626
file(GLOB_RECURSE MATH_FILES_SSE
2727
${CMAKE_CURRENT_SOURCE_DIR}/math/*_sse.cc
@@ -42,16 +42,23 @@ if(NOT ANDROID AND AUTO_DETECT_ARCH)
4242
)
4343

4444
file(GLOB_RECURSE MATH_FILES_AVX512
45-
${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.cc
46-
${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.c
4745
${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512.cc
4846
${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512.c
49-
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.cc
50-
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.c
5147
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512.cc
5248
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512.c
5349
)
5450

51+
file(GLOB_RECURSE MATH_FILES_AVX512FP16
52+
${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.cc
53+
${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.c
54+
${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512fp16.cc
55+
${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512fp16.c
56+
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.cc
57+
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.c
58+
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512fp16.cc
59+
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512fp16.c
60+
)
61+
5562
foreach(MATH_FILE ${MATH_FILES_SSE})
5663
set_source_files_properties(
5764
${MATH_FILE}
@@ -75,6 +82,14 @@ if(NOT ANDROID AND AUTO_DETECT_ARCH)
7582
COMPILE_FLAGS "${MATH_MARCH_FLAG_AVX512}"
7683
)
7784
endforeach()
85+
86+
foreach(MATH_FILE ${MATH_FILES_AVX512FP16})
87+
set_source_files_properties(
88+
${MATH_FILE}
89+
PROPERTIES
90+
COMPILE_FLAGS "${MATH_MARCH_FLAG_AVX512FP16}"
91+
)
92+
endforeach()
7893
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
7994
# set(CMAKE_CXX_FLAGS "-march=armv8-a")
8095
# set(CMAKE_C_FLAGS "-march=armv8-a")

src/ailego/math/euclidean_distance_matrix.h

Lines changed: 50 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
namespace zvec {
2323
namespace ailego {
2424

25+
//--------------------------------------------------
26+
// Dense
27+
//--------------------------------------------------
2528
/*! Squared Euclidean Distance Matrix
2629
*/
2730
template <typename T, size_t M, size_t N, typename = void>
@@ -48,6 +51,46 @@ struct SquaredEuclideanDistanceMatrix<
4851
}
4952
};
5053

54+
template <>
55+
struct SquaredEuclideanDistanceMatrix<uint8_t, 1, 1> {
56+
//! Type of value
57+
using ValueType = uint8_t;
58+
59+
//! Compute the distance between matrix and query
60+
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
61+
float *out);
62+
};
63+
64+
template <>
65+
struct SquaredEuclideanDistanceMatrix<int8_t, 1, 1> {
66+
//! Type of value
67+
using ValueType = int8_t;
68+
69+
//! Compute the distance between matrix and query
70+
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
71+
float *out);
72+
};
73+
74+
template <>
75+
struct SquaredEuclideanDistanceMatrix<Float16, 1, 1> {
76+
//! Type of value
77+
using ValueType = Float16;
78+
79+
//! Compute the distance between matrix and query
80+
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
81+
float *out);
82+
};
83+
84+
template <>
85+
struct SquaredEuclideanDistanceMatrix<float, 1, 1> {
86+
//! Type of value
87+
using ValueType = float;
88+
89+
//! Compute the distance between matrix and query
90+
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
91+
float *out);
92+
};
93+
5194
/*! Squared Euclidean Distance Matrix
5295
*/
5396
template <typename T, size_t M, size_t N>
@@ -353,32 +396,6 @@ struct SquaredEuclideanDistanceMatrix<uint8_t, M, 1,
353396
}
354397
};
355398

356-
#if !defined(__SSE4_1__)
357-
/*! Squared Euclidean Distance Matrix (INT4, M=1, N=1)
358-
*/
359-
template <>
360-
struct SquaredEuclideanDistanceMatrix<uint8_t, 1, 1> {
361-
//! Type of value
362-
using ValueType = uint8_t;
363-
364-
//! Compute the distance between matrix and query
365-
static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
366-
float *out) {
367-
ailego_assert(m && q && dim && !(dim & 1) && out);
368-
369-
float sum = 0.0;
370-
for (size_t i = 0; i < (dim >> 1); ++i) {
371-
uint8_t m_val = m[i];
372-
uint8_t q_val = q[i];
373-
sum +=
374-
Int4SquaredDiffTable[((m_val << 4) & 0xf0) | ((q_val >> 0) & 0xf)] +
375-
Int4SquaredDiffTable[((m_val >> 0) & 0xf0) | ((q_val >> 4) & 0xf)];
376-
}
377-
*out = sum;
378-
}
379-
};
380-
#endif // !__SSE4_1__
381-
382399
/*! Euclidean Distance Matrix
383400
*/
384401
template <typename T, size_t M, size_t N,
@@ -424,76 +441,26 @@ struct EuclideanDistanceMatrix<
424441
}
425442
};
426443

427-
#if !defined(__SSE4_1__)
428-
/*! Euclidean Distance Matrix (INT4, M=1, N=1)
429-
*/
430444
template <>
431445
struct EuclideanDistanceMatrix<uint8_t, 1, 1> {
432446
//! Type of value
433447
using ValueType = uint8_t;
434448

435-
//! Compute the distance between matrix and query
436-
static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
437-
float *out) {
438-
ailego_assert(m && q && dim && !(dim & 1) && out);
439-
440-
float sum = 0.0;
441-
for (size_t i = 0; i < (dim >> 1); ++i) {
442-
uint8_t m_val = m[i];
443-
uint8_t q_val = q[i];
444-
sum +=
445-
Int4SquaredDiffTable[((m_val << 4) & 0xf0) | ((q_val >> 0) & 0xf)] +
446-
Int4SquaredDiffTable[((m_val >> 0) & 0xf0) | ((q_val >> 4) & 0xf)];
447-
}
448-
*out = std::sqrt(sum);
449-
}
450-
};
451-
#endif // !__SSE4_1__
452-
453-
#if defined(__SSE__) || defined(__ARM_NEON)
454-
/*! Squared Euclidean Distance Matrix (FP32, M=1, N=1)
455-
*/
456-
template <>
457-
struct SquaredEuclideanDistanceMatrix<float, 1, 1> {
458-
//! Type of value
459-
using ValueType = float;
460-
461-
//! Compute the distance between matrix and query
462-
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
463-
float *out);
464-
};
465-
#endif // __SSE__ || __ARM_NEON
466-
467-
#if defined(__SSE__) || (defined(__ARM_NEON) && (defined(__aarch64__)))
468-
/*! Euclidean Distance Matrix (FP32, M=1, N=1)
469-
*/
470-
template <>
471-
struct EuclideanDistanceMatrix<float, 1, 1> {
472-
//! Type of value
473-
using ValueType = float;
474-
475449
//! Compute the distance between matrix and query
476450
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
477451
float *out);
478452
};
479-
#endif // __SSE__ || __ARM_NEON && __aarch64__
480453

481-
#if (defined(__F16C__) && defined(__AVX__)) || \
482-
(defined(__ARM_NEON) && defined(__aarch64__))
483-
/*! Squared Euclidean Distance Matrix (FP16, M=1, N=1)
484-
*/
485454
template <>
486-
struct SquaredEuclideanDistanceMatrix<Float16, 1, 1> {
455+
struct EuclideanDistanceMatrix<int8_t, 1, 1> {
487456
//! Type of value
488-
using ValueType = Float16;
457+
using ValueType = int8_t;
489458

490459
//! Compute the distance between matrix and query
491460
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
492461
float *out);
493462
};
494463

495-
/*! Euclidean Distance Matrix (FP16, M=1, N=1)
496-
*/
497464
template <>
498465
struct EuclideanDistanceMatrix<Float16, 1, 1> {
499466
//! Type of value
@@ -503,58 +470,21 @@ struct EuclideanDistanceMatrix<Float16, 1, 1> {
503470
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
504471
float *out);
505472
};
506-
#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)
507473

508-
#if defined(__SSE4_1__)
509-
/*! Squared Euclidean Distance Matrix (INT8, M=1, N=1)
510-
*/
511474
template <>
512-
struct SquaredEuclideanDistanceMatrix<int8_t, 1, 1> {
513-
//! Type of value
514-
using ValueType = int8_t;
515-
516-
//! Compute the distance between matrix and query
517-
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
518-
float *out);
519-
};
520-
521-
/*! Euclidean Distance Matrix (INT8, M=1, N=1)
522-
*/
523-
template <>
524-
struct EuclideanDistanceMatrix<int8_t, 1, 1> {
525-
//! Type of value
526-
using ValueType = int8_t;
527-
528-
//! Compute the distance between matrix and query
529-
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
530-
float *out);
531-
};
532-
533-
/*! Squared Euclidean Distance Matrix (INT4, M=1, N=1)
534-
*/
535-
template <>
536-
struct SquaredEuclideanDistanceMatrix<uint8_t, 1, 1> {
475+
struct EuclideanDistanceMatrix<float, 1, 1> {
537476
//! Type of value
538-
using ValueType = uint8_t;
477+
using ValueType = float;
539478

540479
//! Compute the distance between matrix and query
541480
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
542481
float *out);
543482
};
544483

545-
/*! Euclidean Distance Matrix (INT4, M=1, N=1)
546-
*/
547-
template <>
548-
struct EuclideanDistanceMatrix<uint8_t, 1, 1> {
549-
//! Type of value
550-
using ValueType = uint8_t;
551-
552-
//! Compute the distance between matrix and query
553-
static void Compute(const ValueType *m, const ValueType *q, size_t dim,
554-
float *out);
555-
};
556-
#endif // __SSE4_1__
557484

485+
//--------------------------------------------------
486+
// Sparse
487+
//--------------------------------------------------
558488
/*! Squared Euclidean Distance Sparse Matrix
559489
*/
560490
template <typename T>

src/ailego/math/euclidean_distance_matrix_fp16_avx.cc

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,13 @@ namespace ailego {
2121

2222
#if defined(__AVX__)
2323

24-
void SquaredEuclideanDistanceAVX(const Float16 *lhs, const Float16 *rhs,
25-
size_t size, float *out) {
26-
ACCUM_FP16_1X1_AVX(lhs, rhs, size, out, 0ull, )
27-
}
24+
float SquaredEuclideanDistanceFp16AVX(const Float16 *lhs, const Float16 *rhs,
25+
size_t size) {
26+
float score{0.0f};
27+
28+
ACCUM_FP16_1X1_AVX(lhs, rhs, size, &score, 0ull, )
2829

29-
//! EuclideanDistance
30-
void EuclideanDistanceAVX(const Float16 *lhs, const Float16 *rhs, size_t size,
31-
float *out) {
32-
ACCUM_FP16_1X1_AVX(lhs, rhs, size, out, 0ull, std::sqrt)
30+
return score;
3331
}
3432

3533
#endif // __AVX__

0 commit comments

Comments
 (0)