Skip to content

Commit 32ba4fa

Browse files
committed
improvements for ring lpn
1 parent 05a8e77 commit 32ba4fa

File tree

6 files changed

+211
-13
lines changed

6 files changed

+211
-13
lines changed

cmake/CheckISA.cmake

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
include(CheckCXXCompilerFlag)
2+
include(CheckCXXSourceRuns)
3+
4+
# check_isa_runs(<ISA_NAME> <ISA_FLAG> <TEST_SRC> <OUT_VAR>)
#
# Reports whether a small program built with an ISA-specific compiler flag
# both compiles and runs successfully on the machine performing the build.
#
#   ISA_NAME  Label used in status messages and in the cache key (e.g. "AVX2").
#   ISA_FLAG  Compiler flag appended to CMAKE_REQUIRED_FLAGS for the check.
#   TEST_SRC  C++ source passed to check_cxx_source_runs().
#   OUT_VAR   Name of the variable set in the caller's scope; it is always
#             left holding exactly TRUE or FALSE.
#
# No separate flag-acceptance probe is performed; the flag is only exercised
# through the compile-and-run check below.
function(check_isa_runs ISA_NAME ISA_FLAG TEST_SRC OUT_VAR)
    # Conservative default, visible to the caller on every early return.
    set(${OUT_VAR} FALSE PARENT_SCOPE)

    # On cross builds the test binary cannot be executed -- be conservative.
    if(CMAKE_CROSSCOMPILING)
        message(STATUS "${ISA_NAME}: cross-compiling; skipping run test -> FALSE")
        return()
    endif()

    # Unique cache key per (ISA, flag, source): check_cxx_source_runs() caches
    # its result under this name, so a changed flag or source gets a new entry.
    string(MD5 _hash "${TEST_SRC}||${ISA_FLAG}")
    set(_KEY "RUN_${ISA_NAME}_${_hash}")

    # Pass the ISA flag to the try_run compile, remembering the previous value.
    set(_SAVE_REQ_FLAGS "${CMAKE_REQUIRED_FLAGS}")
    set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${ISA_FLAG}")

    # Keep the check itself quiet; the summary line below reports the outcome.
    # Set this to FALSE while debugging to see the check's own status output.
    set(CMAKE_REQUIRED_QUIET TRUE)

    check_cxx_source_runs("${TEST_SRC}" ${_KEY})

    set(CMAKE_REQUIRED_FLAGS "${_SAVE_REQ_FLAGS}")

    # Normalise the cached result to TRUE/FALSE. A failed check leaves the
    # cache entry empty, and forwarding an empty value with PARENT_SCOPE would
    # unset the caller's variable instead of reporting FALSE.
    if(${_KEY})
        set(RESULT TRUE)
    else()
        set(RESULT FALSE)
    endif()
    set(${OUT_VAR} ${RESULT} PARENT_SCOPE)

    message(STATUS "${ISA_NAME}: runtime support ${RESULT} (flag=${ISA_FLAG})")
endfunction()
59+
60+
# ---- Test sources (each *executes* a distinctive intrinsic) ----
61+
set(TEST_SSE2 "
62+
#include <emmintrin.h>
63+
int main(){
64+
__m128i a = _mm_set1_epi32(1), b = _mm_set1_epi32(2);
65+
__m128i c = _mm_add_epi32(a,b);
66+
volatile int v = _mm_cvtsi128_si32(c);
67+
(void)v; return 0;
68+
}")
69+
70+
set(TEST_AVX "
71+
#include <immintrin.h>
72+
int main(){
73+
__m256 a = _mm256_set1_ps(1.0f), b = _mm256_set1_ps(2.0f);
74+
__m256 c = _mm256_add_ps(a,b);
75+
volatile float v = ((float*)&c)[0];
76+
(void)v; return 0;
77+
}")
78+
79+
set(TEST_AVX2 "
80+
#include <immintrin.h>
81+
int main(){
82+
__m256i a = _mm256_set1_epi32(3), b = _mm256_set1_epi32(4);
83+
__m256i c = _mm256_mullo_epi32(a,b); // AVX2-specific integer op
84+
volatile int v = ((int*)&c)[0];
85+
(void)v; return 0;
86+
}")
87+
88+
set(TEST_AVX512 [[
89+
#include <immintrin.h>
90+
int main(){
91+
alignas(64) int d[16] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
92+
volatile int u = d[0];
93+
__m512i X = _mm512_loadu_si512((const void*)d);
94+
__m512i Y = _mm512_add_epi32(X, _mm512_set1_epi32(u));
95+
_mm512_storeu_si512((void*)d, Y);
96+
return d[0]==0;
97+
}]])
98+
99+
set(TEST_BMI2 [[
100+
#include <immintrin.h>
101+
#include <cstdlib>
102+
int main(){
103+
long long unsigned int x = 0b1100;
104+
volatile unsigned long long y = _mulx_u64(rand(), rand(), &x);
105+
(void)y; return y==0b10;
106+
}]])
107+
108+
# ---- Run the checks (MSVC flags; add GCC/Clang alternates if you use them) ----
109+
if (MSVC)
110+
set(FLAG_SSE2 "/arch:SSE2") # default on x64 but the test is harmless
111+
set(FLAG_AVX "/arch:AVX")
112+
set(FLAG_AVX2 "/arch:AVX2")
113+
set(FLAG_AVX512 "/arch:AVX512")
114+
set(FLAG_BMI2 "/arch:AVX2") # no /arch flag for BMI2
115+
else()
116+
# clang/gcc (helps when using clang-cl or mingw)
117+
set(FLAG_SSE2 "-msse2")
118+
set(FLAG_AVX "-mavx")
119+
set(FLAG_AVX2 "-mavx2")
120+
set(FLAG_AVX512 "-mavx512f")
121+
set(FLAG_BMI2 "-mbmi2")
122+
endif()
123+
124+
check_isa_runs("SSE2" "${FLAG_SSE2}" "${TEST_SSE2}" SSE2_RUNS)
125+
check_isa_runs("AVX" "${FLAG_AVX}" "${TEST_AVX}" AVX_RUNS)
126+
check_isa_runs("AVX2" "${FLAG_AVX2}" "${TEST_AVX2}" AVX2_RUNS)
127+
check_isa_runs("AVX512" "${FLAG_AVX512}" "${TEST_AVX512}" AVX512_RUNS)
128+
check_isa_runs("BMI2" "${FLAG_BMI2}" "${TEST_BMI2}" BMI2_RUNS)
129+
130+
131+
# Expose the detection results as cache entries so users can override them if
# they *really* want. Each default is seeded from the matching *_RUNS result
# of the check_isa_runs() calls above; an existing cache value is not replaced.
set(ENABLE_SSE_DEFAULT    "${SSE2_RUNS}"   CACHE BOOL "Enable SSE2 codepaths by default")
set(ENABLE_AVX_DEFAULT    "${AVX_RUNS}"    CACHE BOOL "Enable AVX codepaths by default")
set(ENABLE_AVX2_DEFAULT   "${AVX2_RUNS}"   CACHE BOOL "Enable AVX2 codepaths by default")
set(ENABLE_AVX512_DEFAULT "${AVX512_RUNS}" CACHE BOOL "Enable AVX-512 codepaths by default")
# BMI2 has its own probe; it must not inherit the AVX-512 result.
set(ENABLE_BMI2_DEFAULT   "${BMI2_RUNS}"   CACHE BOOL "Enable BMI2 codepaths by default")

cmake/cryptoToolsBuildOptions.cmake

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11

22
include_guard(GLOBAL)
3+
include(CheckCXXCompilerFlag)
4+
35

46
set(CRYPTOTOOLS_BUILD ON)
57

@@ -22,11 +24,32 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
2224
message(STATUS "Building for ARM")
2325
set(ENABLE_ARM_AES_DEFAULT true)
2426
set(ENABLE_SSE_DEFAULT false)
27+
set(ENABLE_AVX_DEFAULT false)
28+
set(ENABLE_AVX2_DEFAULT false)
29+
set(ENABLE_AVX512_DEFAULT false)
2530
else()
2631
# Code for other architectures
2732
message(STATUS "Building for x86-64")
2833
set(ENABLE_ARM_AES_DEFAULT false)
29-
set(ENABLE_SSE_DEFAULT true)
34+
35+
# defines ENABLE_SSE_DEFAULT, etc
36+
include(${CMAKE_CURRENT_LIST_DIR}/CheckISA.cmake)
37+
38+
#set(ENABLE_SSE_DEFAULT true)
39+
#if(MSVC)
40+
# check_cxx_compiler_flag("/arch:SSE2" ENABLE_SSE_DEFAULT)
41+
# check_cxx_compiler_flag("/arch:AVX" ENABLE_AVX_DEFAULT)
42+
# check_cxx_compiler_flag("/arch:AVX2" ENABLE_AVX2_DEFAULT)
43+
# check_cxx_compiler_flag("/arch:AVX512" ENABLE_AVX512_DEFAULT)
44+
#else()
45+
# check_cxx_compiler_flag("-msse2" ENABLE_SSE_DEFAULT)
46+
# check_cxx_compiler_flag("-mavx" ENABLE_AVX_DEFAULT)
47+
# check_cxx_compiler_flag("-mavx2" ENABLE_AVX2_DEFAULT)
48+
# check_cxx_compiler_flag("-mavx512f" ENABLE_AVX512_DEFAULT)
49+
#endif()
50+
51+
52+
3053
endif()
3154

3255

@@ -43,11 +66,15 @@ option(ENABLE_NET_LOG "compile with network logging" OFF)
4366
option(ENABLE_WOLFSSL "compiler with WolfSSL enabled" OFF)
4467
option(ENABLE_ARM_AES "compile with ARM AES instructions" ${ENABLE_ARM_AES_DEFAULT})
4568
option(ENABLE_SSE "compile with SSE instructions" ${ENABLE_SSE_DEFAULT})
46-
option(ENABLE_AVX "compile with AVX instructions" ${ENABLE_SSE})
69+
option(ENABLE_AVX "compile with AVX instructions" ${ENABLE_AVX_DEFAULT})
70+
option(ENABLE_AVX2 "compile with AVX2 instructions" ${ENABLE_AVX2_DEFAULT})
71+
option(ENABLE_AVX512 "compile with AVX512 instructions" ${ENABLE_AVX512_DEFAULT})
72+
option(ENABLE_BMI2 "compile with BMI2 instructions" ${ENABLE_BMI2_DEFAULT})
4773
option(ENABLE_BOOST "compile with BOOST networking integration" OFF)
4874
option(ENABLE_OPENSSL "compile with OpenSSL networking integration" OFF)
4975
option(ENABLE_ASAN "build with asan" OFF)
5076
option(ENABLE_PIC "compile with -fPIC " OFF)
77+
5178
option(VERBOSE_FETCH "" ON)
5279

5380
if(NOT DEFINED CRYPTO_TOOLS_STD_VER)
@@ -148,6 +175,9 @@ message(STATUS "Option: ENABLE_PORTABLE_AES = ${ENABLE_PORTABLE_AES}")
148175

149176
message(STATUS "Option: ENABLE_SSE = ${ENABLE_SSE}")
150177
message(STATUS "Option: ENABLE_AVX = ${ENABLE_AVX}")
178+
message(STATUS "Option: ENABLE_AVX2 = ${ENABLE_AVX2}")
179+
message(STATUS "Option: ENABLE_AVX512 = ${ENABLE_AVX512}")
180+
message(STATUS "Option: ENABLE_BMI2 = ${ENABLE_BMI2}")
151181
message(STATUS "Option: ENABLE_PIC = ${ENABLE_PIC}")
152182
message(STATUS "Option: ENABLE_ASAN = ${ENABLE_ASAN}\n\n")
153183

cryptoTools/CMakeLists.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ if(MSVC)
2525

2626
target_compile_options(cryptoTools PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/std:c++${CRYPTO_TOOLS_STD_VER}>)
2727

28+
if(ENABLE_AVX512)
29+
target_compile_options(cryptoTools PUBLIC /arch:AVX512)
30+
elseif(ENABLE_AVX2)
31+
target_compile_options(cryptoTools PUBLIC /arch:AVX2)
32+
elseif(ENABLE_AVX)
33+
target_compile_options(cryptoTools PUBLIC /arch:AVX)
34+
elseif(ENABLE_SSE)
35+
target_compile_options(cryptoTools PUBLIC /arch:SSE2)
36+
endif()
37+
2838
else()
2939
target_compile_options(cryptoTools PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-std=c++${CRYPTO_TOOLS_STD_VER}> -pthread)
3040

@@ -35,8 +45,18 @@ else()
3545
target_compile_options(cryptoTools PUBLIC -maes -msse2 -msse3 -mssse3 -msse4.1 -mpclmul)
3646
endif()
3747
if(ENABLE_AVX)
48+
target_compile_options(cryptoTools PUBLIC -mavx)
49+
endif()
50+
if(ENABLE_AVX2)
3851
target_compile_options(cryptoTools PUBLIC -mavx2)
3952
endif()
53+
if(ENABLE_AVX512)
54+
target_compile_options(cryptoTools PUBLIC -mavx512f -mavx512vl -mavx512bw -mavx512dq)
55+
endif()
56+
if(ENABLE_BMI2)
57+
target_compile_options(cryptoTools PUBLIC -mbmi2)
58+
endif()
59+
4060
if(ENABLE_ARM_AES)
4161
if(NOT DEFINED ARM_ARCHITECTURE)
4262
set(ARM_ARCHITECTURE native)

cryptoTools/Common/Aligned.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,8 @@ namespace osuCrypto
276276
if (oldCap)
277277
{
278278
auto m = std::min<size_t>(oldSpan.size(), n);
279-
std::copy(oldSpan.begin(), oldSpan.begin() + m, mSpan.begin());
279+
if(m)
280+
std::copy(oldSpan.begin(), oldSpan.begin() + m, mSpan.begin());
280281

281282
Allocator::deallocate(oldSpan.data(), mCapacity);
282283
}

cryptoTools/Common/config.h.in

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@
4141
// enable the use of intel AVX instructions.
4242
#cmakedefine ENABLE_AVX @ENABLE_AVX@
4343

44+
// enable the use of intel AVX2 instructions.
45+
#cmakedefine ENABLE_AVX2 @ENABLE_AVX2@
46+
47+
// enable the use of intel AVX512 instructions.
48+
#cmakedefine ENABLE_AVX512 @ENABLE_AVX512@
49+
50+
// enable the use of intel BMI2 instructions.
51+
#cmakedefine ENABLE_BMI2 @ENABLE_BMI2@
52+
53+
54+
4455
// enable the use of the portable AES implementation.
4556
#cmakedefine ENABLE_PORTABLE_AES @ENABLE_PORTABLE_AES@
4657

cryptoTools/Crypto/AES.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ namespace osuCrypto {
522522

523523

524524
template<AESTypes type>
525-
block AES<type>::roundEnc(block state, const block& roundKey)
525+
OC_FORCEINLINE block AES<type>::roundEnc(block state, const block& roundKey)
526526
{
527527
if constexpr (type == AESTypes::ARM)
528528
{
@@ -537,7 +537,7 @@ namespace osuCrypto {
537537

538538
#ifdef OC_ENABLE_PORTABLE_AES
539539
template<>
540-
inline block AES<Portable>::firstFn(block state, const block& roundKey)
540+
OC_FORCEINLINE block AES<Portable>::firstFn(block state, const block& roundKey)
541541
{
542542
return state ^ roundKey;
543543
}
@@ -548,34 +548,34 @@ namespace osuCrypto {
548548
#ifdef OC_ENABLE_AESNI
549549

550550
template<>
551-
inline block AES<NI>::firstFn(block state, const block& roundKey)
551+
OC_FORCEINLINE block AES<NI>::firstFn(block state, const block& roundKey)
552552
{
553553
return state ^ roundKey;
554554
}
555555

556556
template<>
557-
inline block AES<NI>::roundFn(block state, const block& roundKey)
557+
OC_FORCEINLINE block AES<NI>::roundFn(block state, const block& roundKey)
558558
{
559559
return _mm_aesenc_si128(state, roundKey);
560560
}
561561

562562
template<>
563-
inline block AES<NI>::penultimateFn(block state, const block& roundKey)
563+
OC_FORCEINLINE block AES<NI>::penultimateFn(block state, const block& roundKey)
564564
{
565565
return roundFn(state, roundKey);
566566
}
567567

568568

569569
template<>
570-
inline block AES<NI>::finalFn(block state, const block& roundKey)
570+
OC_FORCEINLINE block AES<NI>::finalFn(block state, const block& roundKey)
571571
{
572572
return _mm_aesenclast_si128(state, roundKey);
573573
}
574574

575575
#elif defined(ENABLE_ARM_AES)
576576

577577
template<>
578-
inline block AES<ARM>::firstFn(block state, const block& roundKey)
578+
OC_FORCEINLINE block AES<ARM>::firstFn(block state, const block& roundKey)
579579
{
580580
block r;
581581
r.mData = vaeseq_u8(state.mData, roundKey.mData);
@@ -584,21 +584,21 @@ namespace osuCrypto {
584584
}
585585

586586
template<>
587-
inline block AES<ARM>::roundFn(block state, const block& roundKey)
587+
OC_FORCEINLINE block AES<ARM>::roundFn(block state, const block& roundKey)
588588
{
589589
return firstFn(state, roundKey);
590590
}
591591

592592
template<>
593-
inline block AES<ARM>::penultimateFn(block state, const block& roundKey)
593+
OC_FORCEINLINE block AES<ARM>::penultimateFn(block state, const block& roundKey)
594594
{
595595
block r;
596596
r.mData = vaeseq_u8(state.mData, roundKey.mData);
597597
return r;
598598
}
599599

600600
template<>
601-
inline block AES<ARM>::finalFn(block state, const block& roundKey)
601+
OC_FORCEINLINE block AES<ARM>::finalFn(block state, const block& roundKey)
602602
{
603603
return state ^ roundKey;
604604
}

0 commit comments

Comments
 (0)