Skip to content

Commit 685ee99

Browse files
jakemasclaude
authored and committed
ML-DSA: add build support and importer for x86_64 assembly backend
Add CMake support to compile mldsa-native x86_64 assembly files, a custom mldsa_x86_64_meta.h declaring only the assembly-backed native operations (NTT, INTT, nttunpack, pointwise, polyvecl_pointwise_acc), and the importer script to pull them from upstream. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c6104fa commit 685ee99

5 files changed

Lines changed: 248 additions & 9 deletions

File tree

crypto/fipsmodule/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,24 @@ if((ARCH STREQUAL "x86_64") AND UNIX AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)
397397

398398
endif()
399399

400+
# Build the mldsa-native x86_64 assembly backend. This is limited to x86_64
# Unix platforms whose assembler is recent enough for AVX.
if((ARCH STREQUAL "x86_64") AND UNIX AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)

  # Location of the imported s2n-bignum headers. importer.sh rewrites the
  # mldsa-native .S files to include an s2n-bignum header; presumably this
  # variable is consumed where the assembly include path is configured.
  set(S2N_BIGNUM_INCLUDE_DIR "${AWSLC_SOURCE_DIR}/third_party/s2n-bignum/s2n-bignum-imported/include")

  # Root of the ML-DSA module; the upstream import lives in its mldsa/ subtree.
  set(MLDSA_NATIVE_DIR "${AWSLC_SOURCE_DIR}/crypto/fipsmodule/ml_dsa")

  # importer.sh decides which .S files exist in this directory, and all of
  # them must be assembled. Globbing keeps this list in sync with a refreshed
  # import without a matching edit here; CONFIGURE_DEPENDS asks CMake to
  # re-run the glob when the set of matching files changes.
  file(
    GLOB MLDSA_NATIVE_X86_64_ASM_SOURCES
    CONFIGURE_DEPENDS
    "${MLDSA_NATIVE_DIR}/mldsa/native/x86_64/src/*.S"
  )
  list(APPEND BCM_ASM_SOURCES ${MLDSA_NATIVE_X86_64_ASM_SOURCES})

endif()
417+
400418

401419
if(FIPS_DELOCATE)
402420
if(FIPS_SHARED)

crypto/fipsmodule/ml_dsa/importer.sh

Lines changed: 88 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,16 +72,56 @@ popd
7272

7373
echo "Pull source code from remote repository..."
7474

75-
# Copy mldsa-native source tree -- C source only (no native backends for now)
75+
# Copy mldsa-native source tree -- C source
7676
mkdir $SRC
77-
cp $TMP/mldsa/src/* $SRC
77+
# Copy only files (not subdirectories like native/ and fips202/).
# All path expansions are quoted so that a $TMP or $SRC containing whitespace
# or glob characters is passed to cp/find as a single argument.
find "$TMP/mldsa/src" -maxdepth 1 -type f -exec cp {} "$SRC" \;

# Copy x86_64 backend
# We import only the assembly-backed operations (NTT, INTT, nttunpack,
# pointwise, polyvecl_pointwise_acc). The AVX2 C-intrinsic operations
# (rej_uniform, decompose, use_hint, chknorm, caddq, polyz_unpack) are
# intentionally excluded.
#
# The upstream meta.h advertises both assembly and C-intrinsic operations.
# Rather than modify it, we keep a hand-maintained replacement in
# ../mldsa_x86_64_meta.h (referenced via MLD_CONFIG_ARITH_BACKEND_FILE) that
# declares only the assembly-backed subset. Upstream meta.h is not copied.
x86_64_upstream="$TMP/mldsa/src/native/x86_64/src"
x86_64_local="$SRC/native/x86_64/src"
mkdir -p "$x86_64_local"

# Backend API and specification assumed by mldsa-native frontend
cp "$TMP/mldsa/src/native/api.h" "$SRC/native"

# Files imported from the upstream x86_64 backend directory:
#  - arith_native_x86_64.h: backend header; unused C-intrinsic declarations
#    are harmless and left intact.
#  - consts.h / consts.c: shared constants (zetas table) needed by the
#    assembly kernels.
#  - *_avx2_asm.S: assembly for the operations we import (NTT, INTT,
#    nttunpack, pointwise, polyvecl_pointwise_acc). Only files with verified
#    proofs are included.
for backend_file in \
    arith_native_x86_64.h \
    consts.h \
    consts.c \
    ntt_avx2_asm.S \
    intt_avx2_asm.S \
    nttunpack_avx2_asm.S \
    pointwise_avx2_asm.S \
    pointwise_acc_l4_avx2_asm.S \
    pointwise_acc_l5_avx2_asm.S \
    pointwise_acc_l7_avx2_asm.S; do
  cp "$x86_64_upstream/$backend_file" "$x86_64_local"
done
78108

79109
# We use the custom `mldsa_native_config.h`, so can remove the default one
80-
rm $SRC/config.h
110+
rm -f $SRC/config.h
81111

82112
# Copy formatting file
83113
cp $TMP/.clang-format $SRC
84114

115+
# Arguments for in-place sed editing, stored as an array so that the empty
# string survives expansion via "${SED_I[@]}". On Darwin an (empty) backup
# suffix is passed as a separate argument after -i; elsewhere -i is used alone.
case "$(uname)" in
  Darwin)
    SED_I=(-i "")
    ;;
  *)
    SED_I=(-i)
    ;;
esac
120+
121+
# ================================================================
122+
# Process mldsa_native_bcm.c
123+
# ================================================================
124+
85125
# Copy and statically simplify BCM file
86126
# The static simplification is not necessary, but improves readability
87127
# by removing directives related to the FIPS-202 backend that we provide
@@ -91,12 +131,6 @@ unifdef -DMLD_CONFIG_FIPS202_CUSTOM_HEADER \
91131
$TMP/mldsa/mldsa_native.c \
92132
> $SRC/mldsa_native_bcm.c
93133

94-
if [[ "$(uname)" == "Darwin" ]]; then
95-
SED_I=(-i "")
96-
else
97-
SED_I=(-i)
98-
fi
99-
100134
# Copy mldsa-native header
101135
# This is only needed for access to the various macros defining key sizes.
102136
# The function declarations themselves are all visible in ml_dsa.c by virtue
@@ -110,6 +144,51 @@ cp $TMP/mldsa/mldsa_native.h $SRC
110144
echo "Fixup include paths"
111145
sed "${SED_I[@]}" 's/#include "src\/\([^"]*\)"/#include "\1"/' $SRC/mldsa_native_bcm.c
112146

147+
# The C-intrinsic .c files of the x86_64 backend were not imported, so the
# #include lines that pull them into the BCM file have to go. consts.c is the
# one backend .c file that stays, because the assembly backend shares it.
echo "Strip C-intrinsic includes from mldsa_native_bcm.c"
BCM=$SRC/mldsa_native_bcm.c
for intrinsic_src in \
    poly_caddq_avx2 \
    poly_chknorm_avx2 \
    poly_decompose_32_avx2 \
    poly_decompose_88_avx2 \
    poly_use_hint_32_avx2 \
    poly_use_hint_88_avx2 \
    polyz_unpack_17_avx2 \
    polyz_unpack_19_avx2 \
    rej_uniform_avx2 \
    rej_uniform_eta2_avx2 \
    rej_uniform_eta4_avx2 \
    rej_uniform_table; do
  # Delete the line that starts with the #include of this source file.
  sed "${SED_I[@]}" "/^#include \"native\\/x86_64\\/src\\/${intrinsic_src}\\.c\"/d" "$BCM"
done
163+
164+
# ================================================================
165+
# Fixup x86_64 assembly backend to use s2n-bignum macros
166+
# ================================================================
167+
168+
echo "Fixup x86_64 assembly backend to use s2n-bignum macros"

# Loop-invariant settings: the backend guard macro that unifdef resolves as
# defined, and the s2n-bignum header that replaces the common.h include.
backend_define="MLD_ARITH_BACKEND_X86_64_DEFAULT"
s2n_header="_internal_s2n_bignum_x86_att.h"

# A newline in sed replacement text is only portable when written as a
# backslash followed by a literal newline. "\n" is a GNU extension and comes
# out as a plain "n" with the sed shipped on macOS, which this script supports
# (see SED_I).
nl=$'\n'

for file in "$SRC"/native/x86_64/src/*.S; do
  # When nothing matches, the glob is left as a literal pattern; skip it
  # instead of running the tools on a non-existent file.
  [ -e "$file" ] || continue

  echo "Processing $file"
  tmp_file=$(mktemp)

  # Flatten multiline preprocessor directives, then process with unifdef
  sed -e ':a' -e 'N' -e '$!ba' -e 's/\\\n/ /g' "$file" | \
    unifdef "-D$backend_define" -UMLD_CONFIG_MULTILEVEL_NO_SHARED -DMLD_CONFIG_MULTILEVEL_WITH_SHARED > "$tmp_file"
  mv "$tmp_file" "$file"

  # Replace common.h include and assembly macros
  sed "${SED_I[@]}" "s/#include \"\.\.\/\.\.\/\.\.\/common\.h\"/#include \"$s2n_header\"/" "$file"

  # Collect every symbol exported through MLD_ASM_NAMESPACE. A file may export
  # more than one, so each name is rewritten separately; word splitting is
  # intentional here, as the names are plain identifiers, one per line.
  func_names=$(grep -o '\.global MLD_ASM_NAMESPACE(\([^)]*\))' "$file" | sed 's/\.global MLD_ASM_NAMESPACE(\([^)]*\))/\1/')
  for func_name in $func_names; do
    sed "${SED_I[@]}" "s/\.global MLD_ASM_NAMESPACE($func_name)/ S2N_BN_SYM_VISIBILITY_DIRECTIVE(mldsa_$func_name)\\${nl} S2N_BN_SYM_PRIVACY_DIRECTIVE(mldsa_$func_name)/" "$file"
    sed "${SED_I[@]}" "s/MLD_ASM_FN_SYMBOL($func_name)/S2N_BN_SYMBOL(mldsa_$func_name):/" "$file"
    sed "${SED_I[@]}" "s/MLD_ASM_FN_SIZE($func_name)/S2N_BN_SIZE_DIRECTIVE(mldsa_$func_name)/" "$file"
  done
done
191+
113192
echo "Remove temporary artifacts ..."
114193
rm -rf $TMP
115194

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0 OR ISC
3+
4+
#ifndef MLDSA_NATIVE_BACKEND_H
#define MLDSA_NATIVE_BACKEND_H

#include <openssl/target.h>

/*
 * Arithmetic backend selector for the mldsa-native import.
 *
 * The x86_64 backend description is pulled in only when all of the following
 * hold: assembly is not disabled (OPENSSL_NO_ASM), the target OS is Linux or
 * Apple, the target architecture is x86_64, and the assembler is not flagged
 * as too old for AVX. In every other configuration this header defines
 * nothing beyond its include guard.
 */
#if !defined(OPENSSL_NO_ASM) &&                            \
    (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \
    defined(OPENSSL_X86_64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)
#include "mldsa_x86_64_meta.h"
#endif

#endif /* MLDSA_NATIVE_BACKEND_H */

crypto/fipsmodule/ml_dsa/mldsa_native_config.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,4 +116,8 @@ static MLD_INLINE void *mld_memset(void *s, int c, size_t n) {
116116
#define MLD_CONFIG_NO_ASM
117117
#endif
118118

119+
// Enable the native arithmetic backend; the backend file named below selects the x86_64 implementation where supported
120+
#define MLD_CONFIG_USE_NATIVE_BACKEND_ARITH
121+
#define MLD_CONFIG_ARITH_BACKEND_FILE "../mldsa_native_backend.h"
122+
119123
#endif // MLD_CONFIG_H
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0 OR ISC
3+
4+
/*
5+
* Custom x86_64 backend header for the mldsa-native import.
6+
*
7+
* mldsa-native's upstream meta.h declares native implementations for both
8+
* assembly-backed operations (NTT, INTT, pointwise multiplication) and
9+
* AVX2 C-intrinsic operations (rej_uniform, decompose, use_hint, chknorm,
10+
* caddq, polyz_unpack). AWS-LC only imports the assembly-backed operations,
11+
* so we replace the upstream meta.h with this trimmed-down version that
12+
* declares only the subset we actually provide.
13+
*
14+
* Kept outside the imported `mldsa/` tree so that `importer.sh` does not
15+
* need to modify upstream sources.
16+
*/
17+
18+
#ifndef MLD_NATIVE_X86_64_META_H
19+
#define MLD_NATIVE_X86_64_META_H
20+
21+
/* Identifier for this backend so that source and assembly files
22+
* in the build can be appropriately guarded. */
23+
#define MLD_ARITH_BACKEND_X86_64_DEFAULT
24+
25+
#define MLD_USE_NATIVE_NTT_CUSTOM_ORDER
26+
#define MLD_USE_NATIVE_NTT
27+
#define MLD_USE_NATIVE_INTT
28+
#define MLD_USE_NATIVE_POINTWISE_MONTGOMERY
29+
#define MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L4
30+
#define MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L5
31+
#define MLD_USE_NATIVE_POLYVECL_POINTWISE_ACC_MONTGOMERY_L7
32+
33+
#if !defined(__ASSEMBLER__)
34+
#include "mldsa/native/api.h"
35+
#include "mldsa/native/x86_64/src/arith_native_x86_64.h"
36+
37+
/* Reorders `data` through mld_nttunpack_avx2_asm when the AVX2 capability
 * check passes; otherwise `data` is left untouched. */
static MLD_INLINE void mld_poly_permute_bitrev_to_custom(int32_t data[MLDSA_N])
{
  if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2))
  {
    return;
  }
  mld_nttunpack_avx2_asm(data);
}
44+
45+
MLD_MUST_CHECK_RETURN_VALUE
46+
static MLD_INLINE int mld_ntt_native(int32_t data[MLDSA_N])
47+
{
48+
if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2))
49+
{
50+
return MLD_NATIVE_FUNC_FALLBACK;
51+
}
52+
mld_ntt_avx2_asm(data, mld_qdata);
53+
return MLD_NATIVE_FUNC_SUCCESS;
54+
}
55+
56+
MLD_MUST_CHECK_RETURN_VALUE
57+
static MLD_INLINE int mld_intt_native(int32_t data[MLDSA_N])
58+
{
59+
if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2))
60+
{
61+
return MLD_NATIVE_FUNC_FALLBACK;
62+
}
63+
mld_invntt_avx2_asm(data, mld_qdata);
64+
return MLD_NATIVE_FUNC_SUCCESS;
65+
}
66+
67+
MLD_MUST_CHECK_RETURN_VALUE
68+
static MLD_INLINE int mld_poly_pointwise_montgomery_native(
69+
int32_t a[MLDSA_N], const int32_t b[MLDSA_N])
70+
{
71+
if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2))
72+
{
73+
return MLD_NATIVE_FUNC_FALLBACK;
74+
}
75+
mld_pointwise_avx2_asm(a, b, mld_qdata);
76+
return MLD_NATIVE_FUNC_SUCCESS;
77+
}
78+
79+
MLD_MUST_CHECK_RETURN_VALUE
80+
static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l4_native(
81+
int32_t w[MLDSA_N], const int32_t u[4][MLDSA_N],
82+
const int32_t v[4][MLDSA_N])
83+
{
84+
if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2))
85+
{
86+
return MLD_NATIVE_FUNC_FALLBACK;
87+
}
88+
mld_pointwise_acc_l4_avx2_asm(w, u, v, mld_qdata);
89+
return MLD_NATIVE_FUNC_SUCCESS;
90+
}
91+
92+
MLD_MUST_CHECK_RETURN_VALUE
93+
static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l5_native(
94+
int32_t w[MLDSA_N], const int32_t u[5][MLDSA_N],
95+
const int32_t v[5][MLDSA_N])
96+
{
97+
if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2))
98+
{
99+
return MLD_NATIVE_FUNC_FALLBACK;
100+
}
101+
mld_pointwise_acc_l5_avx2_asm(w, u, v, mld_qdata);
102+
return MLD_NATIVE_FUNC_SUCCESS;
103+
}
104+
105+
MLD_MUST_CHECK_RETURN_VALUE
106+
static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l7_native(
107+
int32_t w[MLDSA_N], const int32_t u[7][MLDSA_N],
108+
const int32_t v[7][MLDSA_N])
109+
{
110+
if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2))
111+
{
112+
return MLD_NATIVE_FUNC_FALLBACK;
113+
}
114+
mld_pointwise_acc_l7_avx2_asm(w, u, v, mld_qdata);
115+
return MLD_NATIVE_FUNC_SUCCESS;
116+
}
117+
118+
#endif /* !__ASSEMBLER__ */
119+
120+
#endif /* !MLD_NATIVE_X86_64_META_H */

0 commit comments

Comments
 (0)