Skip to content

Commit 7e2cbb8

Browse files
committed
aes-gcm-avx512: guard preload/ADDBE aliasing and add a build-time toggle
In the >=704-byte 16-/32-block loops, the AES round-key preload reuses the ADDBE counter-constant registers (%zmm27/%zmm28) to hold preloaded round keys 2 and 3, after which those registers no longer contain the counter increments. Extend the counter-setup hardening already present in GHASH_16_ENCRYPT_16_PARALLEL to its siblings INITIAL_BLOCKS_16 and GHASH_16_ENCRYPT_N_GHASH_N: their non-overflow counter increments now use the ddq_addbe_*(%rip) memory constants, and a build-time check dies if the generated counter setup references the ADDBE register arguments, so a future reordering cannot add AES-key bytes into the CTR block (keystream/nonce reuse, CWE-323). Also add $PRELOAD_AES_ROUND_KEYS, which defaults to 1 (preload on) and is forced to 0 at build time when the OPENSSL_AESGCM_NO_KEYPRELOAD environment variable is set to a true value, so the with-preload and without-preload variants can be built from a single source for A/B evaluation.
1 parent a15ced5 commit 7e2cbb8

1 file changed

Lines changed: 86 additions & 14 deletions

File tree

crypto/modes/asm/aes-gcm-avx512.pl

Lines changed: 86 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,17 @@
125125
# ; Currently disabled, as this check is handled outside.
126126
my $CHECK_FUNCTION_ARGUMENTS = 0;
127127

128+
# ; Preload AES round keys 0-3 into dedicated ZMM registers for the >=704-byte
129+
# ; 16-/32-block hot loops, instead of reloading them from memory each
130+
# ; iteration. This is the optimization currently under evaluation: on some
131+
# ; microarchitectures its benefit is negligible, and it forces aliasing the
132+
# ; ADDBE counter constants onto AES-key registers (see the counter-setup
133+
# ; invariants in GHASH_16_ENCRYPT_16_PARALLEL / _N_GHASH_N / INITIAL_BLOCKS_16).
134+
# ; Set to 0 to disable it (round keys are then reloaded from memory). The
135+
# ; OPENSSL_AESGCM_NO_KEYPRELOAD build-time environment variable, when set to a
136+
# ; true value (not empty and not "0"), forces this to 0, so both variants can be built from a single source for A/B evaluation.
137+
my $PRELOAD_AES_ROUND_KEYS = $ENV{OPENSSL_AESGCM_NO_KEYPRELOAD} ? 0 : 1;
138+
128139
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
129140
# ;;; Global constants
130141
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -2351,9 +2362,38 @@ sub GHASH_16_ENCRYPT_N_GHASH_N {
23512362
jae .L_16_blocks_overflow_${label_suffix}
23522363
___
23532364
2354-
&ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16(
2355-
$NUM_BLOCKS, "vpaddd", $B00_03, $B04_07, $B08_11, $B12_15, $CTR_BE,
2356-
$B00_03, $B04_07, $B08_11, $ADDBE_1234, $ADDBE_4x4, $ADDBE_4x4, $ADDBE_4x4);
2365+
# ;; INVARIANT (do not break): the counter increments below MUST use the
2366+
# ;; ddq_addbe_*(%rip) memory constants and MUST NOT reference the
2367+
# ;; $ADDBE_4x4 / $ADDBE_1234 register arguments (args 34/35).
2368+
# ;;
2369+
# ;; Rationale: the large-message callers in GCM_ENC_DEC (the 16-block hot
2370+
# ;; loop and the 32-block big loop) deliberately alias those two ZMM
2371+
# ;; registers (%zmm27/%zmm28) to preloaded AES round keys and may pass them
2372+
# ;; in here only as dead placeholders. If a counter addend were emitted from
2373+
# ;; $ADDBE_4x4 / $ADDBE_1234 it would add AES key bytes to the CTR block,
2374+
# ;; corrupting the keystream and causing catastrophic keystream/nonce reuse
2375+
# ;; (CWE-323). Whole-ZMM adds are used unconditionally (the unused upper
2376+
# ;; lanes are harmless), mirroring the overflow branch below.
2377+
my $ctr_setup_code = " vpaddd ddq_addbe_1234(%rip),$CTR_BE,$B00_03\n";
2378+
if ($NUM_BLOCKS > 4) {
2379+
$ctr_setup_code .= " vpaddd ddq_addbe_4444(%rip),$B00_03,$B04_07\n";
2380+
}
2381+
if ($NUM_BLOCKS > 8) {
2382+
$ctr_setup_code .= " vpaddd ddq_addbe_4444(%rip),$B04_07,$B08_11\n";
2383+
}
2384+
if ($NUM_BLOCKS > 12) {
2385+
$ctr_setup_code .= " vpaddd ddq_addbe_4444(%rip),$B08_11,$B12_15\n";
2386+
}
2387+
2388+
# ;; Build-time enforcement of the invariant documented above.
2389+
if ($ctr_setup_code =~ /\Q$ADDBE_4x4\E\b/ || $ctr_setup_code =~ /\Q$ADDBE_1234\E\b/) {
2390+
die "GHASH_16_ENCRYPT_N_GHASH_N: counter setup must not reference the "
2391+
. "ADDBE register args ($ADDBE_4x4/$ADDBE_1234); use ddq_addbe_*(%rip). "
2392+
. "Callers alias these registers to preloaded AES round keys, so this "
2393+
. "would corrupt the CTR keystream (keystream/nonce reuse).\n";
2394+
}
2395+
2396+
$code .= $ctr_setup_code;
23572397
$code .= <<___;
23582398
jmp .L_16_blocks_ok_${label_suffix}
23592399
@@ -4119,10 +4159,12 @@ sub GCM_ENC_DEC {
41194159
# ; Unused in the small packet path
41204160
my $ADDBE_4x4 = "%zmm27";
41214161
my $ADDBE_1234 = "%zmm28";
4122-
my $PRELOADED_AES_KEY0 = "%zmm9";
4123-
my $PRELOADED_AES_KEY1 = "%zmm23";
4124-
my $PRELOADED_AES_KEY2 = "%zmm27";
4125-
my $PRELOADED_AES_KEY3 = "%zmm28";
4162+
# ; Empty when key preloading is disabled => the stitched macros reload the
4163+
# ; round keys from memory (see $PRELOAD_AES_ROUND_KEYS).
4164+
my $PRELOADED_AES_KEY0 = $PRELOAD_AES_ROUND_KEYS ? "%zmm9" : "";
4165+
my $PRELOADED_AES_KEY1 = $PRELOAD_AES_ROUND_KEYS ? "%zmm23" : "";
4166+
my $PRELOADED_AES_KEY2 = $PRELOAD_AES_ROUND_KEYS ? "%zmm27" : "";
4167+
my $PRELOADED_AES_KEY3 = $PRELOAD_AES_ROUND_KEYS ? "%zmm28" : "";
41264168

41274169
my $MASKREG = "%k1";
41284170

@@ -4225,10 +4267,12 @@ sub GCM_ENC_DEC {
42254267
___
42264268
42274269
# ;; preload AES round keys 0 and 1 (only msg >= 704; 4/8-block path reloads from $AES_KEYS)
4228-
$code .= <<___;
4270+
if ($PRELOAD_AES_ROUND_KEYS) {
4271+
$code .= <<___;
42294272
vbroadcastf64x2 `(16 * 0)`($AES_KEYS),$PRELOADED_AES_KEY0
42304273
vbroadcastf64x2 `(16 * 1)`($AES_KEYS),$PRELOADED_AES_KEY1
42314274
___
4275+
}
42324276
42334277
# ;; ==== AES-CTR - first 16 blocks
42344278
my $aesout_offset = ($STACK_LOCAL_OFFSET + (0 * 16));
@@ -4261,8 +4305,10 @@ sub GCM_ENC_DEC {
42614305
$code .= "mov \$1,$HKEYS_READY\n";
42624306
42634307
# ;; Preload extra AES round keys (overwrites ADDBE registers)
4308+
if ($PRELOAD_AES_ROUND_KEYS) {
42644309
$code .= " vbroadcastf64x2 `(16 * 2)`($AES_KEYS),$PRELOADED_AES_KEY2\n";
42654310
$code .= " vbroadcastf64x2 `(16 * 3)`($AES_KEYS),$PRELOADED_AES_KEY3\n";
4311+
}
42664312
42674313
$code .= <<___;
42684314
add \$`(16 * 16)`,$DATA_OFFSET
@@ -4670,8 +4716,10 @@ sub GCM_ENC_DEC {
46704716
$code .= "mov \$1,$HKEYS_READY\n";
46714717
46724718
# ;; Overwrite ADDBE registers with preloaded AES round keys 2 and 3
4673-
$code .= " vbroadcastf64x2 `(16 * 2)`($AES_KEYS),$PRELOADED_AES_KEY2\n";
4674-
$code .= " vbroadcastf64x2 `(16 * 3)`($AES_KEYS),$PRELOADED_AES_KEY3\n";
4719+
if ($PRELOAD_AES_ROUND_KEYS) {
4720+
$code .= " vbroadcastf64x2 `(16 * 2)`($AES_KEYS),$PRELOADED_AES_KEY2\n";
4721+
$code .= " vbroadcastf64x2 `(16 * 3)`($AES_KEYS),$PRELOADED_AES_KEY3\n";
4722+
}
46754723
46764724
$code .= <<___;
46774725
add \$`(32 * 16)`,$DATA_OFFSET
@@ -4917,16 +4965,40 @@ sub INITIAL_BLOCKS_16 {
49174965
my $label_suffix = $label_count++;
49184966
49194967
my $stack_offset = $BLK_OFFSET;
4968+
4969+
# ;; INVARIANT (do not break): the counter increments in the non-overflow path
4970+
# ;; MUST use the ddq_addbe_*(%rip) memory constants and MUST NOT reference the
4971+
# ;; $ADDBE_4x4 / $ADDBE_1234 register arguments (args 8/7).
4972+
# ;;
4973+
# ;; Rationale: the large-message callers in GCM_ENC_DEC deliberately alias
4974+
# ;; those two ZMM registers (%zmm27/%zmm28) to preloaded AES round keys. Even
4975+
# ;; though this macro currently runs before that aliasing, emitting a counter
4976+
# ;; addend from $ADDBE_4x4 / $ADDBE_1234 would add AES key bytes to the CTR
4977+
# ;; block if the call order ever changed, corrupting the keystream and causing
4978+
# ;; catastrophic keystream/nonce reuse (CWE-323). The register args are
4979+
# ;; therefore intentionally unused here; the build-time check below enforces it.
4980+
my $ctr_setup_code = <<___;
4981+
vpaddd ddq_addbe_1234(%rip),$CTR,$B00_03
4982+
vpaddd ddq_addbe_4444(%rip),$B00_03,$B04_07
4983+
vpaddd ddq_addbe_4444(%rip),$B04_07,$B08_11
4984+
vpaddd ddq_addbe_4444(%rip),$B08_11,$B12_15
4985+
___
4986+
if ($ctr_setup_code =~ /\Q$ADDBE_4x4\E\b/ || $ctr_setup_code =~ /\Q$ADDBE_1234\E\b/) {
4987+
die "INITIAL_BLOCKS_16: counter setup must not reference the "
4988+
. "ADDBE register args ($ADDBE_4x4/$ADDBE_1234); use ddq_addbe_*(%rip). "
4989+
. "Callers alias these registers to preloaded AES round keys, so this "
4990+
. "would corrupt the CTR keystream (keystream/nonce reuse).\n";
4991+
}
4992+
49204993
$code .= <<___;
49214994
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49224995
# ;; prepare counter blocks
49234996
49244997
cmpb \$`(256 - 16)`,@{[BYTE($CTR_CHECK)]}
49254998
jae .L_next_16_overflow_${label_suffix}
4926-
vpaddd $ADDBE_1234,$CTR,$B00_03
4927-
vpaddd $ADDBE_4x4,$B00_03,$B04_07
4928-
vpaddd $ADDBE_4x4,$B04_07,$B08_11
4929-
vpaddd $ADDBE_4x4,$B08_11,$B12_15
4999+
___
5000+
$code .= $ctr_setup_code;
5001+
$code .= <<___;
49305002
jmp .L_next_16_ok_${label_suffix}
49315003
.L_next_16_overflow_${label_suffix}:
49325004
vpshufb $SHUF_MASK,$CTR,$CTR

0 commit comments

Comments
 (0)