Skip to content

Commit 1a190dc

Browse files
Ard Biesheuvel, mosimchah
Ard Biesheuvel
authored and committed
UPSTREAM: crypto: arm64/aes-ce-cipher - move assembler code to .S file
Most crypto drivers involving kernel mode NEON take care to put the code that actually touches the NEON register file in a separate compilation unit, to prevent the compiler from reordering code that preserves or restores the NEON context with code that may corrupt it. This is necessary because we currently have no way to express the restrictions imposed upon use of the NEON in kernel mode in a way that the compiler understands. However, in the case of aes-ce-cipher, it did not seem unreasonable to deviate from this rule, given how it does not seem possible for the compiler to reorder cross object function calls with asm blocks whose in- and output constraints reflect that it reads from and writes to memory. Now that LTO is being proposed for the arm64 kernel, it is time to revisit this. The link time optimization may replace the function calls to kernel_neon_begin() and kernel_neon_end() with instantiations of the IR that make up its implementation, allowing further reordering with the asm block. So let's clean this up, and move the asm() blocks into a separate .S file. Signed-off-by: Ard Biesheuvel <[email protected]> Reviewed-By: Nick Desaulniers <[email protected]> Signed-off-by: Herbert Xu <[email protected]> Link: https://git.kernel.org/linus/019cd46984d04703a39924178f503a98436ac0d7 Signed-off-by: Nathan Chancellor <[email protected]>
1 parent aaf08a0 commit 1a190dc

File tree

3 files changed

+100
-104
lines changed

3 files changed

+100
-104
lines changed

arch/arm64/crypto/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
1818
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
1919

2020
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
21-
CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
21+
aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
2222

2323
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
2424
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o

arch/arm64/crypto/aes-ce-core.S

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Copyright (C) 2013 - 2017 Linaro Ltd <[email protected]>
3+
*
4+
* This program is free software; you can redistribute it and/or modify
5+
* it under the terms of the GNU General Public License version 2 as
6+
* published by the Free Software Foundation.
7+
*/
8+
9+
#include <linux/linkage.h>
10+
#include <asm/assembler.h>
11+
12+
.arch armv8-a+crypto
13+
14+
ENTRY(__aes_ce_encrypt) /* encrypt one 16-byte block; as used below: x0 = round keys, x1 = out, x2 = in, w3 = rounds */
15+
sub w3, w3, #2 /* w3 = rounds - 2: loop counter, reduced by 3 on every pass */
16+
ld1 {v0.16b}, [x2] /* v0 = 16-byte input block */
17+
ld1 {v1.4s}, [x0], #16 /* v1 = first round key; x0 advances by 16 after each key load */
18+
cmp w3, #10 /* pick the loop entry point from the round count */
19+
bmi 0f /* rounds - 2 < 10: enter at 0, three rounds in the first pass */
20+
bne 3f /* rounds - 2 > 10: enter at 3, one round in the first pass (first key stays in v1) */
21+
mov v3.16b, v1.16b /* rounds - 2 == 10: the first key is consumed from v3 ... */
22+
b 2f /* ... so enter at 2, two rounds in the first pass */
23+
0: mov v2.16b, v1.16b /* the first key is consumed from v2 */
24+
ld1 {v3.4s}, [x0], #16 /* v3 = next round key */
25+
1: aese v0.16b, v2.16b /* round using the key in v2 */
26+
aesmc v0.16b, v0.16b /* mix columns completes the round */
27+
2: ld1 {v1.4s}, [x0], #16 /* v1 = next round key */
28+
aese v0.16b, v3.16b /* round using the key in v3 */
29+
aesmc v0.16b, v0.16b
30+
3: ld1 {v2.4s}, [x0], #16 /* v2 = next round key */
31+
subs w3, w3, #3 /* three rounds accounted for; sets the flags tested by bpl */
32+
aese v0.16b, v1.16b /* round using the key in v1 */
33+
aesmc v0.16b, v0.16b
34+
ld1 {v3.4s}, [x0], #16 /* v3 = next round key (the last one on the final pass) */
35+
bpl 1b /* repeat while the counter is still non-negative */
36+
aese v0.16b, v2.16b /* final round: aese only, no aesmc */
37+
eor v0.16b, v0.16b, v3.16b /* xor in the last round key */
38+
st1 {v0.16b}, [x1] /* store the 16-byte result to out */
39+
ret
40+
ENDPROC(__aes_ce_encrypt)
41+
42+
ENTRY(__aes_ce_decrypt) /* decrypt one 16-byte block; as used below: x0 = round keys, x1 = out, x2 = in, w3 = rounds */
43+
sub w3, w3, #2 /* w3 = rounds - 2: loop counter, reduced by 3 on every pass */
44+
ld1 {v0.16b}, [x2] /* v0 = 16-byte input block */
45+
ld1 {v1.4s}, [x0], #16 /* v1 = first round key; x0 advances by 16 after each key load */
46+
cmp w3, #10 /* pick the loop entry point from the round count */
47+
bmi 0f /* rounds - 2 < 10: enter at 0, three rounds in the first pass */
48+
bne 3f /* rounds - 2 > 10: enter at 3, one round in the first pass (first key stays in v1) */
49+
mov v3.16b, v1.16b /* rounds - 2 == 10: the first key is consumed from v3 ... */
50+
b 2f /* ... so enter at 2, two rounds in the first pass */
51+
0: mov v2.16b, v1.16b /* the first key is consumed from v2 */
52+
ld1 {v3.4s}, [x0], #16 /* v3 = next round key */
53+
1: aesd v0.16b, v2.16b /* inverse round using the key in v2 */
54+
aesimc v0.16b, v0.16b /* inverse mix columns completes the round */
55+
2: ld1 {v1.4s}, [x0], #16 /* v1 = next round key */
56+
aesd v0.16b, v3.16b /* inverse round using the key in v3 */
57+
aesimc v0.16b, v0.16b
58+
3: ld1 {v2.4s}, [x0], #16 /* v2 = next round key */
59+
subs w3, w3, #3 /* three rounds accounted for; sets the flags tested by bpl */
60+
aesd v0.16b, v1.16b /* inverse round using the key in v1 */
61+
aesimc v0.16b, v0.16b
62+
ld1 {v3.4s}, [x0], #16 /* v3 = next round key (the last one on the final pass) */
63+
bpl 1b /* repeat while the counter is still non-negative */
64+
aesd v0.16b, v2.16b /* final round: aesd only, no aesimc */
65+
eor v0.16b, v0.16b, v3.16b /* xor in the last round key */
66+
st1 {v0.16b}, [x1] /* store the 16-byte result to out */
67+
ret
68+
ENDPROC(__aes_ce_decrypt)
69+
70+
/*
71+
* __aes_ce_sub() - use the aese instruction to perform the AES sbox
72+
* substitution on each byte in 'input'
73+
*/
74+
ENTRY(__aes_ce_sub) /* w0 = 32-bit input word; the substituted word is returned in w0 */
75+
dup v1.4s, w0 /* replicate the input word into all four 32-bit lanes of v1 */
76+
movi v0.16b, #0 /* v0 = 0, so the xor inside aese leaves only the input word */
77+
aese v0.16b, v1.16b /* sbox applied to every byte; all lanes are equal, so the row shift does not change lane contents */
78+
umov w0, v0.s[0] /* return lane 0 */
79+
ret
80+
ENDPROC(__aes_ce_sub)
81+
82+
ENTRY(__aes_ce_invert) /* as used below: x0 = destination block, x1 = source block (16 bytes each) */
83+
ld1 {v0.4s}, [x1] /* v0 = 16 bytes read from the source pointer */
84+
aesimc v1.16b, v0.16b /* v1 = inverse mix columns of v0 */
85+
st1 {v1.4s}, [x0] /* write the transformed 16 bytes to the destination pointer */
86+
ret
87+
ENDPROC(__aes_ce_invert)

arch/arm64/crypto/aes-ce-cipher.c renamed to arch/arm64/crypto/aes-ce-glue.c

+12-103
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ struct aes_block {
2929
u8 b[AES_BLOCK_SIZE];
3030
};
3131

32+
asmlinkage void __aes_ce_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
33+
asmlinkage void __aes_ce_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
34+
35+
asmlinkage u32 __aes_ce_sub(u32 l);
36+
asmlinkage void __aes_ce_invert(struct aes_block *out,
37+
const struct aes_block *in);
38+
3239
static int num_rounds(struct crypto_aes_ctx *ctx)
3340
{
3441
/*
@@ -44,123 +51,31 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
4451
static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
4552
{
4653
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
47-
struct aes_block *out = (struct aes_block *)dst;
48-
struct aes_block const *in = (struct aes_block *)src;
49-
void *dummy0;
50-
int dummy1;
5154

5255
if (!may_use_simd()) {
5356
__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
5457
return;
5558
}
5659

5760
kernel_neon_begin();
58-
59-
__asm__(" ld1 {v0.16b}, %[in] ;"
60-
" ld1 {v1.4s}, [%[key]], #16 ;"
61-
" cmp %w[rounds], #10 ;"
62-
" bmi 0f ;"
63-
" bne 3f ;"
64-
" mov v3.16b, v1.16b ;"
65-
" b 2f ;"
66-
"0: mov v2.16b, v1.16b ;"
67-
" ld1 {v3.4s}, [%[key]], #16 ;"
68-
"1: aese v0.16b, v2.16b ;"
69-
" aesmc v0.16b, v0.16b ;"
70-
"2: ld1 {v1.4s}, [%[key]], #16 ;"
71-
" aese v0.16b, v3.16b ;"
72-
" aesmc v0.16b, v0.16b ;"
73-
"3: ld1 {v2.4s}, [%[key]], #16 ;"
74-
" subs %w[rounds], %w[rounds], #3 ;"
75-
" aese v0.16b, v1.16b ;"
76-
" aesmc v0.16b, v0.16b ;"
77-
" ld1 {v3.4s}, [%[key]], #16 ;"
78-
" bpl 1b ;"
79-
" aese v0.16b, v2.16b ;"
80-
" eor v0.16b, v0.16b, v3.16b ;"
81-
" st1 {v0.16b}, %[out] ;"
82-
83-
: [out] "=Q"(*out),
84-
[key] "=r"(dummy0),
85-
[rounds] "=r"(dummy1)
86-
: [in] "Q"(*in),
87-
"1"(ctx->key_enc),
88-
"2"(num_rounds(ctx) - 2)
89-
: "cc");
90-
61+
__aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
9162
kernel_neon_end();
9263
}
9364

9465
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
9566
{
9667
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
97-
struct aes_block *out = (struct aes_block *)dst;
98-
struct aes_block const *in = (struct aes_block *)src;
99-
void *dummy0;
100-
int dummy1;
10168

10269
if (!may_use_simd()) {
10370
__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
10471
return;
10572
}
10673

10774
kernel_neon_begin();
108-
109-
__asm__(" ld1 {v0.16b}, %[in] ;"
110-
" ld1 {v1.4s}, [%[key]], #16 ;"
111-
" cmp %w[rounds], #10 ;"
112-
" bmi 0f ;"
113-
" bne 3f ;"
114-
" mov v3.16b, v1.16b ;"
115-
" b 2f ;"
116-
"0: mov v2.16b, v1.16b ;"
117-
" ld1 {v3.4s}, [%[key]], #16 ;"
118-
"1: aesd v0.16b, v2.16b ;"
119-
" aesimc v0.16b, v0.16b ;"
120-
"2: ld1 {v1.4s}, [%[key]], #16 ;"
121-
" aesd v0.16b, v3.16b ;"
122-
" aesimc v0.16b, v0.16b ;"
123-
"3: ld1 {v2.4s}, [%[key]], #16 ;"
124-
" subs %w[rounds], %w[rounds], #3 ;"
125-
" aesd v0.16b, v1.16b ;"
126-
" aesimc v0.16b, v0.16b ;"
127-
" ld1 {v3.4s}, [%[key]], #16 ;"
128-
" bpl 1b ;"
129-
" aesd v0.16b, v2.16b ;"
130-
" eor v0.16b, v0.16b, v3.16b ;"
131-
" st1 {v0.16b}, %[out] ;"
132-
133-
: [out] "=Q"(*out),
134-
[key] "=r"(dummy0),
135-
[rounds] "=r"(dummy1)
136-
: [in] "Q"(*in),
137-
"1"(ctx->key_dec),
138-
"2"(num_rounds(ctx) - 2)
139-
: "cc");
140-
75+
__aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
14176
kernel_neon_end();
14277
}
14378

144-
/*
145-
* aes_sub() - use the aese instruction to perform the AES sbox substitution
146-
* on each byte in 'input'
147-
*/
148-
static u32 aes_sub(u32 input)
149-
{
150-
u32 ret;
151-
152-
__asm__("dup v1.4s, %w[in] ;"
153-
"movi v0.16b, #0 ;"
154-
"aese v0.16b, v1.16b ;"
155-
"umov %w[out], v0.4s[0] ;"
156-
157-
: [out] "=r"(ret)
158-
: [in] "r"(input)
159-
: "v0","v1");
160-
161-
return ret;
162-
}
163-
16479
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
16580
unsigned int key_len)
16681
{
@@ -189,7 +104,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
189104
u32 *rki = ctx->key_enc + (i * kwords);
190105
u32 *rko = rki + kwords;
191106

192-
rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
107+
rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
193108
rko[1] = rko[0] ^ rki[1];
194109
rko[2] = rko[1] ^ rki[2];
195110
rko[3] = rko[2] ^ rki[3];
@@ -202,7 +117,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
202117
} else if (key_len == AES_KEYSIZE_256) {
203118
if (i >= 6)
204119
break;
205-
rko[4] = aes_sub(rko[3]) ^ rki[4];
120+
rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
206121
rko[5] = rko[4] ^ rki[5];
207122
rko[6] = rko[5] ^ rki[6];
208123
rko[7] = rko[6] ^ rki[7];
@@ -221,13 +136,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
221136

222137
key_dec[0] = key_enc[j];
223138
for (i = 1, j--; j > 0; i++, j--)
224-
__asm__("ld1 {v0.4s}, %[in] ;"
225-
"aesimc v1.16b, v0.16b ;"
226-
"st1 {v1.4s}, %[out] ;"
227-
228-
: [out] "=Q"(key_dec[i])
229-
: [in] "Q"(key_enc[j])
230-
: "v0","v1");
139+
__aes_ce_invert(key_dec + i, key_enc + j);
231140
key_dec[i] = key_enc[0];
232141

233142
kernel_neon_end();

0 commit comments

Comments
 (0)