
Commit dc8b7db

Ard Biesheuvel authored and mosimchah committed
UPSTREAM: crypto: arm64/aes-ce-cipher - match round key endianness with generic code

In order to be able to reuse the generic AES code as a fallback for
situations where the NEON may not be used, update the key handling
to match the byte order of the generic code: it stores round keys
as sequences of 32-bit quantities rather than streams of bytes, and
so our code needs to be updated to reflect that.

Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
Link: https://git.kernel.org/linus/f402e3115e20b345bd6fbfcf463a506d958c7bf6
Signed-off-by: Nathan Chancellor <[email protected]>
1 parent 767f0c0 commit dc8b7db
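At a glance: the patch changes the storage convention for the expanded AES key so that ctx->key_enc[] holds CPU-native 32-bit words decoded from the key's little-endian byte stream, which is the layout the generic AES code already uses. A condensed sketch of the new convention, assuming kernel context (the helper name store_key_words is ours; the body mirrors the ce_aes_expandkey hunk below):

#include <asm/unaligned.h>	/* get_unaligned_le32() */
#include <crypto/aes.h>		/* struct crypto_aes_ctx, u8/u32 types */

/* Decode the raw key into native u32 round-key words, replacing the old
 * memcpy() of the byte stream. On big-endian kernels this is where the
 * byte order gets fixed up; on little-endian kernels it is equivalent
 * to the old copy. */
static void store_key_words(struct crypto_aes_ctx *ctx,
			    const u8 *in_key, unsigned int key_len)
{
	int i, kwords = key_len / sizeof(u32);

	ctx->key_length = key_len;
	for (i = 0; i < kwords; i++)
		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
}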

File tree

arch/arm64/crypto/aes-ce-ccm-core.S
arch/arm64/crypto/aes-ce-cipher.c
arch/arm64/crypto/aes-ce.S

3 files changed (+37, -40)

arch/arm64/crypto/aes-ce-ccm-core.S (+15, -15)
@@ -1,7 +1,7 @@
 /*
  * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <[email protected]>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <[email protected]>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	beq	8f			/* out of input? */
 	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.16b}, [x4]			/* load first round key */
+1:	ld1	{v3.4s}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
 	cmp	w5, #12			/* which key size? */
 	add	x6, x4, #16
@@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
 	mov	v5.16b, v3.16b
 	b	4f
 2:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
 3:	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
+4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
+5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
-	ld1	{v5.16b}, [x6], #16		/* load next round key */
+	ld1	{v5.4s}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
 	subs	w2, w2, #16		/* last data? */
@@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data)
  *			 u32 rounds);
  */
 ENTRY(ce_aes_ccm_final)
-	ld1	{v3.16b}, [x2], #16		/* load first round key */
+	ld1	{v3.4s}, [x2], #16		/* load first round key */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12			/* which key size? */
 	sub	w3, w3, #2		/* modified # of rounds */
@@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final)
 	mov	v5.16b, v3.16b
 	b	2f
 0:	mov	v4.16b, v3.16b
-1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
+1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
+2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
+3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
@@ -137,31 +137,31 @@ CPU_LE(	rev	x8, x8	)	/* keep swabbed ctr in reg */
 	cmp	w4, #12			/* which key size? */
 	sub	w7, w4, #2		/* get modified # of rounds */
 	ins	v1.d[1], x9		/* no carry in lower ctr */
-	ld1	{v3.16b}, [x3]			/* load first round key */
+	ld1	{v3.4s}, [x3]			/* load first round key */
 	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
 	b	3f
 1:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
+3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
+4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
-	ld1	{v5.16b}, [x10], #16		/* load next round key */
+	ld1	{v5.4s}, [x10], #16		/* load next round key */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
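Every ld1 of a round key above moves from .16b (byte) elements to .4s (32-bit) elements. The difference only shows on big-endian kernels: byte-element loads preserve memory byte order, while 32-bit-element loads read each word with the CPU's data endianness, so the vector lanes end up holding exactly the u32 values the C code stored. A minimal plain-C sketch of the underlying byte-order effect (ours, not NEON code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t word = 0x03020100;	/* a round-key word stored as a native u32 */
	uint8_t view[4];

	/* Byte-stream view, analogous to ld1 {v.16b}: depends on endianness. */
	memcpy(view, &word, sizeof(word));

	/* Prints "00 01 02 03" on little-endian and "03 02 01 00" on
	 * big-endian, while the u32 value (analogous to what a .4s element
	 * load delivers per lane) is 0x03020100 on both. */
	printf("bytes: %02x %02x %02x %02x, word: 0x%08x\n",
	       view[0], view[1], view[2], view[3], word);
	return 0;
}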

arch/arm64/crypto/aes-ce-cipher.c (+16, -19)
@@ -1,14 +1,15 @@
 /*
  * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <[email protected]>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <[email protected]>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
 #include <asm/neon.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
@@ -47,24 +48,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aese	v0.16b, v2.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aese	v0.16b, v3.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aese	v0.16b, v1.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aese	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -92,24 +93,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aesd	v0.16b, v2.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aesd	v0.16b, v3.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aesd	v0.16b, v1.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aesd	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -165,20 +166,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	    key_len != AES_KEYSIZE_256)
 		return -EINVAL;
 
-	memcpy(ctx->key_enc, in_key, key_len);
 	ctx->key_length = key_len;
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
 	kernel_neon_begin_partial(2);
 	for (i = 0; i < sizeof(rcon); i++) {
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
-#else
-		rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
-			 rki[0];
-#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -210,9 +207,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 
 	key_dec[0] = key_enc[j];
 	for (i = 1, j--; j > 0; i++, j--)
-		__asm__("ld1	{v0.16b}, %[in]		;"
+		__asm__("ld1	{v0.4s}, %[in]		;"
 			"aesimc	v1.16b, v0.16b	;"
-			"st1	{v1.16b}, %[out]	;"
+			"st1	{v1.4s}, %[out]	;"
 
 		: [out]	"=Q"(key_dec[i])
 		: [in]	"Q"(key_enc[j])
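The ce_aes_expandkey() hunk above is where the convention is established: the memcpy() of raw key bytes becomes a per-word little-endian decode, which in turn lets the big-endian special case in the rcon step (the removed #ifdef) collapse into the single generic expression. For readers unfamiliar with the helper, get_unaligned_le32() behaves roughly like this freestanding sketch (ours, not the kernel's actual implementation):

#include <stdint.h>

/* Assemble a native u32 from four little-endian bytes, with no alignment
 * requirement on p. On a big-endian kernel this is effectively a byte
 * swap, so ctx->key_enc[] holds the same numeric values on either
 * endianness. */
static uint32_t get_unaligned_le32_sketch(const uint8_t *p)
{
	return (uint32_t)p[0] |
	       ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) |
	       ((uint32_t)p[3] << 24);
}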

arch/arm64/crypto/aes-ce.S (+6, -6)
@@ -2,7 +2,7 @@
  * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
  * Crypto Extensions
  *
- * Copyright (C) 2013 Linaro Ltd <[email protected]>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <[email protected]>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -21,11 +21,11 @@
 	cmp	\rounds, #12
 	blo	2222f		/* 128 bits */
 	beq	1111f		/* 192 bits */
-	ld1	{v17.16b-v18.16b}, [\rk], #32
-1111:	ld1	{v19.16b-v20.16b}, [\rk], #32
-2222:	ld1	{v21.16b-v24.16b}, [\rk], #64
-	ld1	{v25.16b-v28.16b}, [\rk], #64
-	ld1	{v29.16b-v31.16b}, [\rk]
+	ld1	{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1	{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1	{v21.4s-v24.4s}, [\rk], #64
+	ld1	{v25.4s-v28.4s}, [\rk], #64
+	ld1	{v29.4s-v31.4s}, [\rk]
 	.endm
 
 	/* prepare for encryption with key in rk[] */
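As a quick sanity check on the branch structure in this round-key loading macro: an n-round AES key schedule has n + 1 round keys of one 128-bit vector each, so the three entry points line up with the three key sizes:

AES-128: 10 rounds, 11 round keys, branch to 2222f, load v21-v31
AES-192: 12 rounds, 13 round keys, branch to 1111f, load v19-v31
AES-256: 14 rounds, 15 round keys, fall through, load v17-v31

In every case the keys are now read with .4s elements, consistent with the loads in the other two files.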
