Skip to content

Commit 7e080da

Browse files
committed
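Added sbox_1word_gpr(), a general-purpose-register table-lookup S-box, to vpsm4_ex-armv8.pl and switched sm4_1blk() to use it; added the .Lsbox table it reads. Removed the commented-out round-key pre-load lines from vpsm4-armv8.pl. Re-enabled the VPSM4_EX (Neon+AES) capability check in the SM4 CBC test.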
1 parent f9371dd commit 7e080da

3 files changed

Lines changed: 67 additions & 28 deletions

File tree

crypto/sm4/asm/vpsm4-armv8.pl

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,6 @@ ()
288288
eor $tmpw,$word2,$word3
289289
eor $wtmp2,$wtmp0,$word1
290290
eor $tmpw,$tmpw,$wtmp2
291-
// Pre-load next round keys
292-
//ldp w16,w17,[$kptr],8
293291
___
294292
&sbox_1word_gpr($tmpw);
295293
$code.=<<___;
@@ -306,7 +304,6 @@ ()
306304
// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
307305
eor $tmpw,$word0,$word1
308306
eor $wtmp2,$wtmp0,$word3
309-
//eor $wtmp2,w16,$word3
310307
eor $tmpw,$tmpw,$wtmp2
311308
___
312309
&sbox_1word_gpr($tmpw);
@@ -315,7 +312,6 @@ ()
315312
// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
316313
eor $tmpw,$word0,$word1
317314
eor $wtmp2,$word2,$wtmp1
318-
//eor $wtmp2,$word2,w17
319315
eor $tmpw,$tmpw,$wtmp2
320316
___
321317
&sbox_1word_gpr($tmpw);

crypto/sm4/asm/vpsm4_ex-armv8.pl

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ ()
226226
}
227227

228228
# sbox operation for one single word
229+
# NOTE: this version is very slow; sm4_1blk() now calls sbox_1word_gpr() instead.
229230
sub sbox_1word () {
230231
my $word = shift;
231232

@@ -250,6 +251,35 @@ ()
250251
___
251252
}
252253

254+
# sbox_1word_gpr($word)
#
# Appends to $code an AArch64 sequence that transforms one 32-bit word in
# place using general-purpose registers only:
#   1. materialise the address of the .Lsbox byte table (adrp + add :lo12:);
#   2. split $word into its four bytes, replace each byte by the table entry
#      it indexes (ldrb), and reassemble the four results in the same byte
#      positions;
#   3. write back to $word the XOR of that substituted word B with B rotated
#      left by 2, 10, 18 and 24 bits (encoded as "ror #32-n").
#
# Parameter:
#   $word - name of the 32-bit register holding the input; it receives the
#           result. It must not be one of the scratch registers listed
#           below, because the first instruction overwrites w9 before the
#           remaining bytes of $word are extracted.
#
# Scratch registers overwritten by the emitted code: x7, w9, w16, w17.
#
# NOTE(review): the ldrb lines spell the index registers as literal
# x9/x16/x17, i.e. the 64-bit names of $byte0/$byte1/$byte2. They must be
# changed together with the register tuple below. The sequence relies on
# 32-bit register writes zero-extending into the 64-bit register.
#
# NOTE(review): the table is indexed by bytes of the input word, so the
# memory access pattern depends on the data - confirm this is acceptable
# for the intended threat model.
sub sbox_1word_gpr () {
255+
my $word = shift;
256+
# $ptr holds the table base; $byte0 doubles as the accumulator for the
# reassembled word, $byte1 is reused for both byte 1 and byte 3.
my ($ptr, $byte0, $byte1, $byte2) = ("x7", "w9", "w16", "w17");
257+
258+
$code.=<<___;
259+
// Get the base address of the S-Box lookup table
260+
adrp $ptr, .Lsbox
261+
add $ptr, $ptr, #:lo12:.Lsbox
262+
263+
// Extract each byte from the 32-bit input word, look it up, and reassemble
264+
and $byte0, $word, #0xff
265+
ubfx $byte1, $word, #8, #8
266+
ubfx $byte2, $word, #16, #8
267+
ldrb $byte0, [$ptr, x9]
268+
ldrb $byte1, [$ptr, x16]
269+
ldrb $byte2, [$ptr, x17]
270+
orr $byte0, $byte0, $byte1, lsl #8
271+
lsr $byte1, $word, #24
272+
orr $byte0, $byte0, $byte2, lsl #16
273+
ldrb $byte1, [$ptr, x16]
274+
orr $byte0, $byte0, $byte1, lsl #24
275+
276+
eor $word,$byte0,$byte0,ror #32-2
277+
eor $word,$word,$byte0,ror #32-10
278+
eor $word,$word,$byte0,ror #32-18
279+
eor $word,$word,$byte0,ror #32-24
280+
___
281+
}
282+
253283
# sm4 for one block of data, in scalar registers word0/word1/word2/word3
254284
sub sm4_1blk () {
255285
my $kptr = shift;
@@ -260,37 +290,33 @@ ()
260290
eor $tmpw,$word2,$word3
261291
eor $wtmp2,$wtmp0,$word1
262292
eor $tmpw,$tmpw,$wtmp2
263-
// Pre-load next round keys
264-
ldp w16,w17,[$kptr],8
265293
___
266-
&sbox_1word($tmpw);
294+
&sbox_1word_gpr($tmpw);
267295
$code.=<<___;
268296
eor $word0,$word0,$tmpw
269297
// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
270298
eor $tmpw,$word2,$word3
271299
eor $wtmp2,$word0,$wtmp1
272300
eor $tmpw,$tmpw,$wtmp2
273301
___
274-
&sbox_1word($tmpw);
302+
&sbox_1word_gpr($tmpw);
275303
$code.=<<___;
276-
//ldp $wtmp0,$wtmp1,[$kptr],8
304+
ldp $wtmp0,$wtmp1,[$kptr],8
277305
eor $word1,$word1,$tmpw
278306
// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
279307
eor $tmpw,$word0,$word1
280-
//eor $wtmp2,$wtmp0,$word3
281-
eor $wtmp2,w16,$word3
308+
eor $wtmp2,$wtmp0,$word3
282309
eor $tmpw,$tmpw,$wtmp2
283310
___
284-
&sbox_1word($tmpw);
311+
&sbox_1word_gpr($tmpw);
285312
$code.=<<___;
286313
eor $word2,$word2,$tmpw
287314
// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
288315
eor $tmpw,$word0,$word1
289-
//eor $wtmp2,$word2,$wtmp1
290-
eor $wtmp2,$word2,w17
316+
eor $wtmp2,$word2,$wtmp1
291317
eor $tmpw,$tmpw,$wtmp2
292318
___
293-
&sbox_1word($tmpw);
319+
&sbox_1word_gpr($tmpw);
294320
$code.=<<___;
295321
eor $word3,$word3,$tmpw
296322
___
@@ -549,6 +575,23 @@ ()
549575
.type _${prefix}_consts,%object
550576
.align 7
551577
_${prefix}_consts:
578+
.Lsbox:
579+
.byte 0xD6,0x90,0xE9,0xFE,0xCC,0xE1,0x3D,0xB7,0x16,0xB6,0x14,0xC2,0x28,0xFB,0x2C,0x05
580+
.byte 0x2B,0x67,0x9A,0x76,0x2A,0xBE,0x04,0xC3,0xAA,0x44,0x13,0x26,0x49,0x86,0x06,0x99
581+
.byte 0x9C,0x42,0x50,0xF4,0x91,0xEF,0x98,0x7A,0x33,0x54,0x0B,0x43,0xED,0xCF,0xAC,0x62
582+
.byte 0xE4,0xB3,0x1C,0xA9,0xC9,0x08,0xE8,0x95,0x80,0xDF,0x94,0xFA,0x75,0x8F,0x3F,0xA6
583+
.byte 0x47,0x07,0xA7,0xFC,0xF3,0x73,0x17,0xBA,0x83,0x59,0x3C,0x19,0xE6,0x85,0x4F,0xA8
584+
.byte 0x68,0x6B,0x81,0xB2,0x71,0x64,0xDA,0x8B,0xF8,0xEB,0x0F,0x4B,0x70,0x56,0x9D,0x35
585+
.byte 0x1E,0x24,0x0E,0x5E,0x63,0x58,0xD1,0xA2,0x25,0x22,0x7C,0x3B,0x01,0x21,0x78,0x87
586+
.byte 0xD4,0x00,0x46,0x57,0x9F,0xD3,0x27,0x52,0x4C,0x36,0x02,0xE7,0xA0,0xC4,0xC8,0x9E
587+
.byte 0xEA,0xBF,0x8A,0xD2,0x40,0xC7,0x38,0xB5,0xA3,0xF7,0xF2,0xCE,0xF9,0x61,0x15,0xA1
588+
.byte 0xE0,0xAE,0x5D,0xA4,0x9B,0x34,0x1A,0x55,0xAD,0x93,0x32,0x30,0xF5,0x8C,0xB1,0xE3
589+
.byte 0x1D,0xF6,0xE2,0x2E,0x82,0x66,0xCA,0x60,0xC0,0x29,0x23,0xAB,0x0D,0x53,0x4E,0x6F
590+
.byte 0xD5,0xDB,0x37,0x45,0xDE,0xFD,0x8E,0x2F,0x03,0xFF,0x6A,0x72,0x6D,0x6C,0x5B,0x51
591+
.byte 0x8D,0x1B,0xAF,0x92,0xBB,0xDD,0xBC,0x7F,0x11,0xD9,0x5C,0x41,0x1F,0x10,0x5A,0xD8
592+
.byte 0x0A,0xC1,0x31,0x88,0xA5,0xCD,0x7B,0xBD,0x2D,0x74,0xD0,0x12,0xB8,0xE5,0xB4,0xB0
593+
.byte 0x89,0x69,0x97,0x4A,0x0C,0x96,0x77,0x7E,0x65,0xB9,0xF1,0x09,0xC5,0x6E,0xC6,0x84
594+
.byte 0x18,0xF0,0x7D,0xEC,0x3A,0xDC,0x4D,0x20,0x79,0xEE,0x5F,0x3E,0xD7,0xCB,0x39,0x48
552595
.Lck:
553596
.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269
554597
.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9

test/sm4_internal_test.c

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -169,12 +169,12 @@ static int test_sm4_cbc(void)
169169
/* --- Test Encryption --- */
170170
ossl_sm4_set_key(key_bytes, &key);
171171
memcpy(iv, iv_bytes, SM4_BLOCK_SIZE); /* Use a working copy of the IV */
172-
//#if defined(VPSM4_EX_CAPABLE)
173-
// if (vpsm4_ex_capable()) {
174-
// vpsm4_ex_cbc_encrypt(plaintext, ciphertext, sizeof(plaintext), &key, iv,
175-
// SM4_ENCRYPT);
176-
// } else
177-
//#endif
172+
#if defined(VPSM4_EX_CAPABLE)
173+
if (vpsm4_ex_capable()) {
174+
vpsm4_ex_cbc_encrypt(plaintext, ciphertext, sizeof(plaintext), &key, iv,
175+
SM4_ENCRYPT);
176+
} else
177+
#endif
178178
#if defined(VPSM4_CAPABLE)
179179
if (vpsm4_capable()) {
180180
vpsm4_cbc_encrypt(plaintext, ciphertext, sizeof(plaintext), &key, iv,
@@ -192,13 +192,13 @@ static int test_sm4_cbc(void)
192192

193193
/* --- Test Decryption --- */
194194
memcpy(iv, iv_bytes, SM4_BLOCK_SIZE); /* Reset IV for decryption */
195-
//#if defined(VPSM4_EX_CAPABLE)
196-
// if (vpsm4_ex_capable()) {
197-
// vpsm4_ex_set_decrypt_key(key_bytes, &key);
198-
// vpsm4_ex_cbc_encrypt(ciphertext, decrypted, sizeof(ciphertext), &key, iv,
199-
// SM4_DECRYPT);
200-
// } else
201-
//#endif
195+
#if defined(VPSM4_EX_CAPABLE)
196+
if (vpsm4_ex_capable()) {
197+
vpsm4_ex_set_decrypt_key(key_bytes, &key);
198+
vpsm4_ex_cbc_encrypt(ciphertext, decrypted, sizeof(ciphertext), &key, iv,
199+
SM4_DECRYPT);
200+
} else
201+
#endif
202202
#if defined(VPSM4_CAPABLE)
203203
if (vpsm4_capable()) {
204204
vpsm4_set_decrypt_key(key_bytes, &key);

0 commit comments

Comments
 (0)