|
226 | 226 | } |
227 | 227 |
|
228 | 228 | # sbox operation for one single word |
| 229 | +# NOTE: this variant is very slow; sbox_1word_gpr is used instead. |
229 | 230 | sub sbox_1word () { |
230 | 231 | my $word = shift; |
231 | 232 |
|
|
250 | 251 | ___ |
251 | 252 | } |
252 | 253 |
|
# S-box substitution followed by the linear transform for one 32-bit word,
# done entirely in general-purpose registers with byte loads from the
# .Lsbox lookup table.
#
# Argument:
#   the name of the w-register holding the input word; the transformed
#   result is left in that same register.
# Effect:
#   appends the generated AArch64 instructions to $code.
# Clobbers:
#   x7 (table pointer) and w9/x9, w16/x16, w17/x17 (byte scratch).
#   The argument register is still read after the scratch registers have
#   been overwritten, so it must not be any of the registers listed above.
#
# The empty prototype mirrors the other sub declarations in this file;
# callers invoke the sub as &sbox_1word_gpr(...), which bypasses it.
sub sbox_1word_gpr () {
	my $word = shift;
	my ($ptr, $byte0, $byte1, $byte2) = ("x7", "w9", "w16", "w17");
	# 64-bit names of the scratch registers, used as the load offsets.
	# They are derived from the w-names above so that the two can never
	# drift apart if the register assignment is changed.  Writing a
	# w-register zeroes the upper half of the x-register, so the extracted
	# byte is a valid 64-bit index.
	my ($idx0, $idx1, $idx2) =
	    map { "x" . substr($_, 1) } ($byte0, $byte1, $byte2);

$code.=<<___;
	// Get the base address of the S-Box lookup table
	adrp	$ptr, .Lsbox
	add	$ptr, $ptr, #:lo12:.Lsbox

	// Extract each byte from the 32-bit input word, look it up, and reassemble
	and	$byte0, $word, #0xff
	ubfx	$byte1, $word, #8, #8
	ubfx	$byte2, $word, #16, #8
	ldrb	$byte0, [$ptr, $idx0]
	ldrb	$byte1, [$ptr, $idx1]
	ldrb	$byte2, [$ptr, $idx2]
	orr	$byte0, $byte0, $byte1, lsl #8
	// $byte1 is free again: reuse it for the top byte of the input
	lsr	$byte1, $word, #24
	orr	$byte0, $byte0, $byte2, lsl #16
	ldrb	$byte1, [$ptr, $idx1]
	orr	$byte0, $byte0, $byte1, lsl #24

	// Linear transform: t ^ rol(t,2) ^ rol(t,10) ^ rol(t,18) ^ rol(t,24)
	eor	$word,$byte0,$byte0,ror #32-2
	eor	$word,$word,$byte0,ror #32-10
	eor	$word,$word,$byte0,ror #32-18
	eor	$word,$word,$byte0,ror #32-24
___
}
| 282 | + |
253 | 283 | # sm4 for one block of data, in scalar registers word0/word1/word2/word3 |
254 | 284 | sub sm4_1blk () { |
255 | 285 | my $kptr = shift; |
|
260 | 290 | eor $tmpw,$word2,$word3 |
261 | 291 | eor $wtmp2,$wtmp0,$word1 |
262 | 292 | eor $tmpw,$tmpw,$wtmp2 |
263 | | - // Pre-load next round keys |
264 | | - ldp w16,w17,[$kptr],8 |
265 | 293 | ___ |
266 | | - &sbox_1word($tmpw); |
| 294 | + &sbox_1word_gpr($tmpw); |
267 | 295 | $code.=<<___; |
268 | 296 | eor $word0,$word0,$tmpw |
269 | 297 | // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) |
270 | 298 | eor $tmpw,$word2,$word3 |
271 | 299 | eor $wtmp2,$word0,$wtmp1 |
272 | 300 | eor $tmpw,$tmpw,$wtmp2 |
273 | 301 | ___ |
274 | | - &sbox_1word($tmpw); |
| 302 | + &sbox_1word_gpr($tmpw); |
275 | 303 | $code.=<<___; |
276 | | - //ldp $wtmp0,$wtmp1,[$kptr],8 |
| 304 | + ldp $wtmp0,$wtmp1,[$kptr],8 |
277 | 305 | eor $word1,$word1,$tmpw |
278 | 306 | // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) |
279 | 307 | eor $tmpw,$word0,$word1 |
280 | | - //eor $wtmp2,$wtmp0,$word3 |
281 | | - eor $wtmp2,w16,$word3 |
| 308 | + eor $wtmp2,$wtmp0,$word3 |
282 | 309 | eor $tmpw,$tmpw,$wtmp2 |
283 | 310 | ___ |
284 | | - &sbox_1word($tmpw); |
| 311 | + &sbox_1word_gpr($tmpw); |
285 | 312 | $code.=<<___; |
286 | 313 | eor $word2,$word2,$tmpw |
287 | 314 | // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) |
288 | 315 | eor $tmpw,$word0,$word1 |
289 | | - //eor $wtmp2,$word2,$wtmp1 |
290 | | - eor $wtmp2,$word2,w17 |
| 316 | + eor $wtmp2,$word2,$wtmp1 |
291 | 317 | eor $tmpw,$tmpw,$wtmp2 |
292 | 318 | ___ |
293 | | - &sbox_1word($tmpw); |
| 319 | + &sbox_1word_gpr($tmpw); |
294 | 320 | $code.=<<___; |
295 | 321 | eor $word3,$word3,$tmpw |
296 | 322 | ___ |
|
549 | 575 | .type _${prefix}_consts,%object |
550 | 576 | .align 7 |
551 | 577 | _${prefix}_consts: |
| 578 | +.Lsbox: |
| 579 | + .byte 0xD6,0x90,0xE9,0xFE,0xCC,0xE1,0x3D,0xB7,0x16,0xB6,0x14,0xC2,0x28,0xFB,0x2C,0x05 |
| 580 | + .byte 0x2B,0x67,0x9A,0x76,0x2A,0xBE,0x04,0xC3,0xAA,0x44,0x13,0x26,0x49,0x86,0x06,0x99 |
| 581 | + .byte 0x9C,0x42,0x50,0xF4,0x91,0xEF,0x98,0x7A,0x33,0x54,0x0B,0x43,0xED,0xCF,0xAC,0x62 |
| 582 | + .byte 0xE4,0xB3,0x1C,0xA9,0xC9,0x08,0xE8,0x95,0x80,0xDF,0x94,0xFA,0x75,0x8F,0x3F,0xA6 |
| 583 | + .byte 0x47,0x07,0xA7,0xFC,0xF3,0x73,0x17,0xBA,0x83,0x59,0x3C,0x19,0xE6,0x85,0x4F,0xA8 |
| 584 | + .byte 0x68,0x6B,0x81,0xB2,0x71,0x64,0xDA,0x8B,0xF8,0xEB,0x0F,0x4B,0x70,0x56,0x9D,0x35 |
| 585 | + .byte 0x1E,0x24,0x0E,0x5E,0x63,0x58,0xD1,0xA2,0x25,0x22,0x7C,0x3B,0x01,0x21,0x78,0x87 |
| 586 | + .byte 0xD4,0x00,0x46,0x57,0x9F,0xD3,0x27,0x52,0x4C,0x36,0x02,0xE7,0xA0,0xC4,0xC8,0x9E |
| 587 | + .byte 0xEA,0xBF,0x8A,0xD2,0x40,0xC7,0x38,0xB5,0xA3,0xF7,0xF2,0xCE,0xF9,0x61,0x15,0xA1 |
| 588 | + .byte 0xE0,0xAE,0x5D,0xA4,0x9B,0x34,0x1A,0x55,0xAD,0x93,0x32,0x30,0xF5,0x8C,0xB1,0xE3 |
| 589 | + .byte 0x1D,0xF6,0xE2,0x2E,0x82,0x66,0xCA,0x60,0xC0,0x29,0x23,0xAB,0x0D,0x53,0x4E,0x6F |
| 590 | + .byte 0xD5,0xDB,0x37,0x45,0xDE,0xFD,0x8E,0x2F,0x03,0xFF,0x6A,0x72,0x6D,0x6C,0x5B,0x51 |
| 591 | + .byte 0x8D,0x1B,0xAF,0x92,0xBB,0xDD,0xBC,0x7F,0x11,0xD9,0x5C,0x41,0x1F,0x10,0x5A,0xD8 |
| 592 | + .byte 0x0A,0xC1,0x31,0x88,0xA5,0xCD,0x7B,0xBD,0x2D,0x74,0xD0,0x12,0xB8,0xE5,0xB4,0xB0 |
| 593 | + .byte 0x89,0x69,0x97,0x4A,0x0C,0x96,0x77,0x7E,0x65,0xB9,0xF1,0x09,0xC5,0x6E,0xC6,0x84 |
| 594 | + .byte 0x18,0xF0,0x7D,0xEC,0x3A,0xDC,0x4D,0x20,0x79,0xEE,0x5F,0x3E,0xD7,0xCB,0x39,0x48 |
552 | 595 | .Lck: |
553 | 596 | .long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 |
554 | 597 | .long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 |
|
0 commit comments