Skip to content

Commit b724ea1

Browse files
Shark64 authored and pablodelara committed
Use SIMD instructions to update pointers.
Signed-off-by: Nicola Torracca <[email protected]>
1 parent d3a248d commit b724ea1

6 files changed, with 32 additions (+32) and 70 deletions (-70)

sha1_mb/sha1_mb_x16_avx512.asm

+5 -10
Original file line number | Diff line number | Diff line change
@@ -490,16 +490,11 @@ lastLoop:
490490
vpaddd E,E,EE
491491

492492
;; update into data pointers
493-
%assign I 0
494-
%rep 8
495-
mov inp0, [IN + (2*I)*8]
496-
mov inp1, [IN + (2*I +1)*8]
497-
add inp0, IDX
498-
add inp1, IDX
499-
mov [IN + (2*I)*8], inp0
500-
mov [IN + (2*I+1)*8], inp1
501-
%assign I (I+1)
502-
%endrep
493+
vpbroadcastq TMP1, IDX
494+
vpaddq TMP0, TMP1, [IN]
495+
vpaddq TMP1, TMP1, [IN+64]
496+
vmovdqu64 [IN], TMP0
497+
vmovdqu64 [IN+64], TMP1
503498

504499
; Write out digest
505500
; Do we need to untranspose digests???

sha1_mb/sha1_mb_x8_avx2.asm

+7 -16
Original file line number | Diff line number | Diff line change
@@ -475,22 +475,13 @@ lloop:
475475
vmovdqu [arg1 + 4*32], E
476476

477477
;; update input pointers
478-
add inp0, IDX
479-
add inp1, IDX
480-
add inp2, IDX
481-
add inp3, IDX
482-
add inp4, IDX
483-
add inp5, IDX
484-
add inp6, IDX
485-
add inp7, IDX
486-
mov [arg1+_data_ptr+0*8], inp0
487-
mov [arg1+_data_ptr+1*8], inp1
488-
mov [arg1+_data_ptr+2*8], inp2
489-
mov [arg1+_data_ptr+3*8], inp3
490-
mov [arg1+_data_ptr+4*8], inp4
491-
mov [arg1+_data_ptr+5*8], inp5
492-
mov [arg1+_data_ptr+6*8], inp6
493-
mov [arg1+_data_ptr+7*8], inp7
478+
vmovq xmm1, IDX
479+
vpbroadcastq ymm1, xmm1
480+
lea IDX, [arg1+_data_ptr]
481+
vpaddq ymm0, ymm1, [IDX]
482+
vpaddq ymm1, ymm1, [IDX+32]
483+
vmovdqu [IDX], ymm0
484+
vmovdqu [IDX+32], ymm1
494485

495486
;;;;;;;;;;;;;;;;
496487
;; Postamble

sha256_mb/sha256_mb_x16_avx512.asm

+5 -10
Original file line number | Diff line number | Diff line change
@@ -607,16 +607,11 @@ lastLoop:
607607
vpaddd H, H, [rsp + _DIGEST_SAVE + 64*7]
608608

609609
;; update into data pointers
610-
%assign I 0
611-
%rep 8
612-
mov inp0, [IN + (2*I)*8]
613-
mov inp1, [IN + (2*I +1)*8]
614-
add inp0, IDX
615-
add inp1, IDX
616-
mov [IN + (2*I)*8], inp0
617-
mov [IN + (2*I+1)*8], inp1
618-
%assign I (I+1)
619-
%endrep
610+
vpbroadcastq TMP1, IDX
611+
vpaddq TMP0, TMP1, [IN]
612+
vpaddq TMP1, TMP1, [IN+64]
613+
vmovdqu64 [IN], TMP0
614+
vmovdqu64 [IN+64], TMP1
620615

621616
; Write out digest
622617
; Do we need to untranspose digests???

sha256_mb/sha256_mb_x8_avx2.asm

+7 -16
Original file line number | Diff line number | Diff line change
@@ -463,22 +463,13 @@ Lrounds_16_xx:
463463
vmovdqu [STATE + 7*SHA256_DIGEST_ROW_SIZE],h
464464

465465
; update input pointers
466-
add inp0, IDX
467-
mov [STATE + _args_data_ptr + 0*8], inp0
468-
add inp1, IDX
469-
mov [STATE + _args_data_ptr + 1*8], inp1
470-
add inp2, IDX
471-
mov [STATE + _args_data_ptr + 2*8], inp2
472-
add inp3, IDX
473-
mov [STATE + _args_data_ptr + 3*8], inp3
474-
add inp4, IDX
475-
mov [STATE + _args_data_ptr + 4*8], inp4
476-
add inp5, IDX
477-
mov [STATE + _args_data_ptr + 5*8], inp5
478-
add inp6, IDX
479-
mov [STATE + _args_data_ptr + 6*8], inp6
480-
add inp7, IDX
481-
mov [STATE + _args_data_ptr + 7*8], inp7
466+
vmovq XWORD(TMP0), IDX
467+
vpbroadcastq TMP1, XWORD(TMP0)
468+
lea IDX, [STATE + _args_data_ptr]
469+
vpaddq TMP0, TMP1, [IDX]
470+
vpaddq TMP1, TMP1, [IDX + 32]
471+
vmovdqu [IDX], TMP0
472+
vmovdqu [IDX+32], TMP1
482473

483474
;;;;;;;;;;;;;;;;
484475
;; Postamble

sha512_mb/sha512_mb_x4_avx2.asm

+5 -8
Original file line number | Diff line number | Diff line change
@@ -379,14 +379,11 @@ Lrounds_16_xx:
379379
vmovdqu [STATE+ 7*SHA512_DIGEST_ROW_SIZE ],h
380380

381381
;; update input data pointers
382-
add inp0, IDX
383-
mov [STATE + _data_ptr_sha512 + 0*PTR_SZ], inp0
384-
add inp1, IDX
385-
mov [STATE + _data_ptr_sha512 + 1*PTR_SZ], inp1
386-
add inp2, IDX
387-
mov [STATE + _data_ptr_sha512 + 2*PTR_SZ], inp2
388-
add inp3, IDX
389-
mov [STATE + _data_ptr_sha512 + 3*PTR_SZ], inp3
382+
vmovq xmm0, IDX
383+
lea IDX, [STATE + _data_ptr_sha512]
384+
vpbroadcastq ymm0, xmm0
385+
vpaddq ymm0, ymm0, [IDX]
386+
vmovdqu [IDX], ymm0
390387

391388
;;;;;;;;;;;;;;;;
392389
;; Postamble

sha512_mb/sha512_mb_x8_avx512.asm

+3 -10
Original file line number | Diff line number | Diff line change
@@ -494,16 +494,9 @@ lastLoop:
494494
vpaddq H, H, [rsp + _DIGEST_SAVE + 64*7]
495495

496496
;; update into data pointers
497-
%assign I 0
498-
%rep 4
499-
mov inp0, [IN + (2*I)*8]
500-
mov inp1, [IN + (2*I +1)*8]
501-
add inp0, IDX
502-
add inp1, IDX
503-
mov [IN + (2*I)*8], inp0
504-
mov [IN + (2*I+1)*8], inp1
505-
%assign I (I+1)
506-
%endrep
497+
vpbroadcastq TMP0, IDX
498+
vpaddq TMP0, TMP0, [IN]
499+
vmovdqu64 [IN], TMP0
507500

508501
VMOVDQ32 [DIGEST + 0*8*8], A
509502
VMOVDQ32 [DIGEST + 1*8*8], B

0 commit comments

Comments (0)