@@ -445,17 +445,16 @@ sub _ghash_4x {
     # Function arguments
     my ( $GHASH_ACC_PTR, $HTABLE, $AAD, $AADLEN ) = @argregs[ 0 .. 3 ];
 
-    # Additional local variables
-    my ( $TMP0, $TMP0_XMM ) = ( "%ymm0", "%xmm0" );
-    my ( $TMP1, $TMP1_XMM ) = ( "%ymm1", "%xmm1" );
-    my ( $TMP2, $TMP2_XMM ) = ( "%ymm2", "%xmm2" );
-    my ( $LO, $LO_XMM ) = ( "%ymm3", "%xmm3" );
-    my ( $MI, $MI_XMM ) = ( "%ymm4", "%xmm4" );
-    my ( $GHASH_ACC, $GHASH_ACC_XMM ) = ( "%ymm5", "%xmm5" );
-    my ( $BSWAP_MASK, $BSWAP_MASK_XMM ) = ( "%ymm6", "%xmm6" );
-    my ( $GFPOLY, $GFPOLY_XMM ) = ( "%ymm7", "%xmm7" );
-    my $H_POW2_XORED = "%ymm8";
-    my $H_POW1_XORED = "%ymm9";
+    # Additional local variables.
+    # Unlike upstream, we avoid YMM registers in this function.
+    my $TMP0_XMM = "%xmm0";
+    my $TMP1_XMM = "%xmm1";
+    my $TMP2_XMM = "%xmm2";
+    my $LO_XMM = "%xmm3";
+    my $MI_XMM = "%xmm4";
+    my $GHASH_ACC_XMM = "%xmm5";
+    my $BSWAP_MASK_XMM = "%xmm6";
+    my $GFPOLY_XMM = "%xmm7";
 
     $code .= <<___;
 @{[ _save_xmmregs( 6 .. 9 ) ]}
@@ -486,7 +485,9 @@ sub _ghash_4x {
     vpshufb $BSWAP_MASK_XMM, $GHASH_ACC_XMM, $GHASH_ACC_XMM
     vmovdqu $GHASH_ACC_XMM, ($GHASH_ACC_PTR)
 
-    vzeroupper
+    # No vzeroupper is needed, unlike upstream, since we don't use YMM
+    # registers. The epilogue may use XMM registers, so a vzeroupper would be
+    # needed here if we had used YMM registers.
 ___
 }
 
$code .= _end_func;
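
For context on the dropped `vzeroupper`: once any instruction writes the upper 128 bits of a YMM register, subsequent legacy-SSE (XMM-only) code can incur AVX-SSE transition penalties until `vzeroupper` clears the upper halves. Code that never touches YMM registers never dirties that state, so the instruction can be omitted. Below is a minimal, self-contained Perl sketch (not part of the patch; the instructions are placeholders chosen for illustration) contrasting the upstream YMM pattern with the XMM-only pattern used here, in the same `$code .= <<___;` heredoc style as this script:

```perl
#!/usr/bin/perl
use strict;
use warnings;

my $code = "";

# Upstream-style body: a YMM register is written, so vzeroupper is emitted
# before control reaches XMM-only / legacy-SSE code.
$code .= <<___;
    vpxor   %ymm0, %ymm0, %ymm0    # YMM use dirties the upper 128 bits
    vzeroupper                     # clear upper halves before XMM-only code
___

# XMM-only body, as in this patch: nothing above bit 127 is ever written,
# so no vzeroupper is required.
$code .= <<___;
    vpxor   %xmm0, %xmm0, %xmm0    # stays within the low 128 bits
___

print $code;
```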