@@ -66,7 +66,7 @@ public static (Vector256<byte> Result, Vector256<byte> Prev0) ProcessInputN2(
66
66
}
67
67
68
68
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
69
- [ CompExactlyDependsOn ( typeof ( Avx512BW ) ) ]
69
+ [ CompExactlyDependsOn ( typeof ( Avx512Vbmi ) ) ]
70
70
public static ( Vector512 < byte > Result , Vector512 < byte > Prev0 ) ProcessInputN2 (
71
71
Vector512 < byte > input ,
72
72
Vector512 < byte > prev0 ,
@@ -183,7 +183,7 @@ public static (Vector256<byte> Result, Vector256<byte> Prev0, Vector256<byte> Pr
183
183
}
184
184
185
185
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
186
- [ CompExactlyDependsOn ( typeof ( Avx512BW ) ) ]
186
+ [ CompExactlyDependsOn ( typeof ( Avx512Vbmi ) ) ]
187
187
public static ( Vector512 < byte > Result , Vector512 < byte > Prev0 , Vector512 < byte > Prev1 ) ProcessInputN3 (
188
188
Vector512 < byte > input ,
189
189
Vector512 < byte > prev0 , Vector512 < byte > prev1 ,
@@ -414,36 +414,29 @@ private static Vector256<byte> RightShift2(Vector256<byte> left, Vector256<byte>
414
414
}
415
415
416
416
// Diff hunk for RightShift1(Vector512<byte> left, Vector512<byte> right).
// Lines prefixed with '-' are the removed implementation and lines prefixed with '+' are its replacement;
// the bare numbers appear to be the old/new line numbers emitted by the diff viewer.
// Purpose (per the diagram inside the body): build a vector whose first byte is the last byte of `left`,
// followed by the first 63 bytes of `right`.
// The change replaces the Avx512F.PermuteVar8x64x2 + Avx512BW.AlignRight(..., 15) pair with a single
// Avx512Vbmi.PermuteVar64x8x2 call and updates the CompExactlyDependsOn attribute from Avx512BW to Avx512Vbmi.
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
417
- [ CompExactlyDependsOn ( typeof ( Avx512BW ) ) ]
417
+ [ CompExactlyDependsOn ( typeof ( Avx512Vbmi ) ) ]
418
418
private static Vector512 < byte > RightShift1 ( Vector512 < byte > left , Vector512 < byte > right )
419
419
{
420
420
// Given input vectors like
421
421
// left: 0, 1, 2, 3, 4, 5, ... , 58, 59, 60, 61, 62, [63]
422
422
// right: 64, 65, 66, 67, 68, 69, ... , 122, 123, 124, 125, 126, 127
423
423
// We want to shift the last element of left (63) to be the first element of the result
424
424
// result: [63], 64, 65, 66, 67, 68, ... , 121, 122, 123, 124, 125, 126
425
- //
426
- // Avx512BW.AlignRight acts like four separate Ssse3.AlignRight calls on each 128-bit pair of the of the source operands.
427
- // Result of Avx512BW.AlignRight(right, left, 15) is
428
- // lower: [15], 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, [31], 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
429
- // upper: [47], 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, [63], 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
430
- // note how elements at indexes 0, 16, 32 and 48 are off by 48 places.
431
- // We want to read 63 instead of 15, 79 instead of 31, 95 instead of 47, and 111 instead of 63.
432
- //
433
- // Similar to Avx2 above, we create a temporary value where we shift these positions by 48 places - shift 8-byte values by 6 places (PermuteVar8x64x2).
434
- // The indices vector below could be [6, 7, 8, 9, 10, 11, 12, 13], but we only care about the last byte in each 128-bit block (positions with value 0 don't affect the result).
435
425
436
- Vector512 < byte > leftShifted = Avx512F . PermuteVar8x64x2 ( left . AsInt64 ( ) , Vector512 . Create ( 0 , 7 , 0 , 9 , 0 , 11 , 0 , 13 ) , right . AsInt64 ( ) ) . AsByte ( ) ;
437
- return Avx512BW . AlignRight ( right , leftShifted , 15 ) ;
// NOTE(review): CreateSequence<byte>(63, 1) presumably produces the indices 63, 64, ..., 126, and
// PermuteVar64x8x2 presumably treats `left` as table entries 0-63 and `right` as entries 64-127;
// that would yield exactly the "result" row above -- confirm against the API documentation.
426
+ return Avx512Vbmi . PermuteVar64x8x2 ( left , Vector512 . CreateSequence < byte > ( 63 , 1 ) , right ) ;
438
427
}
439
428
440
429
// Diff hunk for RightShift2(Vector512<byte> left, Vector512<byte> right).
// Lines prefixed with '-' are the removed implementation and lines prefixed with '+' are its replacement;
// the bare numbers appear to be the old/new line numbers emitted by the diff viewer.
// Purpose (per the diagram added inside the body): build a vector whose first two bytes are the last two
// bytes of `left`, followed by the first 62 bytes of `right`.
// The change replaces the Avx512F.PermuteVar8x64x2 + Avx512BW.AlignRight(..., 14) pair with a single
// Avx512Vbmi.PermuteVar64x8x2 call and updates the CompExactlyDependsOn attribute from Avx512BW to Avx512Vbmi.
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
441
- [ CompExactlyDependsOn ( typeof ( Avx512BW ) ) ]
430
+ [ CompExactlyDependsOn ( typeof ( Avx512Vbmi ) ) ]
442
431
private static Vector512 < byte > RightShift2 ( Vector512 < byte > left , Vector512 < byte > right )
443
432
{
444
- // See comments in 'RightShift1(Vector512<byte> left, Vector512<byte> right)' above.
445
- Vector512 < byte > leftShifted = Avx512F . PermuteVar8x64x2 ( left . AsInt64 ( ) , Vector512 . Create ( 0 , 7 , 0 , 9 , 0 , 11 , 0 , 13 ) , right . AsInt64 ( ) ) . AsByte ( ) ;
446
- return Avx512BW . AlignRight ( right , leftShifted , 14 ) ;
433
+ // Given input vectors like
434
+ // left: 0, 1, 2, 3, 4, 5, ... , 58, 59, 60, 61, [62], [63]
435
+ // right: 64, 65, 66, 67, 68, 69, ... , 122, 123, 124, 125, 126, 127
436
+ // We want to shift the last two elements of left (62, 63) to be the first elements of the result
437
+ // result: [62], [63], 64, 65, 66, 67, 68, ... , 121, 122, 123, 124, 125
438
+
// NOTE(review): CreateSequence<byte>(62, 1) presumably produces the indices 62, 63, ..., 125, and
// PermuteVar64x8x2 presumably treats `left` as table entries 0-63 and `right` as entries 64-127;
// that would yield exactly the "result" row above -- confirm against the API documentation.
439
+ return Avx512Vbmi . PermuteVar64x8x2 ( left , Vector512 . CreateSequence < byte > ( 62 , 1 ) , right ) ;
447
440
}
448
441
}
449
442
}
0 commit comments