Skip to content

Commit a1e1eaa

Browse files
authored
Improve TeddyHelper.RightShift helpers for AVX512 (#107819)
1 parent 6422286 commit a1e1eaa

File tree

2 files changed

+16
-23
lines changed

2 files changed

+16
-23
lines changed

src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ protected int IndexOfAnyN2(ReadOnlySpan<char> span)
154154
{
155155
// The behavior of the rest of the function remains the same if Avx2 or Avx512BW aren't supported
156156
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
157-
if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && span.Length >= CharsPerIterationAvx512 + MatchStartOffsetN2)
157+
if (Vector512.IsHardwareAccelerated && Avx512Vbmi.IsSupported && span.Length >= CharsPerIterationAvx512 + MatchStartOffsetN2)
158158
{
159159
return IndexOfAnyN2Avx512(span);
160160
}
@@ -174,7 +174,7 @@ protected int IndexOfAnyN3(ReadOnlySpan<char> span)
174174
{
175175
// The behavior of the rest of the function remains the same if Avx2 or Avx512BW aren't supported
176176
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
177-
if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && span.Length >= CharsPerIterationAvx512 + MatchStartOffsetN3)
177+
if (Vector512.IsHardwareAccelerated && Avx512Vbmi.IsSupported && span.Length >= CharsPerIterationAvx512 + MatchStartOffsetN3)
178178
{
179179
return IndexOfAnyN3Avx512(span);
180180
}
@@ -296,7 +296,7 @@ private int IndexOfAnyN2Avx2(ReadOnlySpan<char> span)
296296
goto ContinueLoop;
297297
}
298298

299-
[CompExactlyDependsOn(typeof(Avx512BW))]
299+
[CompExactlyDependsOn(typeof(Avx512Vbmi))]
300300
private int IndexOfAnyN2Avx512(ReadOnlySpan<char> span)
301301
{
302302
// See comments in 'IndexOfAnyN3Vector128' below.
@@ -476,7 +476,7 @@ private int IndexOfAnyN3Avx2(ReadOnlySpan<char> span)
476476
goto ContinueLoop;
477477
}
478478

479-
[CompExactlyDependsOn(typeof(Avx512BW))]
479+
[CompExactlyDependsOn(typeof(Avx512Vbmi))]
480480
private int IndexOfAnyN3Avx512(ReadOnlySpan<char> span)
481481
{
482482
// See comments in 'IndexOfAnyN3Vector128' above.

src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs

+12-19
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public static (Vector256<byte> Result, Vector256<byte> Prev0) ProcessInputN2(
6666
}
6767

6868
[MethodImpl(MethodImplOptions.AggressiveInlining)]
69-
[CompExactlyDependsOn(typeof(Avx512BW))]
69+
[CompExactlyDependsOn(typeof(Avx512Vbmi))]
7070
public static (Vector512<byte> Result, Vector512<byte> Prev0) ProcessInputN2(
7171
Vector512<byte> input,
7272
Vector512<byte> prev0,
@@ -183,7 +183,7 @@ public static (Vector256<byte> Result, Vector256<byte> Prev0, Vector256<byte> Pr
183183
}
184184

185185
[MethodImpl(MethodImplOptions.AggressiveInlining)]
186-
[CompExactlyDependsOn(typeof(Avx512BW))]
186+
[CompExactlyDependsOn(typeof(Avx512Vbmi))]
187187
public static (Vector512<byte> Result, Vector512<byte> Prev0, Vector512<byte> Prev1) ProcessInputN3(
188188
Vector512<byte> input,
189189
Vector512<byte> prev0, Vector512<byte> prev1,
@@ -414,36 +414,29 @@ private static Vector256<byte> RightShift2(Vector256<byte> left, Vector256<byte>
414414
}
415415

416416
[MethodImpl(MethodImplOptions.AggressiveInlining)]
417-
[CompExactlyDependsOn(typeof(Avx512BW))]
417+
[CompExactlyDependsOn(typeof(Avx512Vbmi))]
418418
private static Vector512<byte> RightShift1(Vector512<byte> left, Vector512<byte> right)
419419
{
420420
// Given input vectors like
421421
// left: 0, 1, 2, 3, 4, 5, ... , 58, 59, 60, 61, 62, [63]
422422
// right: 64, 65, 66, 67, 68, 69, ... , 122, 123, 124, 125, 126, 127
423423
// We want to shift the last element of left (63) to be the first element of the result
424424
// result: [63], 64, 65, 66, 67, 68, ... , 121, 122, 123, 124, 125, 126
425-
//
426-
// Avx512BW.AlignRight acts like four separate Ssse3.AlignRight calls on each 128-bit pair of the source operands.
427-
// Result of Avx512BW.AlignRight(right, left, 15) is
428-
// lower: [15], 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, [31], 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
429-
// upper: [47], 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, [63], 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
430-
// note how elements at indexes 0, 16, 32 and 48 are off by 48 places.
431-
// We want to read 63 instead of 15, 79 instead of 31, 95 instead of 47, and 111 instead of 63.
432-
//
433-
// Similar to Avx2 above, we create a temporary value where we shift these positions by 48 places - shift 8-byte values by 6 places (PermuteVar8x64x2).
434-
// The indices vector below could be [6, 7, 8, 9, 10, 11, 12, 13], but we only care about the last byte in each 128-bit block (positions with value 0 don't affect the result).
435425

436-
Vector512<byte> leftShifted = Avx512F.PermuteVar8x64x2(left.AsInt64(), Vector512.Create(0, 7, 0, 9, 0, 11, 0, 13), right.AsInt64()).AsByte();
437-
return Avx512BW.AlignRight(right, leftShifted, 15);
426+
return Avx512Vbmi.PermuteVar64x8x2(left, Vector512.CreateSequence<byte>(63, 1), right);
438427
}
439428

440429
[MethodImpl(MethodImplOptions.AggressiveInlining)]
441-
[CompExactlyDependsOn(typeof(Avx512BW))]
430+
[CompExactlyDependsOn(typeof(Avx512Vbmi))]
442431
private static Vector512<byte> RightShift2(Vector512<byte> left, Vector512<byte> right)
443432
{
444-
// See comments in 'RightShift1(Vector512<byte> left, Vector512<byte> right)' above.
445-
Vector512<byte> leftShifted = Avx512F.PermuteVar8x64x2(left.AsInt64(), Vector512.Create(0, 7, 0, 9, 0, 11, 0, 13), right.AsInt64()).AsByte();
446-
return Avx512BW.AlignRight(right, leftShifted, 14);
433+
// Given input vectors like
434+
// left: 0, 1, 2, 3, 4, 5, ... , 58, 59, 60, 61, [62], [63]
435+
// right: 64, 65, 66, 67, 68, 69, ... , 122, 123, 124, 125, 126, 127
436+
// We want to shift the last two elements of left (62, 63) to be the first elements of the result
437+
// result: [62], [63], 64, 65, 66, 67, 68, ... , 121, 122, 123, 124, 125
438+
439+
return Avx512Vbmi.PermuteVar64x8x2(left, Vector512.CreateSequence<byte>(62, 1), right);
447440
}
448441
}
449442
}

0 commit comments

Comments
 (0)