Skip to content

Commit e9303d3

Browse files
authored
Speed up single-value SearchValues<string> candidate verification (#108365)
* Speed up single-value SearchValues<string> candidate verification
* Improve comment, add extra assert
* Add path for Vector128
* Add test stress for smaller vector sizes
* Print type of SearchValues<string> on fuzzer failure
* Expand SimpleIndexOfAnyValues test a bit more
1 parent d5925b6 commit e9303d3

File tree

9 files changed: 290 additions and 105 deletions.

src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ internal static unsafe void InternalCopy(string src, IntPtr dest, int len)
3838
{
3939
if (len != 0)
4040
{
41-
SpanHelpers.Memmove(ref *(byte*)dest, ref Unsafe.As<char, byte>(ref src.GetRawStringData()), (nuint)len);
41+
SpanHelpers.Memmove(ref *(byte*)dest, ref src.GetRawStringDataAsUInt8(), (nuint)len);
4242
}
4343
}
4444

src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/SearchValuesStringFuzzer.cs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ private static void Test(ReadOnlySpan<char> haystack, ReadOnlySpan<char> haystac
3636
SearchValues<string> searchValues = SearchValues.Create(needles, comparisonType);
3737

3838
int index = haystack.IndexOfAny(searchValues);
39-
Assert.Equal(index, haystackCopy.IndexOfAny(searchValues));
40-
Assert.Equal(index, IndexOfAnyReferenceImpl(haystack, needles, comparisonType));
39+
AssertEqual(index, haystackCopy.IndexOfAny(searchValues), searchValues);
40+
AssertEqual(index, IndexOfAnyReferenceImpl(haystack, needles, comparisonType), searchValues);
4141
}
4242

4343
private static int IndexOfAnyReferenceImpl(ReadOnlySpan<char> haystack, string[] needles, StringComparison comparisonType)
@@ -55,4 +55,15 @@ private static int IndexOfAnyReferenceImpl(ReadOnlySpan<char> haystack, string[]
5555

5656
return minIndex == int.MaxValue ? -1 : minIndex;
5757
}
58+
59+
// Compares the expected and actual index; on a mismatch, throws an exception whose
// message includes the runtime type of the SearchValues<string> instance and the
// names of its generic type arguments.
private static void AssertEqual(int expected, int actual, SearchValues<string> searchValues)
60+
{
61+
if (expected != actual)
62+
{
63+
// Use the runtime type (not the declared SearchValues<string>) so the message names the concrete implementation.
Type implType = searchValues.GetType();
64+
// Rendered as "TypeName [Arg1, Arg2, ...]" using the short names of the generic type arguments.
string impl = $"{implType.Name} [{string.Join(", ", implType.GenericTypeArguments.Select(t => t.Name))}]";
65+
66+
throw new Exception($"Expected {expected}, got {actual} for impl='{impl}'");
67+
}
68+
}
5869
}

src/libraries/System.Memory/tests/Span/StringSearchValues.cs

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System.Linq;
88
using System.Runtime.ExceptionServices;
99
using System.Runtime.InteropServices;
10+
using System.Runtime.Intrinsics.X86;
1011
using System.Threading;
1112
using System.Threading.Tasks;
1213
using Microsoft.DotNet.RemoteExecutor;
@@ -313,6 +314,7 @@ public static void IndexOfAny_InvalidUtf16()
313314
IndexOfAny(StringComparison.OrdinalIgnoreCase, -1, " foO\uD801bar", "oo\uD800baR, bar\uD800foo");
314315

315316
// Low surrogate without the high surrogate.
317+
IndexOfAny(StringComparison.OrdinalIgnoreCase, 1, "\uD801\uDCD8\uD8FB\uDCD8", "\uDCD8");
316318
IndexOfAny(StringComparison.OrdinalIgnoreCase, 1, "\uD801\uDCD8\uD8FB\uDCD8", "foo, \uDCD8");
317319
}
318320

@@ -337,6 +339,15 @@ public static void IndexOfAny_InvalidUtf16()
337339
[InlineData("abcd!")]
338340
[InlineData("abcdefgh")]
339341
[InlineData("abcdefghi")]
342+
[InlineData("123456789")]
343+
[InlineData("123456789a")]
344+
[InlineData("123456789ab")]
345+
[InlineData("123456789abc")]
346+
[InlineData("123456789abcd")]
347+
[InlineData("123456789abcde")]
348+
[InlineData("123456789abcdef")]
349+
[InlineData("123456789abcdefg")]
350+
[InlineData("123456789abcdefgh")]
340351
// Multiple values, but they all share the same prefix
341352
[InlineData("abc", "ab", "abcd")]
342353
// These should hit the Aho-Corasick implementation
@@ -406,9 +417,25 @@ static void TestCore(string[] valuesArray)
406417
Values_ImplementsSearchValuesBase(StringComparison.OrdinalIgnoreCase, valuesArray);
407418

408419
string values = string.Join(", ", valuesArray);
420+
string text = valuesArray[0];
409421

410-
IndexOfAny(StringComparison.Ordinal, 0, valuesArray[0], values);
411-
IndexOfAny(StringComparison.OrdinalIgnoreCase, 0, valuesArray[0], values);
422+
IndexOfAny(StringComparison.Ordinal, 0, text, values);
423+
IndexOfAny(StringComparison.OrdinalIgnoreCase, 0, text, values);
424+
425+
// Replace every position in the text with a different character.
426+
foreach (StringComparison comparisonType in new[] { StringComparison.Ordinal, StringComparison.OrdinalIgnoreCase })
427+
{
428+
SearchValues<string> stringValues = SearchValues.Create(valuesArray, comparisonType);
429+
430+
for (int i = 0; i < text.Length - 1; i++)
431+
{
432+
foreach (char replacement in "AaBb _!\u00F6")
433+
{
434+
string newText = $"{text.AsSpan(0, i)}{replacement}{text.AsSpan(i + 1)}";
435+
Assert.Equal(IndexOfAnyReferenceImpl(newText, valuesArray, comparisonType), newText.IndexOfAny(stringValues));
436+
}
437+
}
438+
}
412439
}
413440
}
414441

@@ -499,6 +526,20 @@ public static void TestIndexOfAny_RandomInputs_Stress()
499526
{
500527
RunStress();
501528

529+
if (RemoteExecutor.IsSupported && Avx512F.IsSupported)
530+
{
531+
var psi = new ProcessStartInfo();
532+
psi.Environment.Add("DOTNET_EnableAVX512F", "0");
533+
RemoteExecutor.Invoke(RunStress, new RemoteInvokeOptions { StartInfo = psi, TimeOut = 10 * 60 * 1000 }).Dispose();
534+
}
535+
536+
if (RemoteExecutor.IsSupported && Avx2.IsSupported)
537+
{
538+
var psi = new ProcessStartInfo();
539+
psi.Environment.Add("DOTNET_EnableAVX2", "0");
540+
RemoteExecutor.Invoke(RunStress, new RemoteInvokeOptions { StartInfo = psi, TimeOut = 10 * 60 * 1000 }).Dispose();
541+
}
542+
502543
if (CanTestInvariantCulture)
503544
{
504545
RunUsingInvariantCulture(static () => RunStress());

src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ public override int GetHashCode(string? obj)
8080
// The Ordinal version of Marvin32 operates over bytes.
8181
// The multiplication from # chars -> # bytes will never integer overflow.
8282
return Marvin.ComputeHash32(
83-
ref Unsafe.As<char, byte>(ref obj.GetRawStringData()),
83+
ref obj.GetRawStringDataAsUInt8(),
8484
(uint)obj.Length * 2,
8585
_seed.p0, _seed.p1);
8686
}

0 commit comments

Comments
 (0)