Skip to content

Commit 441c72c

Browse files
lemireDaniel Lemire
and
Daniel Lemire
authored
Pre-release PR (#44)
* fix: add twitter and some optimizations for runs of asciis * add twitter result to README * added sse and avx results * simplified the algorithm * updating numbers * tweaking * preparing release * integrating AVX-512 --------- Co-authored-by: Daniel Lemire <[email protected]>
1 parent 92da59a commit 441c72c

File tree

6 files changed

+15726
-145
lines changed

6 files changed

+15726
-145
lines changed

README.md

+41-14
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ To run just one benchmark, use a filter:
8888

8989
```
9090
cd benchmark
91-
dotnet run --configuration Release --filter "*Arabic-Lipsum*"
91+
dotnet run --configuration Release --filter "*Twitter*"
92+
dotnet run --configuration Release --filter "*Lipsum*"
9293
```
9394

9495
If you are under macOS or Linux, you may want to run the benchmarks in privileged mode:
@@ -98,26 +99,52 @@ cd benchmark
9899
sudo dotnet run -c Release
99100
```
100101

102+
103+
--anyCategories sse avx avx512
101104
## Results (x64)
102105

103-
To be completed.
106+
On an Intel Ice Lake system, our validation function is up to 13 times
107+
faster than the standard library.
108+
A realistic input is Twitter.json, which is mostly ASCII with some Unicode content,
109+
where we are 2.4 times faster.
110+
111+
| data set | SimdUnicode current AVX2 (GB/s) | .NET speed (GB/s) | speed up |
112+
|:----------------|:------------------------|:-------------------|:-------------------|
113+
| Twitter.json | 29 | 12 | 2.4 x |
114+
| Arabic-Lipsum | 12 | 2.3 | 5.2 x |
115+
| Chinese-Lipsum | 12 | 3.9 | 3.0 x |
116+
| Emoji-Lipsum | 12 | 0.9 | 13 x |
117+
| Hebrew-Lipsum | 12 | 2.3 | 5.2 x |
118+
| Hindi-Lipsum | 12 | 2.1 | 5.7 x |
119+
| Japanese-Lipsum | 10  | 3.5 | 2.9 x |
120+
| Korean-Lipsum | 10 | 1.3 | 7.7 x |
121+
| Latin-Lipsum | 76 | 76 | --- |
122+
| Russian-Lipsum | 12 | 1.2 | 10 x |
123+
124+
125+
126+
On x64 systems, we offer several functions: a fallback function for legacy systems,
127+
an SSE4.2 function for older CPUs, an AVX2 function for current x64 systems and
128+
an AVX-512 function for the most recent processors (AMD Zen 4 or better, Intel
129+
Ice Lake, etc.).
104130

105131
## Results (ARM)
106132

107-
On an Apple M2 system, our validation function is two to three times
133+
On an Apple M2 system, our validation function is 1.5 to 4 times
108134
faster than the standard library.
109135

110-
| data set | SimdUnicode speed (GB/s) | .NET speed (GB/s) |
111-
|:----------------|:-----------|:--------------------------|
112-
| Arabic-Lipsum | 6.7 | 3.5 |
113-
| Chinese-Lipsum | 6.7 | 4.8 |
114-
| Emoji-Lipsum | 6.7 | 2.5 |
115-
| Hebrew-Lipsum | 6.7 | 3.5 |
116-
| Hindi-Lipsum | 6.8 | 3.0 |
117-
| Japanese-Lipsum | 6.8 | 4.6  |
118-
| Korean-Lipsum | 6.6 | 1.8 |
119-
| Latin-Lipsum | 87 | 38 |
120-
| Russian-Lipsum | 6.7 | 2.6 |
136+
| data set | SimdUnicode speed (GB/s) | .NET speed (GB/s) | speed up |
137+
|:----------------|:-----------|:--------------------------|:-------------------|
138+
| Twitter.json | 25 | 14 | 1.8 x |
139+
| Arabic-Lipsum | 7.4 | 3.5 | 2.1 x |
140+
| Chinese-Lipsum | 7.4 | 4.8 | 1.5 x |
141+
| Emoji-Lipsum | 7.4 | 2.5 | 3.0 x |
142+
| Hebrew-Lipsum | 7.4 | 3.5 | 2.1 x |
143+
| Hindi-Lipsum | 7.3 | 3.0 | 2.4 x |
144+
| Japanese-Lipsum | 7.3 | 4.6  | 1.6 x |
145+
| Korean-Lipsum | 7.4 | 1.8 | 4.1 x |
146+
| Latin-Lipsum | 87 | 38 | 2.3 x |
147+
| Russian-Lipsum | 7.4 | 2.7 | 2.7 x |
121148

122149

123150
## Building the library

benchmark/Benchmark.cs

+30-19
Original file line numberDiff line numberDiff line change
@@ -62,58 +62,70 @@ public string GetValue(Summary summary, BenchmarkCase benchmarkCase)
6262
[Config(typeof(Config))]
6363
public class RealDataBenchmark
6464
{
65+
// We only inform the user once about the SIMD support of the system.
66+
private static bool printed;
6567
#pragma warning disable CA1812
6668
private sealed class Config : ManualConfig
6769
{
6870
public Config()
6971
{
7072
AddColumn(new Speed());
7173

72-
7374
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
7475
{
76+
if (!printed)
77+
{
7578
#pragma warning disable CA1303
76-
Console.WriteLine("ARM64 system detected.");
77-
AddFilter(new AnyCategoriesFilter(["arm64", "scalar", "runtime"]));
78-
79+
Console.WriteLine("ARM64 system detected.");
80+
printed = true;
81+
}
7982
}
8083
else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
8184
{
8285
if (Vector512.IsHardwareAccelerated && System.Runtime.Intrinsics.X86.Avx512Vbmi.IsSupported)
8386
{
87+
if (!printed)
88+
{
8489
#pragma warning disable CA1303
85-
Console.WriteLine("X64 system detected (Intel, AMD,...) with AVX-512 support.");
86-
AddFilter(new AnyCategoriesFilter(["avx512", "avx", "sse", "scalar", "runtime"]));
90+
Console.WriteLine("X64 system detected (Intel, AMD,...) with AVX-512 support.");
91+
printed = true;
92+
}
8793
}
8894
else if (Avx2.IsSupported)
8995
{
96+
if (!printed)
97+
{
9098
#pragma warning disable CA1303
91-
Console.WriteLine("X64 system detected (Intel, AMD,...) with AVX2 support.");
92-
AddFilter(new AnyCategoriesFilter(["avx", "sse", "scalar", "runtime"]));
99+
Console.WriteLine("X64 system detected (Intel, AMD,...) with AVX2 support.");
100+
printed = true;
101+
}
93102
}
94103
else if (Ssse3.IsSupported)
95104
{
105+
if (!printed)
106+
{
96107
#pragma warning disable CA1303
97-
Console.WriteLine("X64 system detected (Intel, AMD,...) with Sse4.2 support.");
98-
AddFilter(new AnyCategoriesFilter(["sse", "scalar", "runtime"]));
108+
Console.WriteLine("X64 system detected (Intel, AMD,...) with Sse4.2 support.");
109+
printed = true;
110+
}
99111
}
100112
else
101113
{
114+
if (!printed)
115+
{
102116
#pragma warning disable CA1303
103-
Console.WriteLine("X64 system detected (Intel, AMD,...) without relevant SIMD support.");
104-
AddFilter(new AnyCategoriesFilter(["scalar", "runtime"]));
117+
Console.WriteLine("X64 system detected (Intel, AMD,...) without relevant SIMD support.");
118+
printed = true;
119+
}
105120
}
106121
}
107-
else
108-
{
109-
AddFilter(new AnyCategoriesFilter(["scalar", "runtime"]));
110-
111-
}
122+
AddFilter(new AnyCategoriesFilter(["default"]));
112123

113124
}
114125
}
115126
// Parameters and variables for real data
116-
[Params(@"data/Arabic-Lipsum.utf8.txt",
127+
[Params(@"data/twitter.json",
128+
@"data/Arabic-Lipsum.utf8.txt",
117129
@"data/Hebrew-Lipsum.utf8.txt",
118130
@"data/Korean-Lipsum.utf8.txt",
119131
@"data/Chinese-Lipsum.utf8.txt",
@@ -285,7 +297,6 @@ public unsafe void SIMDUtf8ValidationRealDataSse()
285297
});
286298
}
287299
}
288-
289300
}
290301
public class Program
291302
{

benchmark/benchmark.csproj

+3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
<None Update="data\*.utf8.txt">
2323
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
2424
</None>
25+
<None Update="data\twitter.json">
26+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
27+
</None>
2528
</ItemGroup>
2629

2730

0 commit comments

Comments
 (0)