Skip to content

Commit b919f83

Browse files
lemireUbuntu
and
Ubuntu
authored
Minor optimization (#46)
* Minor optimizations * minor reformat * Optimizing for neoverse --------- Co-authored-by: Ubuntu <[email protected]>
1 parent a352a15 commit b919f83

File tree

2 files changed

+31
-27
lines changed

2 files changed

+31
-27
lines changed

benchmark/Benchmark.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,10 @@ public Config()
125125
}
126126
// Parameters and variables for real data
127127
[Params(@"data/twitter.json",
128-
@"data/Bogatov1069.utf8.txt",
129-
@"data/Bogatov136.utf8.txt",
130-
@"data/Bogatov286.utf8.txt",
131-
@"data/Bogatov527.utf8.txt",
128+
@"data/Bogatov1069.utf8.txt",
129+
@"data/Bogatov136.utf8.txt",
130+
@"data/Bogatov286.utf8.txt",
131+
@"data/Bogatov527.utf8.txt",
132132
@"data/Arabic-Lipsum.utf8.txt",
133133
@"data/Hebrew-Lipsum.utf8.txt",
134134
@"data/Korean-Lipsum.utf8.txt",

src/UTF8.cs

+27-23
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by
6868
// We scan the input from buf to len, possibly going back howFarBack bytes, to find the end of
6969
// a valid UTF-8 sequence. We return buf + len if the buffer is valid, otherwise we return the
7070
// pointer to the first invalid byte.
71+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
7172
private unsafe static byte* SimpleRewindAndValidateWithErrors(int howFarBack, byte* buf, int len)
7273
{
7374
int extraLen = 0;
@@ -90,7 +91,6 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by
9091
{
9192
return buf - howFarBack;
9293
}
93-
9494
int pos = 0;
9595
int nextPos;
9696
uint codePoint = 0;
@@ -598,7 +598,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
598598
}
599599
else
600600
{
601-
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
601+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
602602
}
603603
if (invalidBytePointer < pInputBuffer + processedLength)
604604
{
@@ -624,16 +624,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
624624

625625

626626
// We may still have an error.
627-
if (processedLength < inputLength || !Sse42.TestZ(prevIncomplete, prevIncomplete))
627+
bool hasIncompete = !Sse42.TestZ(prevIncomplete, prevIncomplete);
628+
if (processedLength < inputLength || hasIncompete)
628629
{
629630
byte* invalidBytePointer;
630-
if (processedLength == 0)
631+
if (processedLength == 0 || !hasIncompete)
631632
{
632633
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength);
633634
}
634635
else
635636
{
636-
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
637+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
637638

638639
}
639640
if (invalidBytePointer != pInputBuffer + inputLength)
@@ -813,7 +814,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
813814
if (!Avx2.TestZ(prevIncomplete, prevIncomplete))
814815
{
815816
int off = processedLength >= 3 ? processedLength - 3 : processedLength;
816-
byte* invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(16 - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
817+
byte* invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(32 - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
817818
// So the code is correct up to invalidBytePointer
818819
if (invalidBytePointer < pInputBuffer + processedLength)
819820
{
@@ -877,7 +878,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
877878
}
878879
else
879880
{
880-
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
881+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
881882
}
882883
if (invalidBytePointer < pInputBuffer + processedLength)
883884
{
@@ -899,17 +900,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
899900
}
900901
}
901902
// We may still have an error.
902-
if (processedLength < inputLength || !Avx2.TestZ(prevIncomplete, prevIncomplete))
903+
bool hasIncompete = !Avx2.TestZ(prevIncomplete, prevIncomplete);
904+
if (processedLength < inputLength || hasIncompete)
903905
{
904906
byte* invalidBytePointer;
905-
if (processedLength == 0)
907+
if (processedLength == 0 || !hasIncompete)
906908
{
907909
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength);
908910
}
909911
else
910912
{
911-
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
912-
913+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
913914
}
914915
if (invalidBytePointer != pInputBuffer + inputLength)
915916
{
@@ -1215,7 +1216,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
12151216
}
12161217
else
12171218
{
1218-
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
1219+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
12191220
}
12201221
if (invalidBytePointer < pInputBuffer + processedLength)
12211222
{
@@ -1237,16 +1238,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
12371238
}
12381239
}
12391240
// We may still have an error.
1240-
if (processedLength < inputLength || Avx512BW.CompareGreaterThan(prevIncomplete, Vector512<byte>.Zero).ExtractMostSignificantBits() != 0)
1241+
bool hasIncompete = Avx512BW.CompareGreaterThan(prevIncomplete, Vector512<byte>.Zero).ExtractMostSignificantBits() != 0;
1242+
if (processedLength < inputLength || hasIncompete)
12411243
{
12421244
byte* invalidBytePointer;
1243-
if (processedLength == 0)
1245+
if (processedLength == 0 || !hasIncompete)
12441246
{
12451247
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength);
12461248
}
12471249
else
12481250
{
1249-
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
1251+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
12501252

12511253
}
12521254
if (invalidBytePointer != pInputBuffer + inputLength)
@@ -1360,8 +1362,9 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
13601362
{
13611363

13621364
Vector128<byte> currentBlock = AdvSimd.LoadVector128(pInputBuffer + processedLength);
1363-
1364-
if (AdvSimd.Arm64.MaxAcross(currentBlock).ToScalar() <= 127)
1365+
if (AdvSimd.Arm64.MaxAcross(Vector128.AsUInt32(AdvSimd.And(currentBlock, v80))).ToScalar() == 0)
1366+
// We could do it with (AdvSimd.Arm64.MaxAcross(currentBlock).ToScalar() <= 127) but it is slower on some
1367+
// hardware.
13651368
{
13661369
// We have an ASCII block, no need to process it, but
13671370
// we need to check if the previous block was incomplete.
@@ -1431,7 +1434,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
14311434
}
14321435
else
14331436
{
1434-
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
1437+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
14351438
}
14361439
if (invalidBytePointer < pInputBuffer + processedLength)
14371440
{
@@ -1457,18 +1460,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
14571460
n4 += negn4add;
14581461
}
14591462
}
1460-
1461-
// We may still have an error.
1462-
if (processedLength < inputLength || AdvSimd.Arm64.MaxAcross(prevIncomplete).ToScalar() != 0)
1463+
bool hasIncompete = AdvSimd.Arm64.MaxAcross(Vector128.AsUInt32(prevIncomplete)).ToScalar() != 0;
1464+
if (processedLength < inputLength || hasIncompete)
14631465
{
14641466
byte* invalidBytePointer;
1465-
if (processedLength == 0)
1467+
if (processedLength == 0 || !hasIncompete)
14661468
{
14671469
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength);
14681470
}
14691471
else
14701472
{
1471-
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
1473+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
14721474
}
14731475
if (invalidBytePointer != pInputBuffer + inputLength)
14741476
{
@@ -1497,6 +1499,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
14971499
return GetPointerToFirstInvalidByteScalar(pInputBuffer + processedLength, inputLength - processedLength, out utf16CodeUnitCountAdjustment, out scalarCountAdjustment);
14981500
}
14991501

1502+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
15001503
private static unsafe void removeCounters(byte* start, byte* end, ref int n4, ref int contbytes)
15011504
{
15021505
for (byte* p = start; p < end; p++)
@@ -1512,6 +1515,7 @@ private static unsafe void removeCounters(byte* start, byte* end, ref int n4, re
15121515
}
15131516
}
15141517

1518+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
15151519
private static unsafe void addCounters(byte* start, byte* end, ref int n4, ref int contbytes)
15161520
{
15171521
for (byte* p = start; p < end; p++)

0 commit comments

Comments
 (0)