Skip to content

Commit ba6a1b1

Browse files
authored
Update Cursively benchmark code. (#1)
* Update Cursively benchmark code.
  - VisitPartialFieldContents is required for correctness.
  - Use the original byte array, since we have one, instead of forcing a stream around it.
  - Set iteration time to 1 second, to work around dotnet/BenchmarkDotNet#837 while we wait for a version of this package that includes dotnet/BenchmarkDotNet#1573.
* Change the order of this check.
  Whether or not this is the optimal order is almost certain to depend on the data: data sets that would almost never use the pool would likely be hurt more by doing it this way, especially if this were a real-world application, since this would likely push a line out of the CPU cache for no reason. Then again, a real-world application probably wouldn't use the pool conditionally like this, so there's not much of a reason NOT to do it this way either.
1 parent 5f123e3 commit ba6a1b1

File tree

3 files changed

+48
-10
lines changed

3 files changed

+48
-10
lines changed

benchmark.cmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
dotnet build -c Release source/CsvBenchmark.sln
2-
bin\release\net5.0\CsvBenchmark.exe
2+
bin\release\net5.0\CsvBenchmark.exe --iterationTime 1000

source/CsvBenchmark/CsvReaderBenchmarks.cs

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -201,12 +201,29 @@ public void MgholamFastCSV()
201201

202202
class CursivelyStringVisitor : CsvReaderVisitorBase
203203
{
204+
readonly bool doPooling;
205+
readonly byte[] bytes = new byte[1024];
206+
int bytesUsed = 0;
207+
204208
// in any realistic scenario we'd need to at least know the column ordinal to do anything with the record
205209
int ordinal = 0;
206210

211+
public CursivelyStringVisitor(bool doPooling)
212+
{
213+
this.doPooling = doPooling;
214+
}
215+
207216
public override void VisitEndOfField(System.ReadOnlySpan<byte> chunk)
208217
{
209-
var str = Encoding.UTF8.GetString(chunk);
218+
if (bytesUsed != 0)
219+
{
220+
chunk.CopyTo(bytes.AsSpan(bytesUsed, chunk.Length));
221+
chunk = new ReadOnlySpan<byte>(bytes, 0, bytesUsed + chunk.Length);
222+
bytesUsed = 0;
223+
}
224+
var str = doPooling && chunk.Length == 1 && chunk[0] < 128
225+
? pool[chunk[0]]
226+
: Encoding.UTF8.GetString(chunk);
210227
ordinal++;
211228
}
212229

@@ -217,17 +234,20 @@ public override void VisitEndOfRecord()
217234

218235
public override void VisitPartialFieldContents(System.ReadOnlySpan<byte> chunk)
219236
{
237+
chunk.CopyTo(bytes.AsSpan(bytesUsed, chunk.Length));
238+
bytesUsed += chunk.Length;
220239
}
221240
}
222241

223242
[Benchmark]
224-
public void CursivelyCsv()
243+
[Arguments(false)]
244+
[Arguments(true)]
245+
public void CursivelyCsv(bool doPooling)
225246
{
226-
var s = TestData.GetUtf8Stream();
227-
var proc = new CursivelyStringVisitor();
247+
var d = TestData.GetUtf8Array();
248+
var proc = new CursivelyStringVisitor(doPooling);
228249
CsvSyncInput
229-
.ForStream(s)
230-
.WithMinReadBufferByteCount(BufferSize)
250+
.ForMemory(d)
231251
.Process(proc);
232252
}
233253

@@ -352,6 +372,9 @@ public void NRecoSelect()
352372

353373
class CursivelySelectVisitor : CsvReaderVisitorBase
354374
{
375+
readonly byte[] bytes = new byte[1024];
376+
int bytesUsed = 0;
377+
355378
int ordinal = 0;
356379
int row = 0;
357380

@@ -361,6 +384,12 @@ class CursivelySelectVisitor : CsvReaderVisitorBase
361384

362385
public override void VisitEndOfField(ReadOnlySpan<byte> chunk)
363386
{
387+
if (bytesUsed != 0)
388+
{
389+
chunk.CopyTo(bytes.AsSpan(bytesUsed, chunk.Length));
390+
chunk = new ReadOnlySpan<byte>(bytes, 0, bytesUsed + chunk.Length);
391+
bytesUsed = 0;
392+
}
364393
if (row != 0) // skip the header row
365394
{
366395
switch (ordinal)
@@ -398,17 +427,21 @@ public override void VisitEndOfRecord()
398427

399428
public override void VisitPartialFieldContents(System.ReadOnlySpan<byte> chunk)
400429
{
430+
if (row > 0)
431+
{
432+
chunk.CopyTo(bytes.AsSpan(bytesUsed, chunk.Length));
433+
bytesUsed += chunk.Length;
434+
}
401435
}
402436
}
403437

404438
[Benchmark]
405439
public void CursivelyCsvSelect()
406440
{
407-
var s = TestData.GetUtf8Stream();
441+
var d = TestData.GetUtf8Array();
408442
var proc = new CursivelySelectVisitor();
409443
CsvSyncInput
410-
.ForStream(s)
411-
.WithMinReadBufferByteCount(BufferSize)
444+
.ForMemory(d)
412445
.Process(proc);
413446
}
414447

source/CsvBenchmark/TestData.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ public static Stream GetUtf8Stream()
112112
return new MemoryStream(CachedUtfData);
113113
}
114114

115+
public static ReadOnlyMemory<byte> GetUtf8Array()
116+
{
117+
return CachedUtfData;
118+
}
119+
115120
public static DbDataReader GetData()
116121
{
117122

0 commit comments

Comments
 (0)