Description
Description
The XML docs for `ChannelOptions.AllowSynchronousContinuations` say:
> Setting this option to `true` can provide measurable throughput improvements by avoiding scheduling additional work items.
But the following benchmark produces surprising results:
/// <summary>
/// Measures the time needed to push <c>Count</c> doubles through a single-writer /
/// single-reader channel while a consumer task sums them. The channel under test is
/// supplied by <see cref="ChannelCreator"/>, which varies the channel kind
/// (unbounded / bounded) and the <c>AllowSynchronousContinuations</c> option.
/// </summary>
[SimpleJob(RunStrategy.Throughput, RuntimeMoniker.Net80)]
[MemoryDiagnoser]
public class ChannelBenchmark
{
    private const int Count = 1_000_000;

    // Replaced in GlobalSetup; starts as an empty array so the field is never null.
    private double[] _data = Array.Empty<double>();

    /// <summary>
    /// Builds the input once, outside the measured region: <c>Count</c> random
    /// doubles in the range [0, 10000).
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _data = Enumerable.Range(0, Count)
            .Select(_ => Random.Shared.NextDouble() * 10000)
            .ToArray();
    }

    /// <summary>Factory for the channel used by the current benchmark case.</summary>
    [ParamsSource(nameof(ChannelsSource))]
    public Func<Channel<double>> ChannelCreator { get; set; }

    /// <summary>
    /// The benchmark cases, in this order: sync unbounded, async unbounded,
    /// sync bounded, async bounded ("sync" meaning AllowSynchronousContinuations = true).
    /// </summary>
    public IEnumerable<Func<Channel<double>>> ChannelsSource =>
        // Explicit element type: does not depend on natural-type inference for lambdas.
        new Func<Channel<double>>[]
        {
            () => Channel.CreateUnbounded<double>(new UnboundedChannelOptions
                { SingleReader = true, SingleWriter = true, AllowSynchronousContinuations = true }),
            () => Channel.CreateUnbounded<double>(new UnboundedChannelOptions
                { SingleReader = true, SingleWriter = true, AllowSynchronousContinuations = false }),
            () => Channel.CreateBounded<double>(new BoundedChannelOptions(Count)
                { SingleReader = true, SingleWriter = true, AllowSynchronousContinuations = true }),
            () => Channel.CreateBounded<double>(new BoundedChannelOptions(Count)
                { SingleReader = true, SingleWriter = true, AllowSynchronousContinuations = false }),
        };

    /// <summary>
    /// Starts a consumer that sums everything read from the channel, writes all of
    /// the prepared input from the calling thread, completes the writer, and returns
    /// the consumer's task.
    /// </summary>
    /// <returns>A task whose result is the sum of all values read from the channel.</returns>
    /// <exception cref="InvalidOperationException">
    /// A write was rejected by the channel, so the run would not process the full input.
    /// </exception>
    [Benchmark]
    public Task<double> Run()
    {
        var channel = ChannelCreator();

        // Return the consumer task itself rather than bridging through a
        // TaskCompletionSource: a failure in the reader now faults the returned
        // task instead of leaving a never-completed task behind.
        var consumer = Task.Run(async () =>
        {
            var total = 0d;
            await foreach (var output in channel.Reader.ReadAllAsync())
            {
                total += output;
            }

            return total;
        });

        foreach (var input in _data)
        {
            if (!channel.Writer.TryWrite(input))
            {
                // A dropped item would silently shrink the measured workload.
                // Complete the channel with the error so the consumer does not wait forever.
                var error = new InvalidOperationException("The channel rejected a write; the benchmark input was not fully enqueued.");
                channel.Writer.TryComplete(error);
                throw error;
            }
        }

        channel.Writer.Complete();
        return consumer;
    }
}
Results:
Method | ChannelCreator | Mean | Error | StdDev | Gen0 | Gen1 | Gen2 | Allocated |
---|---|---|---|---|---|---|---|---|
Run | Syste(...)ble]] [65] // sync unbounded | 121.34 ms | 2.392 ms | 3.431 ms | - | - | - | 38.53 KB |
Run | Syste(...)ble]] [65] // async unbounded | 44.34 ms | 2.355 ms | 6.908 ms | - | - | - | 269.07 KB |
Run | Syste(...)ble]] [65] // sync bounded | 90.52 ms | 2.052 ms | 5.954 ms | 285.7143 | 285.7143 | 285.7143 | 15216 KB |
Run | Syste(...)ble]] [65] // async bounded | 86.18 ms | 1.714 ms | 3.502 ms | 571.4286 | 571.4286 | 571.4286 | 16386.46 KB |
In both bounded and unbounded channels, the async version runs faster than the sync version (at the cost of higher memory consumption).
I get that by setting `AllowSynchronousContinuations` to `true`, the benchmark code essentially becomes single-threaded, and that could make processing take longer (as opposed to having one producer and one consumer working concurrently). But given that the "data consumption" is only a single increment, I imagine it should be extremely fast (much faster than a context switch, at least), so the fact that the async version runs so much faster is still surprising.
Nonetheless, in an attempt to rule out the possibility that the async version runs faster due to having more workers, I ran the following benchmark, which is essentially the same as the one above but run `Environment.ProcessorCount` times in parallel (`Environment.ProcessorCount` is 20 on my machine):
/// <summary>
/// Runs <see cref="Environment.ProcessorCount"/> independent producer/consumer
/// pipelines at once. Each pipeline pushes <c>Count</c> doubles through its own
/// single-writer / single-reader channel while a consumer task sums them. The
/// channel under test is supplied by <see cref="ChannelCreator"/>.
/// </summary>
[SimpleJob(RunStrategy.Throughput, RuntimeMoniker.Net80)]
[MemoryDiagnoser]
public class ChannelBenchmark
{
    private const int Count = 1_000_000;

    // Replaced in GlobalSetup; starts as an empty array so the field is never null.
    private double[] _data = Array.Empty<double>();

    /// <summary>
    /// Builds the input once, outside the measured region: <c>Count</c> random
    /// doubles in the range [0, 10000). All pipelines read the same array.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _data = Enumerable.Range(0, Count)
            .Select(_ => Random.Shared.NextDouble() * 10000)
            .ToArray();
    }

    /// <summary>Factory for the channels used by the current benchmark case.</summary>
    [ParamsSource(nameof(ChannelsSource))]
    public Func<Channel<double>> ChannelCreator { get; set; }

    /// <summary>
    /// The benchmark cases, in this order: sync unbounded, async unbounded,
    /// sync bounded, async bounded ("sync" meaning AllowSynchronousContinuations = true).
    /// </summary>
    public IEnumerable<Func<Channel<double>>> ChannelsSource =>
        // Explicit element type: does not depend on natural-type inference for lambdas.
        new Func<Channel<double>>[]
        {
            () => Channel.CreateUnbounded<double>(new UnboundedChannelOptions
                { SingleReader = true, SingleWriter = true, AllowSynchronousContinuations = true }),
            () => Channel.CreateUnbounded<double>(new UnboundedChannelOptions
                { SingleReader = true, SingleWriter = true, AllowSynchronousContinuations = false }),
            () => Channel.CreateBounded<double>(new BoundedChannelOptions(Count)
                { SingleReader = true, SingleWriter = true, AllowSynchronousContinuations = true }),
            () => Channel.CreateBounded<double>(new BoundedChannelOptions(Count)
                { SingleReader = true, SingleWriter = true, AllowSynchronousContinuations = false }),
        };

    /// <summary>
    /// Starts one pipeline per logical processor, waits for all of them, and
    /// returns the sum of their totals.
    /// </summary>
    /// <returns>The combined sum over all pipelines.</returns>
    [Benchmark]
    public async Task<double> Run()
    {
        var creator = ChannelCreator;
        var data = _data;

        // RunOnce performs its entire write loop synchronously before it returns.
        // Calling it directly from Select would therefore execute the producers one
        // after another on this thread; Task.Run lets the pipelines actually overlap.
        var pipelines = Enumerable.Range(0, Environment.ProcessorCount)
            .Select(_ => Task.Run(() => RunOnce(creator(), data)))
            .ToArray();

        var totals = await Task.WhenAll(pipelines);
        return totals.Sum();
    }

    /// <summary>
    /// Runs a single pipeline: starts a consumer that sums everything read from
    /// <paramref name="channel"/>, writes every element of <paramref name="data"/>
    /// from the calling thread, completes the writer, and returns the consumer's task.
    /// </summary>
    /// <param name="channel">The channel to write to and read from.</param>
    /// <param name="data">The values to write, in order.</param>
    /// <returns>A task whose result is the sum of all values read from the channel.</returns>
    /// <exception cref="InvalidOperationException">
    /// A write was rejected by the channel, so the run would not process the full input.
    /// </exception>
    private static Task<double> RunOnce(Channel<double> channel, double[] data)
    {
        // Return the consumer task itself rather than bridging through a
        // TaskCompletionSource: a failure in the reader now faults the returned
        // task instead of leaving a never-completed task behind.
        var consumer = Task.Run(async () =>
        {
            var total = 0d;
            await foreach (var output in channel.Reader.ReadAllAsync())
            {
                total += output;
            }

            return total;
        });

        foreach (var input in data)
        {
            if (!channel.Writer.TryWrite(input))
            {
                // A dropped item would silently shrink the measured workload.
                // Complete the channel with the error so the consumer does not wait forever.
                var error = new InvalidOperationException("The channel rejected a write; the benchmark input was not fully enqueued.");
                channel.Writer.TryComplete(error);
                throw error;
            }
        }

        channel.Writer.Complete();
        return consumer;
    }
}
The results are still in favor of the async versions:
Method | ChannelCreator | Mean | Error | StdDev | Gen0 | Gen1 | Gen2 | Allocated |
---|---|---|---|---|---|---|---|---|
Run | Syste(...)ble]] [65] // sync unbounded | 2,290.5 ms | 29.86 ms | 24.94 ms | - | - | - | 1 MB |
Run | Syste(...)ble]] [65] // async unbounded | 940.9 ms | 34.49 ms | 101.71 ms | - | - | - | 1.74 MB |
Run | Syste(...)ble]] [65] // sync bounded | 1,137.0 ms | 27.12 ms | 78.69 ms | 7000.0000 | 7000.0000 | 7000.0000 | 312.04 MB |
Run | Syste(...)ble]] [65] // async bounded | 1,093.4 ms | 23.37 ms | 68.92 ms | 16000.0000 | 16000.0000 | 16000.0000 | 320.05 MB |
Configuration
BenchmarkDotNet v0.14.0, Windows 11 (10.0.22631.4602/23H2/2023Update/SunValley3)
12th Gen Intel Core i7-12800H, 1 CPU, 20 logical and 14 physical cores
.NET SDK 9.0.100-preview.5.24307.3
[Host] : .NET 8.0.10 (8.0.1024.46610), X64 RyuJIT AVX2
Job-RRGDGB : .NET 8.0.10 (8.0.1024.46610), X64 RyuJIT AVX2
Runtime=.NET 8.0 RunStrategy=Throughput