Skip to content

Commit d4c13b0

Browse files
committed
One more iteration
1 parent 2859001 commit d4c13b0

File tree

4 files changed

+56
-45
lines changed

4 files changed

+56
-45
lines changed

src/NetFabric.Numerics.Tensors/ApplyBinary.cs

Lines changed: 21 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
using System;
2+
13
namespace NetFabric.Numerics.Tensors;
24

35
public static partial class Tensor
@@ -35,28 +37,32 @@ public static void Apply<T1, T2, TResult, TOperator>(ReadOnlyMemory<T1> x, ReadO
3537
if (x.Length > destination.Length)
3638
Throw.ArgumentException(nameof(destination), "Destination span is too small.");
3739

38-
if(x.Length > 2 * minChunkSize)
39-
ParallelApply(x, y, destination);
40+
var coreCount = AvailableCores();
41+
42+
if (coreCount >= minChunkCount && x.Length > minChunkCount * minChunkSize)
43+
ParallelApply(x, y, destination, coreCount);
4044
else
4145
Apply<T1, T2, TResult, TOperator>(x.Span, y.Span, destination.Span);
4246

43-
static void ParallelApply(ReadOnlyMemory<T1> x, ReadOnlyMemory<T2> y, Memory<TResult> destination)
47+
static void ParallelApply(ReadOnlyMemory<T1> x, ReadOnlyMemory<T2> y, Memory<TResult> destination, int coreCount)
4448
{
45-
var size = x.Length;
46-
var chunkSize = int.Max(size / AvailableCores(), minChunkSize);
49+
var totalSize = x.Length;
50+
var chunkSize = int.Max(totalSize / coreCount, minChunkSize);
4751

48-
var actions = new Action[size / chunkSize];
52+
var actions = new Action[totalSize / chunkSize];
53+
var start = 0;
4954
for (var index = 0; index < actions.Length; index++)
5055
{
51-
var start = index * chunkSize;
52-
var length = (index == actions.Length - 1)
53-
? size - start
56+
var length = (index == actions.Length - 1)
57+
? totalSize - start
5458
: chunkSize;
5559

5660
var xSlice = x.Slice(start, length);
5761
var ySlice = y.Slice(start, length);
5862
var destinationSlice = destination.Slice(start, length);
5963
actions[index] = () => Apply<T1, T2, TResult, TOperator>(xSlice.Span, ySlice.Span, destinationSlice.Span);
64+
65+
start += length;
6066
}
6167
Parallel.Invoke(actions);
6268
}
@@ -111,13 +117,13 @@ public static void Apply<T1, T2, TResult, TOperator>(ReadOnlySpan<T1> x, ReadOnl
111117
var indexSource = 0;
112118

113119
// Check if hardware acceleration and Vector<T> support are available,
114-
// and if the length of the x is greater than the Vector<T>.Count.
120+
// and if the length of the x is greater than the length of Vector<T>.
115121
if (TOperator.IsVectorizable &&
116122
Vector.IsHardwareAccelerated &&
117123
Vector<T1>.IsSupported &&
118124
Vector<T2>.IsSupported &&
119125
Vector<TResult>.IsSupported &&
120-
x.Length >= Vector<T1>.Count)
126+
x.Length > Vector<T1>.Count)
121127
{
122128
// Cast the spans to vectors for hardware acceleration.
123129
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);
@@ -220,13 +226,13 @@ public static void Apply<T1, T2, TResult, TOperator>(ReadOnlySpan<T1> x, T2 y, S
220226
var indexSource = 0;
221227

222228
// Check if hardware acceleration and Vector<T> support are available,
223-
// and if the length of the x is greater than the Vector<T>.Count.
229+
// and if the length of the x is greater than the length of Vector<T>.
224230
if (TOperator.IsVectorizable &&
225231
Vector.IsHardwareAccelerated &&
226232
Vector<T1>.IsSupported &&
227233
Vector<T2>.IsSupported &&
228234
Vector<TResult>.IsSupported &&
229-
x.Length >= Vector<T1>.Count)
235+
x.Length > Vector<T1>.Count)
230236
{
231237
// Cast the spans to vectors for hardware acceleration.
232238
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);
@@ -328,15 +334,15 @@ public static void Apply<T1, T2, TResult, TOperator>(ReadOnlySpan<T1> x, (T2, T2
328334
var indexSource = 0;
329335

330336
// Check if hardware acceleration and Vector<T> support are available,
331-
// and if the length of the x is greater than the Vector<T>.Count.
337+
// and if the length of the x is greater than the length of Vector<T>.
332338
if (TOperator.IsVectorizable &&
333339
Vector.IsHardwareAccelerated &&
334340
Vector<T1>.IsSupported &&
335341
Vector<T2>.IsSupported &&
336342
Vector<TResult>.IsSupported &&
337343
Vector<T1>.Count > 2 &&
338344
Vector<T1>.Count % 2 is 0 &&
339-
x.Length >= Vector<T1>.Count)
345+
x.Length > Vector<T1>.Count)
340346
{
341347
// Cast the spans to vectors for hardware acceleration.
342348
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);

src/NetFabric.Numerics.Tensors/ApplyTernary.cs

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -57,14 +57,14 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, Rea
5757
var indexSource = 0;
5858

5959
// Check if hardware acceleration and Vector<T> support are available,
60-
// and if the length of the x is greater than the Vector<T>.Count.
60+
// and if the length of the x is greater than the length of Vector<T>.
6161
if (TOperator.IsVectorizable &&
6262
Vector.IsHardwareAccelerated &&
6363
Vector<T1>.IsSupported &&
6464
Vector<T2>.IsSupported &&
6565
Vector<T3>.IsSupported &&
6666
Vector<TResult>.IsSupported &&
67-
x.Length >= Vector<T1>.Count)
67+
x.Length > Vector<T1>.Count)
6868
{
6969
// Cast the spans to vectors for hardware acceleration.
7070
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);
@@ -178,14 +178,14 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, T2
178178
var indexSource = 0;
179179

180180
// Check if hardware acceleration and Vector<T> support are available,
181-
// and if the length of the x is greater than the Vector<T>.Count.
181+
// and if the length of the x is greater than the length of Vector<T>.
182182
if (TOperator.IsVectorizable &&
183183
Vector.IsHardwareAccelerated &&
184184
Vector<T1>.IsSupported &&
185185
Vector<T2>.IsSupported &&
186186
Vector<T3>.IsSupported &&
187187
Vector<TResult>.IsSupported &&
188-
x.Length >= Vector<T1>.Count)
188+
x.Length > Vector<T1>.Count)
189189
{
190190
// Cast the spans to vectors for hardware acceleration.
191191
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);
@@ -299,7 +299,7 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, (T2
299299
var indexSource = 0;
300300

301301
// Check if hardware acceleration and Vector<T> support are available,
302-
// and if the length of the x is greater than the Vector<T>.Count.
302+
// and if the length of the x is greater than the length of Vector<T>.
303303
if (TOperator.IsVectorizable &&
304304
Vector.IsHardwareAccelerated &&
305305
Vector<T1>.IsSupported &&
@@ -308,7 +308,7 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, (T2
308308
Vector<TResult>.IsSupported &&
309309
Vector<T1>.Count > 2 &&
310310
Vector<T1>.Count % 2 is 0 &&
311-
x.Length >= Vector<T1>.Count)
311+
x.Length > Vector<T1>.Count)
312312
{
313313
// Cast the spans to vectors for hardware acceleration.
314314
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);
@@ -474,14 +474,14 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, Rea
474474
var indexSource = 0;
475475

476476
// Check if hardware acceleration and Vector<T> support are available,
477-
// and if the length of the x is greater than the Vector<T>.Count.
477+
// and if the length of the x is greater than the length of Vector<T>.
478478
if (TOperator.IsVectorizable &&
479479
Vector.IsHardwareAccelerated &&
480480
Vector<T1>.IsSupported &&
481481
Vector<T2>.IsSupported &&
482482
Vector<T3>.IsSupported &&
483483
Vector<TResult>.IsSupported &&
484-
x.Length >= Vector<T1>.Count)
484+
x.Length > Vector<T1>.Count)
485485
{
486486
// Cast the spans to vectors for hardware acceleration.
487487
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);
@@ -595,7 +595,7 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, Rea
595595
var indexSource = 0;
596596

597597
// Check if hardware acceleration and Vector<T> support are available,
598-
// and if the length of the x is greater than the Vector<T>.Count.
598+
// and if the length of the x is greater than the length of Vector<T>.
599599
if (TOperator.IsVectorizable &&
600600
Vector.IsHardwareAccelerated &&
601601
Vector<T1>.IsSupported &&
@@ -604,7 +604,7 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, Rea
604604
Vector<TResult>.IsSupported &&
605605
Vector<T1>.Count > 2 &&
606606
Vector<T1>.Count % 2 is 0 &&
607-
x.Length >= Vector<T1>.Count)
607+
x.Length > Vector<T1>.Count)
608608
{
609609
// Cast the spans to vectors for hardware acceleration.
610610
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);
@@ -776,14 +776,14 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, T2
776776
var indexSource = 0;
777777

778778
// Check if hardware acceleration and Vector<T> support are available,
779-
// and if the length of the x is greater than the Vector<T>.Count.
779+
// and if the length of the x is greater than the length of Vector<T>.
780780
if (TOperator.IsVectorizable &&
781781
Vector.IsHardwareAccelerated &&
782782
Vector<T1>.IsSupported &&
783783
Vector<T2>.IsSupported &&
784784
Vector<T3>.IsSupported &&
785785
Vector<TResult>.IsSupported &&
786-
x.Length >= Vector<T1>.Count)
786+
x.Length > Vector<T1>.Count)
787787
{
788788
// Cast the spans to vectors for hardware acceleration.
789789
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);
@@ -890,7 +890,7 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, (T2
890890
var indexSource = 0;
891891

892892
// Check if hardware acceleration and Vector<T> support are available,
893-
// and if the length of the x is greater than the Vector<T>.Count.
893+
// and if the length of the x is greater than the length of Vector<T>.
894894
if (TOperator.IsVectorizable &&
895895
Vector.IsHardwareAccelerated &&
896896
Vector<T1>.IsSupported &&
@@ -899,7 +899,7 @@ public static void Apply<T1, T2, T3, TResult, TOperator>(ReadOnlySpan<T1> x, (T2
899899
Vector<TResult>.IsSupported &&
900900
Vector<T1>.Count > 2 &&
901901
Vector<T1>.Count % 2 is 0 &&
902-
x.Length >= Vector<T1>.Count)
902+
x.Length > Vector<T1>.Count)
903903
{
904904
// Cast the spans to vectors for hardware acceleration.
905905
var xVectors = MemoryMarshal.Cast<T1, Vector<T1>>(x);

src/NetFabric.Numerics.Tensors/ApplyUnary.cs

Lines changed: 19 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -31,27 +31,31 @@ public static void Apply<T, TResult, TOperator>(ReadOnlyMemory<T> x, Memory<TRes
3131
if (x.Length > destination.Length)
3232
Throw.ArgumentException(nameof(destination), "Destination span is too small.");
3333

34-
if(x.Length > 2 * minChunkSize)
35-
ParallelApply(x, destination);
34+
var coreCount = AvailableCores();
35+
36+
if (coreCount >= minChunkCount && x.Length > minChunkCount * minChunkSize)
37+
ParallelApply(x, destination, coreCount);
3638
else
3739
Apply<T, TResult, TOperator>(x.Span, destination.Span);
3840

39-
static void ParallelApply(ReadOnlyMemory<T> source, Memory<TResult> destination)
41+
static void ParallelApply(ReadOnlyMemory<T> x, Memory<TResult> destination, int coreCount)
4042
{
41-
var size = source.Length;
42-
var chunkSize = int.Max(size / AvailableCores(), minChunkSize);
43+
var totalSize = x.Length;
44+
var chunkSize = int.Max(totalSize / coreCount, minChunkSize);
4345

44-
var actions = new Action[size / chunkSize];
46+
var actions = new Action[totalSize / chunkSize];
47+
var start = 0;
4548
for (var index = 0; index < actions.Length; index++)
4649
{
47-
var start = index * chunkSize;
48-
var length = (index == actions.Length - 1)
49-
? size - start
50+
var length = (index == actions.Length - 1)
51+
? totalSize - start
5052
: chunkSize;
5153

52-
var sourceSlice = source.Slice(start, length);
54+
var xSlice = x.Slice(start, length);
5355
var destinationSlice = destination.Slice(start, length);
54-
actions[index] = () => Apply<T, TResult, TOperator>(sourceSlice.Span, destinationSlice.Span);
56+
actions[index] = () => Apply<T, TResult, TOperator>(xSlice.Span, destinationSlice.Span);
57+
58+
start += length;
5559
}
5660
Parallel.Invoke(actions);
5761
}
@@ -98,12 +102,12 @@ public static void Apply<T, TResult, TOperator>(ReadOnlySpan<T> x, Span<TResult>
98102
var indexSource = 0;
99103

100104
// Check if hardware acceleration and Vector<T> support are available,
101-
// and if the length of the x is greater than the Vector<T>.Count.
105+
// and if the length of the x is greater than the length of Vector<T>.
102106
if (TOperator.IsVectorizable &&
103107
Vector.IsHardwareAccelerated &&
104108
Vector<T>.IsSupported &&
105109
Vector<TResult>.IsSupported &&
106-
x.Length >= Vector<T>.Count)
110+
x.Length > Vector<T>.Count)
107111
{
108112
// Cast the spans to vectors for hardware acceleration.
109113
var sourceVectors = MemoryMarshal.Cast<T, Vector<T>>(x);
@@ -215,14 +219,14 @@ public static void Apply2<T, TResult1, TResult2, TOperator1, TOperator2>(ReadOnl
215219
var indexSource = 0;
216220

217221
// Check if hardware acceleration and Vector<T> support are available,
218-
// and if the length of the x is greater than the Vector<T>.Count.
222+
// and if the length of the x is greater than the length of Vector<T>.
219223
if (TOperator1.IsVectorizable &&
220224
TOperator2.IsVectorizable &&
221225
Vector.IsHardwareAccelerated &&
222226
Vector<T>.IsSupported &&
223227
Vector<TResult1>.IsSupported &&
224228
Vector<TResult2>.IsSupported &&
225-
x.Length >= Vector<T>.Count)
229+
x.Length > Vector<T>.Count)
226230
{
227231
// Cast the spans to vectors for hardware acceleration.
228232
var sourceVectors = MemoryMarshal.Cast<T, Vector<T>>(x);

src/NetFabric.Numerics.Tensors/Tensor.cs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,8 @@ namespace NetFabric.Numerics.Tensors;
1010
/// </remarks>
1111
public static partial class Tensor
1212
{
13-
const int minChunkSize = 100;
13+
const int minChunkSize = 1_000;
14+
const int minChunkCount = 4;
1415

1516
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1617
static int AvailableCores()

0 commit comments

Comments
 (0)