26
26
#include "ompi/mca/op/base/base.h"
27
27
#include "ompi/mca/op/aarch64/op_aarch64.h"
28
28
29
- #if defined(GENERATE_SVE_CODE )
29
+ /**
30
+ * Ensure exactly one of GENERATE_SVE_CODE or GENERATE_NEON_CODE is enabled.
31
+ * Enabling both is invalid as each builds a separate library. Disabling both
32
+ * would leave no implementation to compile.
33
+ */
34
+ #if GENERATE_SVE_CODE && GENERATE_NEON_CODE
35
+ #error "Never build NEON and SVE within the same library"
36
+ #elif GENERATE_SVE_CODE
30
37
# include <arm_sve.h>
31
38
#define OMPI_OP_TYPE_PREPEND sv
32
39
#define OMPI_OP_OP_PREPEND sv
33
40
#define APPEND _sve
34
- #elif defined( GENERATE_NEON_CODE )
41
+ #elif GENERATE_NEON_CODE
35
42
# include <arm_neon.h>
36
43
#define OMPI_OP_TYPE_PREPEND
37
44
#define OMPI_OP_OP_PREPEND v
38
45
#define APPEND _neon
39
46
#else
40
- #error we should not reach this
47
+ #error "Neither NEON nor SVE code generated. This should never happen"
41
48
#endif /* OMPI_MCA_OP_HAVE_SVE */
42
49
43
50
/*
51
58
*/
52
59
#define OP_CONCAT (A , B ) OP_CONCAT_NX(A, B)
53
60
54
- #if defined( GENERATE_SVE_CODE )
61
+ #if GENERATE_SVE_CODE
55
62
# define svcnt (X ) \
56
63
_Generic((X), \
57
64
int8_t: svcntb, \
@@ -101,7 +108,7 @@ _Generic((*(out)), \
101
108
uint64_t: __extension__({ switch ((how_much)) { DUMP2(out, in1, in2) }}), \
102
109
float32_t: __extension__({ switch ((how_much)) { DUMP4(out, in1, in2) }}), \
103
110
float64_t: __extension__({ switch ((how_much)) { DUMP2(out, in1, in2) }}))
104
- #endif /* defined( GENERATE_SVE_CODE) */
111
+ #endif /* GENERATE_SVE_CODE */
105
112
106
113
/*
107
114
* Since all the functions in this file are essentially identical, we
@@ -111,7 +118,7 @@ _Generic((*(out)), \
111
118
* This macro is for (out op in).
112
119
*
113
120
*/
114
- #if defined( GENERATE_NEON_CODE )
121
+ #if GENERATE_NEON_CODE
115
122
#define OP_AARCH64_FUNC (name , type_name , type_size , type_cnt , type , op ) \
116
123
static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, \
117
124
APPEND)(const void *_in, void *_out, int *count, \
@@ -135,7 +142,7 @@ _Generic((*(out)), \
135
142
neon_loop(left_over, out, out, in); \
136
143
} \
137
144
}
138
- #elif defined( GENERATE_SVE_CODE )
145
+ #elif GENERATE_SVE_CODE
139
146
#define OP_AARCH64_FUNC (name , type_name , type_size , type_cnt , type , op ) \
140
147
OMPI_SVE_ATTR \
141
148
static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, APPEND) \
@@ -169,10 +176,10 @@ _Generic((*(out)), \
169
176
OP_AARCH64_FUNC (max , u , 16 , 8 , uint , max )
170
177
OP_AARCH64_FUNC (max , s , 32 , 4 , int , max )
171
178
OP_AARCH64_FUNC (max , u , 32 , 4 , uint , max )
172
- #if defined( GENERATE_SVE_CODE )
179
+ #if GENERATE_SVE_CODE
173
180
OP_AARCH64_FUNC (max , s , 64 , 2 , int , max )
174
181
OP_AARCH64_FUNC (max , u , 64 , 2 , uint , max )
175
- #endif /* defined( GENERATE_SVE_CODE) */
182
+ #endif /* GENERATE_SVE_CODE */
176
183
177
184
OP_AARCH64_FUNC (max , f , 32 , 4 , float , max )
178
185
OP_AARCH64_FUNC (max , f , 64 , 2 , float , max )
@@ -188,10 +195,10 @@ _Generic((*(out)), \
188
195
OP_AARCH64_FUNC (min , u , 16 , 8 , uint , min )
189
196
OP_AARCH64_FUNC (min , s , 32 , 4 , int , min )
190
197
OP_AARCH64_FUNC (min , u , 32 , 4 , uint , min )
191
- #if defined( GENERATE_SVE_CODE )
198
+ #if GENERATE_SVE_CODE
192
199
OP_AARCH64_FUNC (min , s , 64 , 2 , int , min )
193
200
OP_AARCH64_FUNC (min , u , 64 , 2 , uint , min )
194
- #endif /* defined( GENERATE_SVE_CODE) */
201
+ #endif /* GENERATE_SVE_CODE */
195
202
196
203
OP_AARCH64_FUNC (min , f , 32 , 4 , float , min )
197
204
OP_AARCH64_FUNC (min , f , 64 , 2 , float , min )
@@ -223,10 +230,10 @@ _Generic((*(out)), \
223
230
OP_AARCH64_FUNC (prod , u , 16 , 8 , uint , mul )
224
231
OP_AARCH64_FUNC (prod , s , 32 , 4 , int , mul )
225
232
OP_AARCH64_FUNC (prod , u , 32 , 4 , uint , mul )
226
- #if defined( GENERATE_SVE_CODE )
233
+ #if GENERATE_SVE_CODE
227
234
OP_AARCH64_FUNC (prod , s , 64 , 2 , int , mul )
228
235
OP_AARCH64_FUNC (prod , u , 64 , 2 , uint , mul )
229
- #endif /* defined( GENERATE_SVE_CODE) */
236
+ #endif /* GENERATE_SVE_CODE */
230
237
231
238
OP_AARCH64_FUNC (prod , f , 32 , 4 , float , mul )
232
239
OP_AARCH64_FUNC (prod , f , 64 , 2 , float , mul )
@@ -277,7 +284,7 @@ _Generic((*(out)), \
277
284
* This is a three buffer (2 input and 1 output) version of the reduction
278
285
* routines, needed for some optimizations.
279
286
*/
280
- #if defined( GENERATE_NEON_CODE )
287
+ #if GENERATE_NEON_CODE
281
288
#define OP_AARCH64_FUNC_3BUFF (name , type_name , type_size , type_cnt , type , op ) \
282
289
static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
283
290
(const void *_in1, const void *_in2, void *_out, int *count, \
@@ -302,7 +309,7 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
302
309
neon_loop(left_over, out, in1, in2); \
303
310
} \
304
311
}
305
- #elif defined( GENERATE_SVE_CODE )
312
+ #elif GENERATE_SVE_CODE
306
313
#define OP_AARCH64_FUNC_3BUFF (name , type_name , type_size , type_cnt , type , op ) \
307
314
OMPI_SVE_ATTR \
308
315
static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
@@ -324,7 +331,7 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
324
331
OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
325
332
} \
326
333
}
327
- #endif /* defined( GENERATE_SVE_CODE) */
334
+ #endif /* GENERATE_SVE_CODE */
328
335
329
336
/*************************************************************************
330
337
* Max
@@ -337,10 +344,10 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
337
344
OP_AARCH64_FUNC_3BUFF (max , u , 16 , 8 , uint , max )
338
345
OP_AARCH64_FUNC_3BUFF (max , s , 32 , 4 , int , max )
339
346
OP_AARCH64_FUNC_3BUFF (max , u , 32 , 4 , uint , max )
340
- #if defined( GENERATE_SVE_CODE )
347
+ #if GENERATE_SVE_CODE
341
348
OP_AARCH64_FUNC_3BUFF (max , s , 64 , 2 , int , max )
342
349
OP_AARCH64_FUNC_3BUFF (max , u , 64 , 2 , uint , max )
343
- #endif /* defined( GENERATE_SVE_CODE) */
350
+ #endif /* GENERATE_SVE_CODE */
344
351
345
352
OP_AARCH64_FUNC_3BUFF (max , f , 32 , 4 , float , max )
346
353
OP_AARCH64_FUNC_3BUFF (max , f , 64 , 2 , float , max )
@@ -356,10 +363,10 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
356
363
OP_AARCH64_FUNC_3BUFF (min , u , 16 , 8 , uint , min )
357
364
OP_AARCH64_FUNC_3BUFF (min , s , 32 , 4 , int , min )
358
365
OP_AARCH64_FUNC_3BUFF (min , u , 32 , 4 , uint , min )
359
- #if defined( GENERATE_SVE_CODE )
366
+ #if GENERATE_SVE_CODE
360
367
OP_AARCH64_FUNC_3BUFF (min , s , 64 , 2 , int , min )
361
368
OP_AARCH64_FUNC_3BUFF (min , u , 64 , 2 , uint , min )
362
- #endif /* defined( GENERATE_SVE_CODE) */
369
+ #endif /* GENERATE_SVE_CODE */
363
370
364
371
OP_AARCH64_FUNC_3BUFF (min , f , 32 , 4 , float , min )
365
372
OP_AARCH64_FUNC_3BUFF (min , f , 64 , 2 , float , min )
@@ -392,10 +399,10 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
392
399
OP_AARCH64_FUNC_3BUFF (prod , u , 16 , 8 , uint , mul )
393
400
OP_AARCH64_FUNC_3BUFF (prod , s , 32 , 4 , int , mul )
394
401
OP_AARCH64_FUNC_3BUFF (prod , u , 32 , 4 , uint , mul )
395
- #if defined( GENERATE_SVE_CODE )
402
+ #if GENERATE_SVE_CODE
396
403
OP_AARCH64_FUNC_3BUFF (prod , s , 64 , 2 , int , mul )
397
404
OP_AARCH64_FUNC_3BUFF (prod , u , 64 , 2 , uint , mul )
398
- #endif /* defined( GENERATE_SVE_CODE) */
405
+ #endif /* GENERATE_SVE_CODE */
399
406
400
407
OP_AARCH64_FUNC_3BUFF (prod , f , 32 , 4 , float , mul )
401
408
OP_AARCH64_FUNC_3BUFF (prod , f , 64 , 2 , float , mul )
@@ -482,17 +489,17 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
482
489
/* Corresponds to MPI_MAX */
483
490
[OMPI_OP_BASE_FORTRAN_MAX ] = {
484
491
C_INTEGER_BASE (max , 2buff ),
485
- #if defined( GENERATE_SVE_CODE )
492
+ #if GENERATE_SVE_CODE
486
493
C_INTEGER_EX (max , 2buff ),
487
- #endif /* defined( GENERATE_SVE_CODE) */
494
+ #endif /* GENERATE_SVE_CODE */
488
495
FLOATING_POINT (max , 2buff ),
489
496
},
490
497
/* Corresponds to MPI_MIN */
491
498
[OMPI_OP_BASE_FORTRAN_MIN ] = {
492
499
C_INTEGER_BASE (min , 2buff ),
493
- #if defined( GENERATE_SVE_CODE )
500
+ #if GENERATE_SVE_CODE
494
501
C_INTEGER_EX (min , 2buff ),
495
- #endif /* defined( GENERATE_SVE_CODE) */
502
+ #endif /* GENERATE_SVE_CODE */
496
503
FLOATING_POINT (min , 2buff ),
497
504
},
498
505
/* Corresponds to MPI_SUM */
@@ -504,9 +511,9 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
504
511
/* Corresponds to MPI_PROD */
505
512
[OMPI_OP_BASE_FORTRAN_PROD ] = {
506
513
C_INTEGER_BASE (prod , 2buff ),
507
- #if defined( GENERATE_SVE_CODE )
514
+ #if GENERATE_SVE_CODE
508
515
C_INTEGER_EX (prod , 2buff ),
509
- #endif /* defined( GENERATE_SVE_CODE) */
516
+ #endif /* GENERATE_SVE_CODE */
510
517
FLOATING_POINT (prod , 2buff ),
511
518
},
512
519
/* Corresponds to MPI_LAND */
@@ -558,17 +565,17 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
558
565
/* Corresponds to MPI_MAX */
559
566
[OMPI_OP_BASE_FORTRAN_MAX ] = {
560
567
C_INTEGER_BASE (max , 3buff ),
561
- #if defined( GENERATE_SVE_CODE )
568
+ #if GENERATE_SVE_CODE
562
569
C_INTEGER_EX (max , 3buff ),
563
- #endif /* defined( GENERATE_SVE_CODE) */
570
+ #endif /* GENERATE_SVE_CODE */
564
571
FLOATING_POINT (max , 3buff ),
565
572
},
566
573
/* Corresponds to MPI_MIN */
567
574
[OMPI_OP_BASE_FORTRAN_MIN ] = {
568
575
C_INTEGER_BASE (min , 3buff ),
569
- #if defined( GENERATE_SVE_CODE )
576
+ #if GENERATE_SVE_CODE
570
577
C_INTEGER_EX (min , 3buff ),
571
- #endif /* defined( GENERATE_SVE_CODE) */
578
+ #endif /* GENERATE_SVE_CODE */
572
579
FLOATING_POINT (min , 3buff ),
573
580
},
574
581
/* Corresponds to MPI_SUM */
@@ -580,9 +587,9 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
580
587
/* Corresponds to MPI_PROD */
581
588
[OMPI_OP_BASE_FORTRAN_PROD ] = {
582
589
C_INTEGER_BASE (prod , 3buff ),
583
- #if defined( GENERATE_SVE_CODE )
590
+ #if GENERATE_SVE_CODE
584
591
C_INTEGER_EX (prod , 3buff ),
585
- #endif /* defined( GENERATE_SVE_CODE) */
592
+ #endif /* GENERATE_SVE_CODE */
586
593
FLOATING_POINT (prod , 3buff ),
587
594
},
588
595
/* Corresponds to MPI_LAND */
0 commit comments