forked from Xilinx/mlir-air
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAIR.td
More file actions
955 lines (881 loc) · 39.5 KB
/
AIR.td
File metadata and controls
955 lines (881 loc) · 39.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
//===- AIR.td ----------------------------------------------*- tablegen -*-===//
//
// Copyright (C) 2020-2022, Xilinx Inc. All rights reserved.
// Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved.
// SPDX-License-Identifier: MIT
//
//===----------------------------------------------------------------------===//
#ifndef AIR_OPS
#define AIR_OPS
include "mlir/IR/OpBase.td"
include "air/Dialect/AIR/AIROpBase.td"
include "mlir/IR/SymbolInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/TilingInterface.td"
// Common base class for all operations of the AIR dialect. It pins the dialect
// to `air_Dialect`, so each op definition only supplies its mnemonic and an
// optional list of traits.
class air_Op<string mnemonic, list<Trait> traits = []>
    : Op<air_Dialect, mnemonic, traits>;
// `air.universe.alloc`: takes one index operand (`capacity`) and yields a
// value of type `air_Universe`. The op carries the `Pure` trait.
def air_UniverseAllocOp : air_Op<"universe.alloc", [Pure]> {
  let summary = "Allocate a universe of devices";
  let description = [{
    Creates an `!air.universe` value representing a bounded pool of
    `capacity` devices or hosts. The universe value is consumed by
    `air.rank` to constrain the physical pool from which rank instances
    are scheduled.
  }];

  let arguments = (ins Index:$capacity);
  let results = (outs air_Universe:$universe);

  let assemblyFormat = "`(` $capacity `)` attr-dict";
}
// `air.rank`: hierarchy operation with a single-block region that is
// IsolatedFromAbove and implicitly terminated by `RankTerminatorOp`.
// Its operands form four groups (async dependencies, optional universe,
// sizes, rank operands) that are told apart via AttrSizedOperandSegments.
def air_RankOp : air_Op<"rank", [air_AsyncOpInterface,
air_HierarchyInterface,
AttrSizedOperandSegments,
IsolatedFromAbove,
AffineScope,
DeclareOpInterfaceMethods<RegionBranchOpInterface, ["getEntrySuccessorOperands", "getSuccessorInputs", "getRegionInvocationBounds"]>,
SingleBlockImplicitTerminator<"RankTerminatorOp">]>,
Arguments<(ins OptionalAttr<SymbolNameAttr>:$sym_name,
Variadic<air_AsyncToken>:$async_dependencies,
Optional<air_Universe>:$universe,
Variadic<Index>:$sizes,
Variadic<AnyType>:$rank_operands)>,
Results<(outs Optional<air_AsyncToken>:$async_token)> {
let summary = "Multi-device rank";
let description = [{
Represents a communicating world of rank instances, where each instance
corresponds to a complete GPU device or a CPU host process. `air.rank`
is the outermost hierarchy level, sitting above `air.launch`.
The operation defines an N-dimensional iteration space. Each point is a
rank instance. The body is `IsolatedFromAbove`; values are passed via
explicit kernel operands.
An optional `universe` operand of type `!air.universe` constrains the
physical pool from which rank instances are scheduled.
}];
let regions = (region SizedRegion<1>:$body);
// The ODS default builders are disabled; only the builders listed below are
// declared, and none of them has an inline body in this file.
let skipDefaultBuilders = 1;
let builders = [
// Sizes and body operands only.
OpBuilder<(ins "ValueRange":$sizes,"ValueRange":$rankOperands)>,
// Adds async dependencies, an `is_async` flag and extra attributes.
OpBuilder<(ins "ValueRange":$async_dependencies,
"ValueRange":$sizes,"ValueRange":$rankOperands,
CArg<"bool", "false">:$is_async,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
// Same as the previous builder, with an additional `universe` value.
OpBuilder<(ins "ValueRange":$async_dependencies,
"Value":$universe,
"ValueRange":$sizes,"ValueRange":$rankOperands,
CArg<"bool", "false">:$is_async,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
];
// Parser and printer are hand-written (not visible in this file).
let hasCustomAssemblyFormat = 1;
let extraClassDeclaration = [{
// The accessors below are declarations only; their definitions are not part
// of this file.
ArrayRef<BlockArgument> getIds();
ArrayRef<BlockArgument> getSize();
OperandRange getSizeOperands();
unsigned getNumKernelOperands();
OperandRange getKernelOperands();
Value getKernelOperand(unsigned i);
ArrayRef<BlockArgument> getKernelArguments();
BlockArgument getKernelArgument(unsigned i);
unsigned getNumDims();
/// Return the value of the integer attribute named "id", or -1 when the op
/// carries no such attribute.
int32_t getId() {
if (auto id_attr = (*this)->getAttrOfType<IntegerAttr>("id")) {
return id_attr.getInt();
}
return -1;
}
/// Return the kernel argument that corresponds to the given operand.
/// Return an "empty" block argument if the given value is not a kernel
/// operand.
BlockArgument getTiedKernelArgument(Value Oper) {
auto kernelOperands = getKernelOperands();
auto it = llvm::find(kernelOperands, Oper);
if (it == kernelOperands.end())
return {};
return getKernelArgument(std::distance(kernelOperands.begin(), it));
}
/// Return the operand that corresponds to the given kernel argument.
/// Return "nullptr" if the given block argument is not a kernel argument
/// of this op.
Value getTiedKernelOperand(BlockArgument bbArg) {
auto args = getKernelArguments();
auto it = llvm::find(args, bbArg);
if (it == args.end())
return {};
return getKernelOperand(std::distance(args.begin(), it));
}
/// Append the given values to kernel operands.
/// For every appended value an argument of the same type is added to the
/// body, using this op's location.
void appendKernelOperands(ValueRange operands){
getRankOperandsMutable().append(operands);
for (auto oper : operands){
getBody().addArgument(oper.getType(), getLoc());
}
}
}];
let hasCanonicalizer = 1;
let hasVerifier = 1;
}
// Terminator of the `air.rank` body: no operands, no results, and only legal
// directly inside a `RankOp`.
def air_RankTerminatorOp
    : air_Op<"rank_terminator", [HasParent<"RankOp">, Pure, Terminator]> {
  let summary = "Terminator for `air.rank`.";
  let description = [{
    A terminator operation for the body of `air.rank` operations.
    `air.rank` operations are not expected to return any value so the
    terminator takes no operands.
  }];

  let arguments = (ins);
  let results = (outs);

  let assemblyFormat = "attr-dict";
}
// `air.launch`: hierarchy operation with a single-block region that is
// IsolatedFromAbove and implicitly terminated by `LaunchTerminatorOp`.
// Operands form three groups (async dependencies, sizes, launch operands)
// that are told apart via AttrSizedOperandSegments.
// NOTE(review): summary and description below are one-word placeholders;
// consider documenting the op's semantics the way `air.rank` does.
def air_LaunchOp : air_Op<"launch", [air_AsyncOpInterface,
air_HierarchyInterface,
AttrSizedOperandSegments,
IsolatedFromAbove,
AffineScope,
DeclareOpInterfaceMethods<RegionBranchOpInterface, ["getEntrySuccessorOperands", "getSuccessorInputs", "getRegionInvocationBounds"]>,
SingleBlockImplicitTerminator<"LaunchTerminatorOp">]>,
Arguments<(ins OptionalAttr<SymbolNameAttr>:$sym_name,
Variadic<air_AsyncToken>:$async_dependencies,
Variadic<Index>:$sizes,
Variadic<AnyType>:$launch_operands)>,
Results<(outs Optional<air_AsyncToken>:$async_token)> {
let summary = "Launch";
let description = [{
Launch
}];
let regions = (region SizedRegion<1>:$body);
// The ODS default builders are disabled; only the builders listed below are
// declared, and none of them has an inline body in this file.
let skipDefaultBuilders = 1;
let builders = [
// Sizes and body operands only.
OpBuilder<(ins "ValueRange":$sizes,"ValueRange":$launch_operands)>,
// Adds async dependencies, an `is_async` flag and extra attributes.
OpBuilder<(ins "ValueRange":$async_dependencies,
"ValueRange":$sizes,"ValueRange":$launch_operands,
CArg<"bool", "false">:$is_async,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
];
// Parser and printer are hand-written (not visible in this file).
let hasCustomAssemblyFormat = 1;
let extraClassDeclaration = [{
// The accessors below are declarations only; their definitions are not part
// of this file.
ArrayRef<BlockArgument> getIds();
ArrayRef<BlockArgument> getSize();
OperandRange getSizeOperands();
unsigned getNumKernelOperands();
OperandRange getKernelOperands();
Value getKernelOperand(unsigned i);
ArrayRef<BlockArgument> getKernelArguments();
BlockArgument getKernelArgument(unsigned i);
unsigned getNumDims();
/// Return the value of the integer attribute named "id", or -1 when the op
/// carries no such attribute.
int32_t getId() {
if (auto id_attr = (*this)->getAttrOfType<IntegerAttr>("id")) {
return id_attr.getInt();
}
return -1;
}
/// Return the kernel argument that corresponds to the given operand.
/// Return an "empty" block argument if the given value is not a kernel
/// operand.
BlockArgument getTiedKernelArgument(Value Oper) {
auto kernelOperands = getKernelOperands();
auto it = llvm::find(kernelOperands, Oper);
if (it == kernelOperands.end())
return {};
return getKernelArgument(std::distance(kernelOperands.begin(), it));
}
/// Return the operand that corresponds to the given kernel argument.
/// Return "nullptr" if the given block argument is not a kernel argument
/// of this op.
Value getTiedKernelOperand(BlockArgument bbArg) {
auto args = getKernelArguments();
auto it = llvm::find(args, bbArg);
if (it == args.end())
return {};
return getKernelOperand(std::distance(args.begin(), it));
}
/// Append the given values to kernel operands.
/// For every appended value an argument of the same type is added to the
/// body, using this op's location.
void appendKernelOperands(ValueRange operands){
getLaunchOperandsMutable().append(operands);
for (auto oper : operands){
getBody().addArgument(oper.getType(), getLoc());
}
}
}];
let hasCanonicalizer = 1;
}
// Terminator of the `air.launch` body: no operands, no results, and only
// legal directly inside a `LaunchOp`.
def air_LaunchTerminatorOp
    : air_Op<"launch_terminator", [HasParent<"LaunchOp">, Pure, Terminator]> {
  let summary = "Terminator for `air.launch`.";
  let description = [{
    A terminator operation for the body of `air.launch` operations.
    `air.launch` operations are not expected to return any value so the
    terminator takes no operands.
  }];

  let arguments = (ins);
  let results = (outs);

  let assemblyFormat = "attr-dict";
}
// `air.segment`: hierarchy operation with a single-block region that is
// IsolatedFromAbove and implicitly terminated by `SegmentTerminatorOp`.
// Operands form three groups (async dependencies, sizes, segment operands)
// that are told apart via AttrSizedOperandSegments.
// NOTE(review): summary and description below are one-word placeholders;
// consider documenting the op's semantics the way `air.rank` does.
def air_SegmentOp : air_Op<"segment", [air_AsyncOpInterface,
air_HierarchyInterface,
AttrSizedOperandSegments,
IsolatedFromAbove,
AffineScope,
DeclareOpInterfaceMethods<RegionBranchOpInterface, ["getEntrySuccessorOperands", "getSuccessorInputs", "getRegionInvocationBounds"]>,
SingleBlockImplicitTerminator<"SegmentTerminatorOp">]>,
Arguments<(ins OptionalAttr<SymbolNameAttr>:$sym_name,
Variadic<air_AsyncToken>:$async_dependencies,
Variadic<Index>:$sizes,
Variadic<AnyType>:$segment_operands)>,
Results<(outs Optional<air_AsyncToken>:$async_token)> {
let summary = "Segment";
let description = [{
Segment
}];
let regions = (region SizedRegion<1>:$body);
// The ODS default builders are disabled; only the builders listed below are
// declared, and none of them has an inline body in this file.
let skipDefaultBuilders = 1;
let builders = [
// Sizes and body operands only.
OpBuilder<(ins "ValueRange":$sizes,"ValueRange":$segment_operands)>,
// Adds async dependencies, an `is_async` flag and extra attributes.
OpBuilder<(ins "ValueRange":$async_dependencies,
"ValueRange":$sizes,"ValueRange":$segment_operands,
CArg<"bool", "false">:$is_async,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
];
// Parser and printer are hand-written (not visible in this file).
let hasCustomAssemblyFormat = 1;
let extraClassDeclaration = [{
// The accessors below are declarations only; their definitions are not part
// of this file.
ArrayRef<BlockArgument> getIds();
ArrayRef<BlockArgument> getSize();
OperandRange getSizeOperands();
unsigned getNumKernelOperands();
OperandRange getKernelOperands();
Value getKernelOperand(unsigned i);
ArrayRef<BlockArgument> getKernelArguments();
BlockArgument getKernelArgument(unsigned i);
unsigned getNumDims();
/// Return the value of the integer attribute named "id", or -1 when the op
/// carries no such attribute.
int32_t getId() {
if (auto id_attr = (*this)->getAttrOfType<IntegerAttr>("id")) {
return id_attr.getInt();
}
return -1;
}
// Names of the integer attributes read by getColOffset / getRowOffset.
static StringRef getColOffsetAttrName() { return "x_loc"; }
static StringRef getRowOffsetAttrName() { return "y_loc"; }
/// Return the value of the "x_loc" integer attribute, or an empty optional
/// when the attribute is absent.
// NOTE(review): the signed attribute value is converted to uint64_t; a
// negative attribute would wrap around. Confirm negatives cannot occur.
std::optional<uint64_t> getColOffset() {
auto name = getColOffsetAttrName();
if (auto a = (*this)->getAttrOfType<IntegerAttr>(name)) {
return a.getInt();
}
return std::optional<uint64_t>();
}
/// Return the value of the "y_loc" integer attribute, or an empty optional
/// when the attribute is absent.
std::optional<uint64_t> getRowOffset() {
auto name = getRowOffsetAttrName();
if (auto a = (*this)->getAttrOfType<IntegerAttr>(name)) {
return a.getInt();
}
return std::optional<uint64_t>();
}
// Names of the integer attributes read by getNumCols / getNumRows.
static StringRef getNumColsAttrName() { return "x_size"; }
static StringRef getNumRowsAttrName() { return "y_size"; }
/// Return the value of the "x_size" integer attribute, or an empty optional
/// when the attribute is absent.
std::optional<uint64_t> getNumCols() {
auto name = getNumColsAttrName();
if (auto a = (*this)->getAttrOfType<IntegerAttr>(name)) {
return a.getInt();
}
return std::optional<uint64_t>();
}
/// Return the value of the "y_size" integer attribute, or an empty optional
/// when the attribute is absent.
std::optional<uint64_t> getNumRows() {
auto name = getNumRowsAttrName();
if (auto a = (*this)->getAttrOfType<IntegerAttr>(name)) {
return a.getInt();
}
return std::optional<uint64_t>();
}
/// Return the kernel argument that corresponds to the given operand.
/// Return an "empty" block argument if the given value is not a kernel
/// operand.
BlockArgument getTiedKernelArgument(Value Oper) {
auto kernelOperands = getKernelOperands();
auto it = llvm::find(kernelOperands, Oper);
if (it == kernelOperands.end())
return {};
return getKernelArgument(std::distance(kernelOperands.begin(), it));
}
/// Return the operand that corresponds to the given kernel argument.
/// Return "nullptr" if the given block argument is not a kernel argument
/// of this op.
Value getTiedKernelOperand(BlockArgument bbArg) {
auto args = getKernelArguments();
auto it = llvm::find(args, bbArg);
if (it == args.end())
return {};
return getKernelOperand(std::distance(args.begin(), it));
}
/// Append the given values to kernel operands.
/// For every appended value an argument of the same type is added to the
/// body, using this op's location.
void appendKernelOperands(ValueRange operands){
getSegmentOperandsMutable().append(operands);
for (auto oper : operands){
getBody().addArgument(oper.getType(), getLoc());
}
}
}];
let hasCanonicalizer = 1;
let hasVerifier = 1;
}
// Terminator of the `air.segment` body: no operands, no results, and only
// legal directly inside a `SegmentOp`.
def air_SegmentTerminatorOp
    : air_Op<"segment_terminator", [HasParent<"SegmentOp">, Pure, Terminator]> {
  let summary = "Terminator for air segment regions.";
  let description = [{
    A terminator operation for the body of `air.segment` operations.
    `air.segment` operations are not expected to return any value so the
    terminator takes no operands.
  }];

  let arguments = (ins);
  let results = (outs);

  let assemblyFormat = "attr-dict";
}
// `air.herd`: hierarchy operation with a single-block region that is
// IsolatedFromAbove and implicitly terminated by `HerdTerminatorOp`.
// Operands form three groups (async dependencies, sizes, herd operands) that
// are told apart via AttrSizedOperandSegments. In addition to the optional
// symbol name it accepts an optional string attribute `link_with`.
def air_HerdOp : air_Op<"herd", [air_AsyncOpInterface,
air_HierarchyInterface,
AttrSizedOperandSegments,
IsolatedFromAbove,
AffineScope,
DeclareOpInterfaceMethods<RegionBranchOpInterface, ["getEntrySuccessorOperands", "getSuccessorInputs", "getRegionInvocationBounds"]>,
SingleBlockImplicitTerminator<"HerdTerminatorOp">]>,
Arguments<(ins OptionalAttr<SymbolNameAttr>:$sym_name,
OptionalAttr<StrAttr>:$link_with,
Variadic<air_AsyncToken>:$async_dependencies,
Variadic<Index>:$sizes,
Variadic<AnyType>:$herd_operands)>,
Results<(outs Optional<air_AsyncToken>:$async_token)> {
let summary = "Herd";
let description = [{
Define and run a 1D or 2D array of tiles as an AIR Herd.
}];
let regions = (region SizedRegion<1>:$body);
// The ODS default builders are disabled; only the builders listed below are
// declared, and none of them has an inline body in this file.
let skipDefaultBuilders = 1;
let builders = [
// Sizes and body operands only.
OpBuilder<(ins "ValueRange":$sizes,"ValueRange":$herd_operands)>,
// Adds async dependencies, an `is_async` flag and extra attributes.
OpBuilder<(ins "ValueRange":$async_dependencies,
"ValueRange":$sizes,
"ValueRange":$herd_operands,
CArg<"bool", "false">:$is_async,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
];
// Parser and printer are hand-written (not visible in this file).
let hasCustomAssemblyFormat = 1;
let extraClassDeclaration = [{
// The accessors below are declarations only; their definitions are not part
// of this file.
ArrayRef<BlockArgument> getIds();
ArrayRef<BlockArgument> getSize();
OperandRange getSizeOperands();
unsigned getNumKernelOperands();
OperandRange getKernelOperands();
Value getKernelOperand(unsigned i);
ArrayRef<BlockArgument> getKernelArguments();
BlockArgument getKernelArgument(unsigned i);
unsigned getNumDims();
/// Return the value of the integer attribute named "id", or -1 when the op
/// carries no such attribute.
int32_t getId() {
if (auto id_attr = (*this)->getAttrOfType<IntegerAttr>("id")) {
return id_attr.getInt();
}
return -1;
}
// Names of the integer attributes read by getColOffset / getRowOffset.
static StringRef getColOffsetAttrName() { return "x_loc"; }
static StringRef getRowOffsetAttrName() { return "y_loc"; }
/// Return the value of the "x_loc" integer attribute, or an empty optional
/// when the attribute is absent.
// NOTE(review): the signed attribute value is converted to uint64_t; a
// negative attribute would wrap around. Confirm negatives cannot occur.
std::optional<uint64_t> getColOffset() {
auto name = getColOffsetAttrName();
if (auto a = (*this)->getAttrOfType<IntegerAttr>(name)) {
return a.getInt();
}
return std::optional<uint64_t>();
}
/// Return the value of the "y_loc" integer attribute, or an empty optional
/// when the attribute is absent.
std::optional<uint64_t> getRowOffset() {
auto name = getRowOffsetAttrName();
if (auto a = (*this)->getAttrOfType<IntegerAttr>(name)) {
return a.getInt();
}
return std::optional<uint64_t>();
}
// Declared only; unlike the offset getters these return a plain uint64_t and
// their definitions are not part of this file.
uint64_t getNumCols();
uint64_t getNumRows();
/// Return the kernel argument that corresponds to the given operand.
/// Return an "empty" block argument if the given value is not a kernel
/// operand.
BlockArgument getTiedKernelArgument(Value Oper) {
auto kernelOperands = getKernelOperands();
auto it = llvm::find(kernelOperands, Oper);
if (it == kernelOperands.end())
return {};
return getKernelArgument(std::distance(kernelOperands.begin(), it));
}
/// Return the operand that corresponds to the given kernel argument.
/// Return "nullptr" if the given block argument is not a kernel argument
/// of this op.
Value getTiedKernelOperand(BlockArgument bbArg) {
auto args = getKernelArguments();
auto it = llvm::find(args, bbArg);
if (it == args.end())
return {};
return getKernelOperand(std::distance(args.begin(), it));
}
/// Append the given values to kernel operands.
/// For every appended value an argument of the same type is added to the
/// body, using this op's location.
void appendKernelOperands(ValueRange operands){
getHerdOperandsMutable().append(operands);
for (auto oper : operands){
getBody().addArgument(oper.getType(), getLoc());
}
}
}];
let hasCanonicalizer = 1;
let hasVerifier = 1;
}
// Terminator of the `air.herd` body: no operands, no results, and only legal
// directly inside a `HerdOp`.
def air_HerdTerminatorOp
    : air_Op<"herd_terminator", [HasParent<"HerdOp">, Pure, Terminator]> {
  let summary = "Terminator for air herd regions.";
  let description = [{
    A terminator operation for the body of `air.herd` operations.
    `air.herd` operations are not expected to return any value so the
    terminator takes no operands.
  }];

  let arguments = (ins);
  let results = (outs);

  let assemblyFormat = "attr-dict";
}
// `air.dma_memcpy_nd`: N-dimensional strided copy from `src` to `dst`.
// Each side is described by a memref plus variadic offsets/sizes/strides; the
// operand groups are told apart via AttrSizedOperandSegments. Optional
// attributes carry padding (`pad_before`, `pad_after`) and cross-rank
// addressing (`src_rank`, `dst_rank`).
def air_DmaMemcpyNdOp : air_Op<"dma_memcpy_nd",
                               [air_AsyncOpInterface,
                                air_MemcpyInterface,
                                AttrSizedOperandSegments]> {
  let summary = "dma operator";
  let arguments = (ins
    Variadic<air_AsyncToken>:$async_dependencies,
    Arg<AnyRankedOrUnrankedMemRef, "destination memref",
        [MemWriteAt<0, FullEffect>]>:$dst,
    Variadic<Index>:$dst_offsets,
    Variadic<Index>:$dst_sizes,
    Variadic<Index>:$dst_strides,
    Arg<AnyRankedOrUnrankedMemRef, "source memref",
        [MemReadAt<0, FullEffect>]>:$src,
    Variadic<Index>:$src_offsets,
    Variadic<Index>:$src_sizes,
    Variadic<Index>:$src_strides,
    OptionalAttr<DenseI32ArrayAttr>:$pad_before,
    OptionalAttr<DenseI32ArrayAttr>:$pad_after,
    OptionalAttr<I64Attr>:$src_rank,
    OptionalAttr<I64Attr>:$dst_rank
  );
  let results = (outs Optional<air_AsyncToken>:$async_token);
  let assemblyFormat = [{
    custom<AsyncDependencies>(type($async_token), $async_dependencies)
    `(` $dst `[` ($dst_offsets^)? `]``[` ($dst_sizes^)? `]``[` ($dst_strides^)? `]` `,`
    $src `[` ($src_offsets^)? `]``[` ($src_sizes^)? `]``[` ($src_strides^)? `]` `)` attr-dict `:`
    `(` type($dst) `,` type($src) `)`
  }];
  let description = [{
    N-dimensional strided bulk copy between two memrefs.
    Optional `src_rank` / `dst_rank` integer attributes name a peer rank in the
    enclosing `air.rank` scope. When present, the corresponding memref is
    interpreted as living on rank R's symmetric heap rather than on the local
    process. These attributes are only valid for `air.symmetric`-tagged memref
    allocations and require an enclosing `air.rank`. Lowering for these
    attributes is not implemented yet and is planned for a future GPU pass
    (`air-cross-rank-dma-to-mgpu`); at present only the IR surface and verifier
    rules are defined.
  }];
  let extraClassDeclaration = [{
    /// Aliases for the `src` / `dst` operands.
    Value getSrcMemref() { return getSrc(); }
    Value getDstMemref() { return getDst(); }

    /// Return the value of the integer attribute named "id", or -1 when the
    /// op carries no such attribute.
    int32_t getId() {
      if (auto id_attr = (*this)->getAttrOfType<IntegerAttr>("id")) {
        return id_attr.getInt();
      }
      return -1;
    }

    /// Return true when the `pad_before` attribute is present.
    // NOTE(review): only `pad_before` is inspected here, whereas
    // `air.channel.put` also checks `pad_after`. Confirm that the verifier
    // guarantees both attributes appear together, or align the two helpers.
    bool hasPadding() {
      return getPadBefore().has_value();
    }

    /// Return true when either `src_rank` or `dst_rank` is present.
    bool hasCrossRank() {
      return getSrcRank().has_value() || getDstRank().has_value();
    }
  }];
  let builders = [
    // Backward-compatible builder: defaults src_rank/dst_rank to absent.
    OpBuilder<(ins "::mlir::TypeRange":$resultTypes,
                   "::mlir::ValueRange":$async_dependencies,
                   "::mlir::Value":$dst,
                   "::mlir::ValueRange":$dst_offsets,
                   "::mlir::ValueRange":$dst_sizes,
                   "::mlir::ValueRange":$dst_strides,
                   "::mlir::Value":$src,
                   "::mlir::ValueRange":$src_offsets,
                   "::mlir::ValueRange":$src_sizes,
                   "::mlir::ValueRange":$src_strides,
                   "::mlir::DenseI32ArrayAttr":$pad_before,
                   "::mlir::DenseI32ArrayAttr":$pad_after), [{
      build($_builder, $_state, resultTypes, async_dependencies, dst,
            dst_offsets, dst_sizes, dst_strides, src,
            src_offsets, src_sizes, src_strides, pad_before, pad_after,
            /*src_rank=*/IntegerAttr(),
            /*dst_rank=*/IntegerAttr());
    }]>
  ];
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
// `air.wait_all`: takes any number of async tokens as dependencies and
// optionally produces a new async token.
def air_WaitAllOp : air_Op<"wait_all", [air_AsyncOpInterface]> {
  let arguments = (ins Variadic<air_AsyncToken>:$async_dependencies);
  let results = (outs Optional<air_AsyncToken>:$async_token);
  let summary = "wait for all operator";
  let description = [{
    Wait for all async tokens before proceeding.
  }];
  let assemblyFormat = [{
    custom<AsyncDependencies>(type($async_token), $async_dependencies) attr-dict
  }];
  let extraClassDeclaration = [{
    /// Return the value of the integer attribute named "id", or -1 when the
    /// op carries no such attribute.
    int32_t getId() {
      if (auto id_attr = (*this)->getAttrOfType<IntegerAttr>("id")) {
        return id_attr.getInt();
      }
      return -1;
    }
  }];
  let hasCanonicalizer = 1;
}
// AIR channel
// `air.channel`: symbol-defining op declaring a (possibly multi-dimensional)
// channel. `size` defaults to an empty array and `channel_type` defaults to
// "npu_dma_stream". The optional `broadcast_shape` and `buffer_resources`
// attributes are not declared as ODS arguments; the helper methods below look
// them up by name.
def air_ChannelOp : air_Op<"channel", [Symbol]>,
    Arguments<(ins SymbolNameAttr:$sym_name,
                   DefaultValuedAttr<I64ArrayAttr, "{}">:$size,
                   DefaultValuedAttr<StrAttr, "\"npu_dma_stream\"">:$channel_type)> {
  let assemblyFormat = [{
    $sym_name $size attr-dict
  }];
  let summary = "Channel for data movement.";
  let description = [{
    Operation to represent a communication channel as a point-to-point connection between two memrefs.
    The array following the channel name symbol represents the channel's dimensional sizes. Default
    size, with empty size array, is 1. The data movement mechanism that the channel uses is controlled
    by the `channel_type` attribute.
    ### Channel Types
    The `channel_type` attribute is a string that determines the mechanism used for data movement.
    Values are namespaced by backend: NPU (AIE) channels use the `npu_` prefix; GPU channels use
    the `gpu_` prefix.
    NPU (AIE) channel types:
    - **"npu_dma_stream"** (default):
    Use DMA engines to send and receive data, with routing performed over a streaming interconnect.
    - **"npu_dma_packet"**:
    Use DMA engines to send and receive data, with routing performed over a packet-switched network.
    - **"npu_cascade"**:
    Use processor cores to send and receive data via cascade connections between adjacent tiles.
    - **"npu_mmio"**:
    Use host-side MMIO writes (e.g. `aiex.npu.blockwrite`) issued from the runtime
    sequence to deliver a constant payload directly into a tile-local L1 buffer.
    No DMA channel, no shim allocation, no flow is reserved.
    Verifier-enforced constraints on the put/get sites:
    * the `put` source memref must live in L3 (`memory_space=0`);
    * the `get` destination memref must live in L1 (`memory_space=2`).
    The lowering further requires the put source to be a constant
    `memref.get_global`. The consumer-side `get` lowers to a no-op
    because the L1 buffer is already populated when the core begins executing.
    GPU channel types:
    - **"gpu_symmetric_heap"**:
    Cross-GPU messaging through the symmetric heap runtime
    (`runtime_lib/airgpu/symmetric_heap.{h,cpp}`). The channel must be enclosed
    by an `air.rank` op; the put/get sites use rank indices to address peer
    heaps. Lowering is not implemented yet and is planned for a future GPU
    pass (`air-gpu-channel-to-mgpu`) which expands put/get to peer-mapped
    `mgpuMemcpy` calls plus a barrier; at present only the IR surface and
    verifier rules are defined.
    ### Broadcasting
    If a channel broadcasts to multiple destinations, the optional `broadcast_shape` attribute
    annotates the output sizes after broadcasting. Broadcasting follows NumPy's broadcasting rules.
    Example:
    ```mlir
    // An array of 4 x 4 streaming DMA channels (NPU)
    air.channel @channel_0 [4, 4] {channel_type = "npu_dma_stream"}
    // A streaming DMA channel broadcasting to 4 destinations (NPU)
    air.channel @channel_1 [1, 1] {broadcast_shape = [1, 4], channel_type = "npu_dma_stream"}
    // An array of 1 x 4 streaming DMA channels broadcasting to 4 x 4 destinations (NPU).
    // Broadcasting follows NumPy's rules.
    air.channel @channel_2 [1, 4] {broadcast_shape = [4, 4], channel_type = "npu_dma_stream"}
    // A packet-switched DMA channel (NPU)
    air.channel @channel_3 [] {channel_type = "npu_dma_packet"}
    // A cascade channel using core-to-core cascade connections (NPU)
    air.channel @channel_4 [] {channel_type = "npu_cascade"}
    // An MMIO channel: the put writes a constant from host into L1 of each
    // get's destination tile via runtime-sequence blockwrites (NPU)
    air.channel @channel_5 [] {channel_type = "npu_mmio"}
    // A cross-GPU channel through the symmetric heap (GPU). Must appear inside
    // an air.rank scope; the indices on put/get encode the peer rank.
    air.channel @channel_6 [] {channel_type = "gpu_symmetric_heap"}
    ```
  }];
  let extraClassDeclaration = [{
    /// Return true when the op carries an array attribute named
    /// "broadcast_shape".
    bool isBroadcast() {
      return static_cast<bool>(getBroadcastShape());
    }

    /// Declared only; the definition is not part of this file.
    int getBroadcastDimension();

    /// Return the "broadcast_shape" array attribute, or a null attribute when
    /// it is absent.
    ArrayAttr getBroadcastShape() {
      return getOperation()->getAttrOfType<ArrayAttr>("broadcast_shape");
    }

    /// Return the product of all entries of "broadcast_shape", or 1 when the
    /// attribute is absent.
    int getBroadcastNum() {
      int broadcastNum = 1;
      if (auto shape = getBroadcastShape()) {
        // Entries are required to be integer attributes; `cast` asserts on
        // that instead of silently dereferencing a failed `dyn_cast`.
        for (auto bShape : shape)
          broadcastNum *= llvm::cast<IntegerAttr>(bShape).getInt();
      }
      return broadcastNum;
    }

    /// Return the value of the integer attribute "buffer_resources", or 1
    /// when it is absent.
    int getBufferResources() {
      if (auto attr = getOperation()->getAttrOfType<IntegerAttr>("buffer_resources"))
        return attr.getInt();
      return 1;
    }

    /// Return the product of all entries of `size` (1 for an empty array).
    int getBundleSize() {
      int size = 1;
      // `size` is an I64ArrayAttr, so every element is an IntegerAttr.
      for (auto i : getSize())
        size *= llvm::cast<IntegerAttr>(i).getInt();
      return size;
    }
  }];
  let hasVerifier = 1;
  let hasCanonicalizer = 1;
}
// `air.channel.put`: pushes the region of `src` described by
// offsets/sizes/strides into the channel referenced by `chan_name`, selecting
// one channel of an array through `indices`. Operand groups are told apart via
// AttrSizedOperandSegments.
def air_ChannelPutOp : air_Op<"channel.put", [air_AsyncOpInterface,
                                              air_MemcpyInterface,
                                              air_ChannelInterface,
                                              AttrSizedOperandSegments,
                                              DeclareOpInterfaceMethods<TilingInterface,
                                                ["getIterationDomain",
                                                 "getLoopIteratorTypes",
                                                 "getResultTilePosition",
                                                 "getTiledImplementation"
                                                ]>]>,
    Arguments<(ins Variadic<air_AsyncToken>:$async_dependencies,
                   FlatSymbolRefAttr:$chan_name,
                   Variadic<Index>:$indices,
                   AnyRankedOrUnrankedMemRef:$src,
                   Variadic<Index>:$src_offsets,
                   Variadic<Index>:$src_sizes,
                   Variadic<Index>:$src_strides,
                   OptionalAttr<DenseI32ArrayAttr>:$pad_before,
                   OptionalAttr<DenseI32ArrayAttr>:$pad_after)>,
    Results<(outs Optional<air_AsyncToken>:$async_token)> {
  let summary = "Push for air channels.";
  let description = [{
    The `air.channel.put` operation represents a **push** (send) operation that copies data from a
    source memref into a specified channel.
    This operation models one-way data movement into a channel endpoint, enabling asynchronous
    communication between producer and consumer operations. It is typically paired with
    `air.channel.get` operations on the receiving side.
    ### Semantics
    - The source data is specified by the `src` memref, along with its associated
    `src_offsets`, `src_sizes`, and `src_strides` which describe the subview being transferred.
    - The channel being targeted is identified by the symbol referenced by `chan_name`.
    - The channel must have been declared earlier via an `air.channel` operation.
    - The operation may be asynchronous: if an async token is produced, it can be used to
    synchronize with subsequent dependent operations.
    - The specific channel it operates on, when `chan_name` references an array of channels, is
    identified by `indices`.
    - Optionally, `pad_before` and `pad_after` specify constant zero-padding to apply per
    dimension during the DMA transfer. This maps to hardware DMA buffer descriptor padding
    on AIE memtile DMAs.
    ### Interfaces
    - Implements `air_AsyncOpInterface`, allowing it to participate in async dependency chains.
    - Implements `air_MemcpyInterface`, enabling it to behave like a DMA/memcpy operation.
    - Implements `air_ChannelInterface`, allowing inspection of channel properties.
    - Implements `TilingInterface` (`getIterationDomain`, `getLoopIteratorTypes`,
    `getResultTilePosition`, `getTiledImplementation`).
    ### Example
    ```mlir
    // Send a 4x4 tile from %src into channel @chan_0
    air.channel.put @chan_0[] (%src[%c0, %c0][%c4, %c4][%c1, %c1]) : (memref<16x16xf32>)
    // Asynchronous put with dependency on %t0
    %t1 = air.channel.put async [%t0] @chan_1[] (%src[%c8, %c0][%c4, %c4][%c1, %c1]) : (memref<16x16xf32>)
    // Put with padding: read 13 elements, pad 2 before and 1 after
    air.channel.put @chan_2[] (%src[%c0] [%c13] [%c1])
    {pad_before = array<i32: 2>, pad_after = array<i32: 1>} : (memref<16xi32>)
    ```
  }];
  let assemblyFormat = [{
    custom<AsyncDependencies>(type($async_token), $async_dependencies)
    $chan_name `[` ($indices^)? `]`
    `(` $src `[` ($src_offsets^)? `]``[` ($src_sizes^)? `]``[` ($src_strides^)? `]` `)` attr-dict `:`
    `(` type($src) `)`
  }];
  let extraClassDeclaration = [{
    /// A put only has a source side: the generic and `Src` accessors forward
    /// to the `src` operand groups, and `getDstMemref` returns a null value.
    Value getMemref() { return getSrc(); }
    Value getSrcMemref() { return getSrc(); }
    Value getDstMemref() { return nullptr; }
    OperandRange getOffsets() { return getSrcOffsets(); }
    /// The `Dst` range accessors must not be called on a put: they emit an op
    /// error and then hit `llvm_unreachable`.
    OperandRange getDstOffsets() { emitOpError("does not have DstOffset"); llvm_unreachable("unreachable"); }
    OperandRange getSizes() { return getSrcSizes(); }
    OperandRange getDstSizes() { emitOpError("does not have DstSizes"); llvm_unreachable("unreachable"); }
    OperandRange getStrides() { return getSrcStrides(); }
    OperandRange getDstStrides() { emitOpError("does not have DstStrides"); llvm_unreachable("unreachable"); }

    /// Return the value of the integer attribute named "id", or -1 when the
    /// op carries no such attribute.
    int32_t getId() {
      if (auto id_attr = (*this)->getAttrOfType<IntegerAttr>("id")) {
        return id_attr.getInt();
      }
      return -1;
    }

    /// Return true when `pad_before` or `pad_after` is present.
    bool hasPadding() {
      return getPadBefore().has_value() || getPadAfter().has_value();
    }
  }];
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
// `air.channel.get`: the receiving endpoint of an AIR channel. Operands are
// the async dependencies, the optional channel indices, and the destination
// memref with its offsets/sizes/strides; `pad_before`/`pad_after` are optional
// i32-array attributes. Operand segments are tracked via
// AttrSizedOperandSegments because several operand groups are variadic.
def air_ChannelGetOp : air_Op<"channel.get", [air_AsyncOpInterface,
                                              air_MemcpyInterface,
                                              air_ChannelInterface,
                                              AttrSizedOperandSegments,
                                              DeclareOpInterfaceMethods<TilingInterface,
                                                ["getIterationDomain",
                                                 "getLoopIteratorTypes",
                                                 "getResultTilePosition",
                                                 "getTiledImplementation"
                                                ]>]>,
    Arguments<(ins Variadic<air_AsyncToken>:$async_dependencies,
        FlatSymbolRefAttr:$chan_name,
        Variadic<Index>:$indices,
        AnyRankedOrUnrankedMemRef:$dst,
        Variadic<Index>:$dst_offsets,
        Variadic<Index>:$dst_sizes,
        Variadic<Index>:$dst_strides,
        OptionalAttr<DenseI32ArrayAttr>:$pad_before,
        OptionalAttr<DenseI32ArrayAttr>:$pad_after)>,
    Results<(outs Optional<air_AsyncToken>:$async_token)> {
  let summary = "Get for air channels.";
  let description = [{
The `air.channel.get` operation represents a **pull** (receive) operation that copies data from a
specified channel into a destination memref.
This operation models one-way data movement from a channel endpoint into memory, enabling
asynchronous communication where data previously sent by a corresponding
`air.channel.put` becomes available to the consumer.
### Semantics
- The destination buffer is specified by the `dst` memref, along with its associated
`dst_offsets`, `dst_sizes`, and `dst_strides` which describe the subview being written to.
- The channel being read is identified by the symbol referenced by `chan_name`.
- The channel must have been declared earlier via an `air.channel` operation.
- The operation may be asynchronous: if an async token is produced, it can be used to
synchronize with subsequent dependent operations.
- The specific channel it operates on, when `chan_name` references an array of channels, is
identified by `indices`.
- Optionally, `pad_before` and `pad_after` specify constant zero-padding to apply per
dimension during the DMA transfer. This maps to hardware DMA buffer descriptor padding
on AIE memtile DMAs.
### Interfaces
- Implements `air_AsyncOpInterface`, enabling participation in async dependency chains.
- Implements `air_MemcpyInterface`, allowing it to behave like a DMA/memcpy operation.
- Implements `air_ChannelInterface`, allowing inspection of channel properties.
- Implements `TilingInterface` (`getIterationDomain`, `getLoopIteratorTypes`,
`getResultTilePosition`, `getTiledImplementation`).
### Syntax notes
- The channel name is always followed by a bracketed index list; the brackets are
mandatory and are left empty (`[]`) when no `indices` are given.
- The offsets, sizes, and strides lists are likewise always bracketed, and may be empty.
### Example
```mlir
// Receive a 4x4 tile into %dst from channel @chan_0
air.channel.get @chan_0[] (%dst[%c0, %c0] [%c4, %c4] [%c1, %c1]) : (memref<16x16xf32>)
// Asynchronous get with dependency on %t1
%t2 = air.channel.get async [%t1] @chan_1[] (%dst[%c8, %c0] [%c4, %c4] [%c1, %c1]) : (memref<16x16xf32>)
// Get from element [%i, %j] of a channel array
air.channel.get @chan_2[%i, %j] (%dst[%c0, %c0] [%c4, %c4] [%c1, %c1]) : (memref<16x16xf32>)
```
}];
  // Note: the `[` `]` literals after $chan_name and around each of the
  // offsets/sizes/strides lists sit outside the optional groups, so they are
  // required in the textual form even when the lists are empty.
  let assemblyFormat = [{
    custom<AsyncDependencies>(type($async_token), $async_dependencies)
    $chan_name `[` ($indices^)? `]`
    `(` $dst `[` ($dst_offsets^)? `]``[` ($dst_sizes^)? `]``[` ($dst_strides^)? `]` `)` attr-dict `:`
    `(` type($dst) `)`
  }];
  let extraClassDeclaration = [{
    // A get has only a destination side: the unqualified accessors and the
    // Dst* accessors forward to the `dst` operand groups.
    Value getMemref() { return getDst(); }
    Value getDstMemref() { return getDst(); }
    // There is no source memref on a get.
    Value getSrcMemref() { return nullptr; }
    OperandRange getOffsets() { return getDstOffsets(); }
    // The Src* range accessors are not meaningful here: each one emits an op
    // error and then hits llvm_unreachable, so callers must not invoke them.
    OperandRange getSrcOffsets() { emitOpError("does not have SrcOffsets"); llvm_unreachable("unreachable"); }
    OperandRange getSizes() { return getDstSizes(); }
    OperandRange getSrcSizes() { emitOpError("does not have SrcSizes"); llvm_unreachable("unreachable"); }
    OperandRange getStrides() { return getDstStrides(); }
    OperandRange getSrcStrides() { emitOpError("does not have SrcStrides"); llvm_unreachable("unreachable"); }
    // Returns the value of the integer attribute named "id", or -1 when the
    // op carries no such attribute.
    int32_t getId() {
      if (auto id_attr = (*this)->getAttrOfType<IntegerAttr>("id")) {
        return id_attr.getInt();
      }
      return -1;
    }
    // True when at least one of `pad_before` / `pad_after` is present.
    bool hasPadding() {
      return getPadBefore().has_value() || getPadAfter().has_value();
    }
  }];
  let hasCanonicalizer = 1;
  let hasVerifier = 1;
}
// `air.execute`: the AIR asynchronous region used for dynamic event
// dispatching. It takes a list of async dependencies, holds exactly one
// single-block region implicitly terminated by ExecuteTerminatorOp, and
// always produces an async token followed by any number of result values.
def air_ExecuteOp
    : air_Op<"execute",
             [SingleBlockImplicitTerminator<"ExecuteTerminatorOp">,
              air_AsyncOpInterface,
              DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
  let summary = "Asynchronous code region";
  let description = [{
Defines a code region to be dispatched asynchronously at runtime. All operations in
the region must be executed sequentially.
}];

  let arguments = (ins Variadic<air_AsyncToken>:$async_dependencies);
  let results = (outs air_AsyncToken:$async_token, Variadic<AnyType>:$results);
  let regions = (region SizedRegion<1>:$region);

  // Ops with custom assembly formats print their async_dependencies in sorted
  // order; ExecuteOp uses the declarative format below instead. Giving it a
  // custom printer/parser as well remains an option.
  let assemblyFormat = [{
    (` ``[` $async_dependencies^ `]`)?
    (`->` `(` type($results)^ `)`)? regions attr-dict
  }];

  let hasCanonicalizer = 1;
  let hasVerifier = 1;

  let extraClassDeclaration = [{
    // The single block held by the op's region.
    Block &getBody() { return getRegion().front(); }

    // The operation list of the body block.
    llvm::iplist<Operation> &getChildOps() { return getBody().getOperations(); }

    // Collects the defining ops of the terminator's operands, keeping only
    // those whose parent region is this op's region or nested inside it.
    // Operands without a defining op are skipped.
    SmallVector<Operation *> getYieldedChildOps() {
      SmallVector<Operation *> yielded;
      Operation *terminator = getBody().getTerminator();
      for (Value operand : terminator->getOperands()) {
        Operation *producer = operand.getDefiningOp();
        if (!producer)
          continue;
        if (!getRegion().isAncestor(producer->getParentRegion()))
          continue;
        yielded.push_back(producer);
      }
      return yielded;
    }

    // Value of the integer attribute named "id", or -1 when it is absent.
    int32_t getId() {
      auto idAttr = (*this)->getAttrOfType<IntegerAttr>("id");
      if (!idAttr)
        return -1;
      return idAttr.getInt();
    }
  }];
}
// `air.execute_terminator`: terminator that may only appear directly inside
// an ExecuteOp. It is Pure and ReturnLike, accepts any number of operands of
// any type, and has no results.
def air_ExecuteTerminatorOp
    : air_Op<"execute_terminator",
             [HasParent<"ExecuteOp">, Pure, ReturnLike, Terminator]> {
  let summary = "Terminator for air execute.";
  let description = [{
A terminator operation for code regions that appear in the body of
`air.execute` operation. The operation takes variable number of
operands and produces no results. The operand number and types must
match the signature of the `air.execute` that contains the operation.
}];

  let arguments = (ins Variadic<AnyType>:$results);

  // Textual form: the attribute dictionary, then (only when operands are
  // present) the operand list, a colon, and the operand types.
  let assemblyFormat = [{ attr-dict ($results^ `:` type($results))? }];

  // Extra zero-argument builder with an intentionally empty body.
  let builders = [OpBuilder<(ins), [{ /* nothing to do */ }]>];
}
// `air.custom`: a handle standing in for a user-provided AIE kernel. It
// carries an optional symbol reference, a list of async dependencies, and an
// arbitrary list of operands, and may produce an async token.
def air_CustomOp
    : air_Op<"custom", [air_AsyncOpInterface, AttrSizedOperandSegments]>,
      Arguments<(ins OptionalAttr<SymbolRefAttr>:$symbol,
                     Variadic<air_AsyncToken>:$async_dependencies,
                     Variadic<AnyType>:$custom_operands)>,
      Results<(outs Optional<air_AsyncToken>:$async_token)> {
  let summary = "A handle to a user-customized op";
  let description = [{
A placeholder operation for a user-customized op. With user-specified
latency value, AIR Runner is able to simulate the system-level
performance with this op in place.
}];

  // The parser and printer are hand-written rather than generated from a
  // declarative assembly format.
  let hasCustomAssemblyFormat = 1;

  // The default ODS builders are suppressed; only the two builders declared
  // here are available. Neither has an inline body in this file.
  let skipDefaultBuilders = 1;
  let builders = [
    // Operands only.
    OpBuilder<(ins "ValueRange":$custom_operands)>,
    // Dependencies and operands, with `is_async` defaulting to false and
    // `attrs` defaulting to an empty list.
    OpBuilder<(ins "ValueRange":$async_dependencies,
                   "ValueRange":$custom_operands,
                   CArg<"bool", "false">:$is_async,
                   CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
  ];
}
#endif