From 179927d3023ce9f7e1cf135909dac81a726c73fa Mon Sep 17 00:00:00 2001 From: yenjames Date: Tue, 26 May 2026 11:16:50 -0600 Subject: [PATCH 1/3] Sort MemTile ObjectFifos by buffer size and spill to least-used neighbor When multiple MemTile ObjectFifos need to spill buffers to adjacent tiles, IR order can cause suboptimal allocation where large buffers find no spill room because smaller ones already consumed neighbor capacity. Sort MemTile-producer ObjectFifos by buffer size descending before allocation so large buffers get priority. Also sort neighbor tiles by remaining capacity when spilling, preferring the neighbor with more room. --- .../AIEObjectFifoStatefulTransform.cpp | 45 ++++++++++++- .../memtile_spill_order.mlir | 64 +++++++++++++++++++ 2 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 test/objectFifo-stateful-transform/adjacent_memtile_allocation/memtile_spill_order.mlir diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp index fbf8deac919..93322ecad7b 100644 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp @@ -756,10 +756,16 @@ struct AIEObjectFifoStatefulTransformPass } } - // try to allocate on neighbor tiles + // Try neighbor with more remaining capacity first to avoid + // blocking adjacent MemTiles that also need spill space. 
if (!neighborTiles.empty()) { + llvm::stable_sort(neighborTiles, [&](TileOp a, TileOp b) { + return calculateCurrentUsedMemory(a, state.buffersPerFifo, + buffers) < + calculateCurrentUsedMemory(b, state.buffersPerFifo, + buffers); + }); for (auto &tile : neighborTiles) { - // Try to allocate on this neighbor tile int neighborUsedMemory = calculateCurrentUsedMemory( tile, state.buffersPerFifo, buffers); if (static_cast(neighborUsedMemory + totalSizeBytes) <= @@ -1977,7 +1983,40 @@ struct AIEObjectFifoStatefulTransformPass // the acquires/releases (uses of the FIFO). // - Global release counter tracker to keep track of the objectFifo state //===------------------------------------------------------------------===// - for (auto createOp : device.getOps()) { + // Process MemTile ObjectFifos largest-first so large buffers get + // priority for home placement and spill targets are chosen before + // smaller fifos consume neighbor capacity. + SmallVector sortedCreateOps( + device.getOps()); + if (!sortedCreateOps.empty()) { + DataLayout dataLayout = DataLayout::closest(sortedCreateOps[0]); + // Sort only among MemTile-producer fifos by buffer size descending. + // Non-MemTile fifos keep their IR-order positions undisturbed. 
+ auto getBufSize = [&](ObjectFifoCreateOp op) -> int64_t { + auto fifoType = llvm::cast(op.getElemType()); + auto elemType = llvm::cast(fifoType.getElementType()); + int64_t bits = + dataLayout.getTypeSizeInBits(elemType.getElementType()); + return elemType.getNumElements() * bits / 8; + }; + SmallVector memTileSlots; + SmallVector memTileFifos; + for (size_t i = 0; i < sortedCreateOps.size(); i++) { + auto prodTile = dyn_cast( + sortedCreateOps[i].getProducerTile().getDefiningOp()); + if (prodTile && prodTile.isMemTile()) { + memTileSlots.push_back(i); + memTileFifos.push_back(sortedCreateOps[i]); + } + } + llvm::stable_sort(memTileFifos, + [&](ObjectFifoCreateOp a, ObjectFifoCreateOp b) { + return getBufSize(a) > getBufSize(b); + }); + for (size_t i = 0; i < memTileSlots.size(); i++) + sortedCreateOps[memTileSlots[i]] = memTileFifos[i]; + } + for (auto createOp : sortedCreateOps) { int share_direction = 0; bool shared = !requiresDMAs(createOp, share_direction, state); diff --git a/test/objectFifo-stateful-transform/adjacent_memtile_allocation/memtile_spill_order.mlir b/test/objectFifo-stateful-transform/adjacent_memtile_allocation/memtile_spill_order.mlir new file mode 100644 index 00000000000..ea9d4e222cb --- /dev/null +++ b/test/objectFifo-stateful-transform/adjacent_memtile_allocation/memtile_spill_order.mlir @@ -0,0 +1,64 @@ +//===- memtile_spill_order.mlir ----------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --aie-objectFifo-stateful-transform %s | FileCheck %s + +// Three MemTile ObjectFifos where IR-order allocation fails but +// size-sorted (large-first) allocation succeeds. 
+// +// @block at col 3 (depth 1, 524000B): fills col 3 so @large can't spill right. +// @small at col 1 (depth 2, 260000B): fits locally on its own (520000 < 524288). +// @large at col 2 (depth 2, 400000B): must spill one buffer. +// +// Without large-first sort (IR order: block, small, large): +// @small fills col 1 (4288B remaining), @large can't spill left or right → FAIL. +// +// With large-first sort (block 524000, large 400000, small 260000): +// @large spills to col 1 (empty), @small spills to col 0 → SUCCESS. + +// large_cons_buff_0 on col 2 (home), large_cons_buff_1 spills to col 1 +// CHECK-DAG: aie.buffer(%mem_tile_2_1) {sym_name = "large_cons_buff_0"} +// CHECK-DAG: aie.buffer(%mem_tile_1_1) {sym_name = "large_cons_buff_1"} +// small buffers both spill to col 0 +// CHECK-DAG: aie.buffer(%mem_tile_0_1) {sym_name = "small_cons_buff_0"} +// CHECK-DAG: aie.buffer(%mem_tile_0_1) {sym_name = "small_cons_buff_1"} +// block buffer on col 3 (home) +// CHECK-DAG: aie.buffer(%mem_tile_3_1) {sym_name = "block_cons_buff_0"} + +module { + aie.device(npu2) { + %shim0 = aie.tile(0, 0) + %shim1 = aie.tile(1, 0) + %shim2 = aie.tile(2, 0) + %shim3 = aie.tile(3, 0) + %mem1 = aie.tile(1, 1) + %mem2 = aie.tile(2, 1) + %mem3 = aie.tile(3, 1) + %core1 = aie.tile(1, 2) + %core2 = aie.tile(2, 2) + %core3 = aie.tile(3, 2) + + // IR order: block first, then small, then large. 
+ aie.objectfifo @block(%shim3, {%mem3}, 1 : i32) : !aie.objectfifo> + + aie.objectfifo @small(%shim1, {%mem1}, 2 : i32) : !aie.objectfifo> + + aie.objectfifo @large(%shim2, {%mem2}, 2 : i32) : !aie.objectfifo> + + aie.objectfifo @small_out(%mem1, {%core1}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@small] -> [@small_out]([] []) + + aie.objectfifo @large_out(%mem2, {%core2}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@large] -> [@large_out]([] []) + + aie.objectfifo @block_out(%mem3, {%core3}, 1 : i32) : !aie.objectfifo> + aie.objectfifo.link [@block] -> [@block_out]([] []) + } +} From dc59d6fa00e5bbec83a670fe637d55db4827936a Mon Sep 17 00:00:00 2001 From: yenjames Date: Tue, 26 May 2026 14:58:30 -0600 Subject: [PATCH 2/3] Clang-format version mismatch. --- lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp index 93322ecad7b..3ac254fed30 100644 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp @@ -1995,8 +1995,7 @@ struct AIEObjectFifoStatefulTransformPass auto getBufSize = [&](ObjectFifoCreateOp op) -> int64_t { auto fifoType = llvm::cast(op.getElemType()); auto elemType = llvm::cast(fifoType.getElementType()); - int64_t bits = - dataLayout.getTypeSizeInBits(elemType.getElementType()); + int64_t bits = dataLayout.getTypeSizeInBits(elemType.getElementType()); return elemType.getNumElements() * bits / 8; }; SmallVector memTileSlots; From 925890b229b99efca3c5f22913959a4ae1013dc6 Mon Sep 17 00:00:00 2001 From: yenjames Date: Wed, 27 May 2026 11:07:32 -0600 Subject: [PATCH 3/3] use dag check for order invariant operations. 
--- .../link/link_test_join_offsets.mlir | 206 +++++++------- ...sable_synchronization_test_distribute.mlir | 88 +++--- .../memtile_padding_test.mlir | 256 +++++++++--------- .../nd_dma_fromStream_join.mlir | 182 ++++++------- .../init_values_join_input_test.mlir | 156 +++++------ .../init_values_join_output_test.mlir | 140 +++++----- .../link_join_repeat_count_test.mlir | 136 +++++----- .../repeat_count/link_repeat_count_test.mlir | 144 +++++----- 8 files changed, 657 insertions(+), 651 deletions(-) diff --git a/test/objectFifo-stateful-transform/data_movement_patterns/link/link_test_join_offsets.mlir b/test/objectFifo-stateful-transform/data_movement_patterns/link/link_test_join_offsets.mlir index f50dd777063..689feb9676f 100644 --- a/test/objectFifo-stateful-transform/data_movement_patterns/link/link_test_join_offsets.mlir +++ b/test/objectFifo-stateful-transform/data_movement_patterns/link/link_test_join_offsets.mlir @@ -19,144 +19,144 @@ // CHECK: %{{.*}}tile_2_2 = aie.tile(2, 2) // CHECK: %{{.*}}tile_2_3 = aie.tile(2, 3) // CHECK: %{{.*}}tile_3_3 = aie.tile(3, 3) -// CHECK: %[[VAL_2:.*]] = aie.buffer(%{{.*}}tile_2_1) {sym_name = "link4_buff_0"} : memref<48xi32> -// CHECK: %[[VAL_3:.*]] = aie.buffer(%{{.*}}tile_2_1) {sym_name = "link4_buff_1"} : memref<48xi32> -// CHECK: %[[VAL_4:.*]] = aie.lock(%{{.*}}tile_2_1, 0) {init = 2 : i32, sym_name = "link4_prod_lock_0"} -// CHECK: %[[VAL_5:.*]] = aie.lock(%{{.*}}tile_2_1, 1) {init = 0 : i32, sym_name = "link4_cons_lock_0"} -// CHECK: %[[VAL_6:.*]] = aie.lock(%{{.*}}tile_2_1, 2) {init = 2 : i32, sym_name = "link4_prod_lock_1"} -// CHECK: %[[VAL_7:.*]] = aie.lock(%{{.*}}tile_2_1, 3) {init = 0 : i32, sym_name = "link4_cons_lock_1"} -// CHECK: %[[VAL_8:.*]] = aie.lock(%{{.*}}tile_2_1, 4) {init = 2 : i32, sym_name = "link4_prod_lock_2"} -// CHECK: %[[VAL_9:.*]] = aie.lock(%{{.*}}tile_2_1, 5) {init = 0 : i32, sym_name = "link4_cons_lock_2"} -// CHECK: %[[VAL_10:.*]] = aie.buffer(%{{.*}}tile_3_3) {sym_name = "link3_buff_0"} 
: memref<12xi32> -// CHECK: %[[VAL_11:.*]] = aie.buffer(%{{.*}}tile_3_3) {sym_name = "link3_buff_1"} : memref<12xi32> -// CHECK: %[[VAL_12:.*]] = aie.lock(%{{.*}}tile_3_3, 0) {init = 2 : i32, sym_name = "link3_prod_lock_0"} -// CHECK: %[[VAL_13:.*]] = aie.lock(%{{.*}}tile_3_3, 1) {init = 0 : i32, sym_name = "link3_cons_lock_0"} -// CHECK: %[[VAL_14:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "link2_buff_0"} : memref<20xi32> -// CHECK: %[[VAL_15:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "link2_buff_1"} : memref<20xi32> -// CHECK: %[[VAL_16:.*]] = aie.lock(%{{.*}}tile_2_3, 0) {init = 2 : i32, sym_name = "link2_prod_lock_0"} -// CHECK: %[[VAL_17:.*]] = aie.lock(%{{.*}}tile_2_3, 1) {init = 0 : i32, sym_name = "link2_cons_lock_0"} -// CHECK: %[[VAL_18:.*]] = aie.buffer(%{{.*}}tile_2_2) {sym_name = "link1_buff_0"} : memref<4x4xi32> -// CHECK: %[[VAL_19:.*]] = aie.buffer(%{{.*}}tile_2_2) {sym_name = "link1_buff_1"} : memref<4x4xi32> -// CHECK: %[[VAL_20:.*]] = aie.lock(%{{.*}}tile_2_2, 0) {init = 2 : i32, sym_name = "link1_prod_lock_0"} -// CHECK: %[[VAL_21:.*]] = aie.lock(%{{.*}}tile_2_2, 1) {init = 0 : i32, sym_name = "link1_cons_lock_0"} -// CHECK: aie.flow(%{{.*}}tile_2_2, DMA : 0, %{{.*}}tile_2_1, DMA : 0) -// CHECK: aie.flow(%{{.*}}tile_2_3, DMA : 0, %{{.*}}tile_2_1, DMA : 1) -// CHECK: aie.flow(%{{.*}}tile_3_3, DMA : 0, %{{.*}}tile_2_1, DMA : 2) -// CHECK: aie.flow(%{{.*}}tile_2_1, DMA : 0, %{{.*}}tile_2_0, DMA : 0) +// CHECK-DAG: %[[LINK4_BUFF_0:.*]] = aie.buffer(%{{.*}}tile_2_1) {sym_name = "link4_buff_0"} : memref<48xi32> +// CHECK-DAG: %[[LINK4_BUFF_1:.*]] = aie.buffer(%{{.*}}tile_2_1) {sym_name = "link4_buff_1"} : memref<48xi32> +// CHECK-DAG: %[[LINK4_PROD_LOCK_0:.*]] = aie.lock(%{{.*}}tile_2_1, 0) {init = 2 : i32, sym_name = "link4_prod_lock_0"} +// CHECK-DAG: %[[LINK4_CONS_LOCK_0:.*]] = aie.lock(%{{.*}}tile_2_1, 1) {init = 0 : i32, sym_name = "link4_cons_lock_0"} +// CHECK-DAG: %[[LINK4_PROD_LOCK_1:.*]] = aie.lock(%{{.*}}tile_2_1, 2) {init = 2 : 
i32, sym_name = "link4_prod_lock_1"} +// CHECK-DAG: %[[LINK4_CONS_LOCK_1:.*]] = aie.lock(%{{.*}}tile_2_1, 3) {init = 0 : i32, sym_name = "link4_cons_lock_1"} +// CHECK-DAG: %[[LINK4_PROD_LOCK_2:.*]] = aie.lock(%{{.*}}tile_2_1, 4) {init = 2 : i32, sym_name = "link4_prod_lock_2"} +// CHECK-DAG: %[[LINK4_CONS_LOCK_2:.*]] = aie.lock(%{{.*}}tile_2_1, 5) {init = 0 : i32, sym_name = "link4_cons_lock_2"} +// CHECK-DAG: %[[LINK3_BUFF_0:.*]] = aie.buffer(%{{.*}}tile_3_3) {sym_name = "link3_buff_0"} : memref<12xi32> +// CHECK-DAG: %[[LINK3_BUFF_1:.*]] = aie.buffer(%{{.*}}tile_3_3) {sym_name = "link3_buff_1"} : memref<12xi32> +// CHECK-DAG: %[[LINK3_PROD_LOCK:.*]] = aie.lock(%{{.*}}tile_3_3, 0) {init = 2 : i32, sym_name = "link3_prod_lock_0"} +// CHECK-DAG: %[[LINK3_CONS_LOCK:.*]] = aie.lock(%{{.*}}tile_3_3, 1) {init = 0 : i32, sym_name = "link3_cons_lock_0"} +// CHECK-DAG: %[[LINK2_BUFF_0:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "link2_buff_0"} : memref<20xi32> +// CHECK-DAG: %[[LINK2_BUFF_1:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "link2_buff_1"} : memref<20xi32> +// CHECK-DAG: %[[LINK2_PROD_LOCK:.*]] = aie.lock(%{{.*}}tile_2_3, 0) {init = 2 : i32, sym_name = "link2_prod_lock_0"} +// CHECK-DAG: %[[LINK2_CONS_LOCK:.*]] = aie.lock(%{{.*}}tile_2_3, 1) {init = 0 : i32, sym_name = "link2_cons_lock_0"} +// CHECK-DAG: %[[LINK1_BUFF_0:.*]] = aie.buffer(%{{.*}}tile_2_2) {sym_name = "link1_buff_0"} : memref<4x4xi32> +// CHECK-DAG: %[[LINK1_BUFF_1:.*]] = aie.buffer(%{{.*}}tile_2_2) {sym_name = "link1_buff_1"} : memref<4x4xi32> +// CHECK-DAG: %[[LINK1_PROD_LOCK:.*]] = aie.lock(%{{.*}}tile_2_2, 0) {init = 2 : i32, sym_name = "link1_prod_lock_0"} +// CHECK-DAG: %[[LINK1_CONS_LOCK:.*]] = aie.lock(%{{.*}}tile_2_2, 1) {init = 0 : i32, sym_name = "link1_cons_lock_0"} +// CHECK-DAG: aie.flow(%{{.*}}tile_2_2, DMA : 0, %{{.*}}tile_2_1, DMA : 0) +// CHECK-DAG: aie.flow(%{{.*}}tile_2_3, DMA : 0, %{{.*}}tile_2_1, DMA : 1) +// CHECK-DAG: aie.flow(%{{.*}}tile_3_3, DMA : 0, 
%{{.*}}tile_2_1, DMA : 2) +// CHECK-DAG: aie.flow(%{{.*}}tile_2_1, DMA : 0, %{{.*}}tile_2_0, DMA : 0) // CHECK: %mem_2_2 = aie.mem(%{{.*}}tile_2_2) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_21]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_18]] : memref<4x4xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_20]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[LINK1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK1_BUFF_0]] : memref<4x4xi32>, 0, 16) +// CHECK: aie.use_lock(%[[LINK1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_21]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_19]] : memref<4x4xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_20]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[LINK1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK1_BUFF_1]] : memref<4x4xi32>, 0, 16) +// CHECK: aie.use_lock(%[[LINK1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } // CHECK: %memtile_dma_2_1 = aie.memtile_dma(%{{.*}}tile_2_1) { // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<48xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_5]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_0]] : memref<48xi32>, 0, 16) +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<48xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_5]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_0]], 
AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_1]] : memref<48xi32>, 0, 16) +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: %1 = aie.dma_start(S2MM, 1, ^bb4, ^bb6) -// CHECK: ^bb4: // 2 preds: ^bb3, ^bb5 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<48xi32>, 16, 20) -// CHECK: aie.use_lock(%[[VAL_7]], Release, 1) +// CHECK: ^bb4: +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_0]] : memref<48xi32>, 16, 20) +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb5 -// CHECK: ^bb5: // pred: ^bb4 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<48xi32>, 16, 20) -// CHECK: aie.use_lock(%[[VAL_7]], Release, 1) +// CHECK: ^bb5: +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_1]] : memref<48xi32>, 16, 20) +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb4 -// CHECK: ^bb6: // pred: ^bb3 +// CHECK: ^bb6: // CHECK: %2 = aie.dma_start(S2MM, 2, ^bb7, ^bb9) -// CHECK: ^bb7: // 2 preds: ^bb6, ^bb8 -// CHECK: aie.use_lock(%[[VAL_8]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<48xi32>, 36, 12) -// CHECK: aie.use_lock(%[[VAL_9]], Release, 1) +// CHECK: ^bb7: +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_2]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_0]] : memref<48xi32>, 36, 12) +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_2]], Release, 1) // CHECK: aie.next_bd ^bb8 -// CHECK: ^bb8: // pred: ^bb7 -// CHECK: aie.use_lock(%[[VAL_8]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<48xi32>, 36, 12) -// CHECK: aie.use_lock(%[[VAL_9]], Release, 1) +// CHECK: ^bb8: +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_2]], AcquireGreaterEqual, 1) 
+// CHECK: aie.dma_bd(%[[LINK4_BUFF_1]] : memref<48xi32>, 36, 12) +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_2]], Release, 1) // CHECK: aie.next_bd ^bb7 -// CHECK: ^bb9: // pred: ^bb6 +// CHECK: ^bb9: // CHECK: %3 = aie.dma_start(MM2S, 0, ^bb10, ^bb16) -// CHECK: ^bb10: // 2 preds: ^bb9, ^bb15 -// CHECK: aie.use_lock(%[[VAL_5]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<48xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_4]], Release, 1) +// CHECK: ^bb10: +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_0]] : memref<48xi32>, 0, 16) +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb11 -// CHECK: ^bb11: // pred: ^bb10 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<48xi32>, 16, 20) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb11: +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_0]] : memref<48xi32>, 16, 20) +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb12 -// CHECK: ^bb12: // pred: ^bb11 -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<48xi32>, 36, 12) -// CHECK: aie.use_lock(%[[VAL_8]], Release, 1) +// CHECK: ^bb12: +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_2]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_0]] : memref<48xi32>, 36, 12) +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_2]], Release, 1) // CHECK: aie.next_bd ^bb13 -// CHECK: ^bb13: // pred: ^bb12 -// CHECK: aie.use_lock(%[[VAL_5]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<48xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_4]], Release, 1) +// CHECK: ^bb13: +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_1]] : memref<48xi32>, 0, 16) +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_0]], 
Release, 1) // CHECK: aie.next_bd ^bb14 -// CHECK: ^bb14: // pred: ^bb13 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<48xi32>, 16, 20) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb14: +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_1]] : memref<48xi32>, 16, 20) +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb15 -// CHECK: ^bb15: // pred: ^bb14 -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<48xi32>, 36, 12) -// CHECK: aie.use_lock(%[[VAL_8]], Release, 1) +// CHECK: ^bb15: +// CHECK: aie.use_lock(%[[LINK4_CONS_LOCK_2]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK4_BUFF_1]] : memref<48xi32>, 36, 12) +// CHECK: aie.use_lock(%[[LINK4_PROD_LOCK_2]], Release, 1) // CHECK: aie.next_bd ^bb10 -// CHECK: ^bb16: // pred: ^bb9 +// CHECK: ^bb16: // CHECK: aie.end // CHECK: } // CHECK: %mem_2_3 = aie.mem(%{{.*}}tile_2_3) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_17]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_14]] : memref<20xi32>, 0, 20) -// CHECK: aie.use_lock(%[[VAL_16]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[LINK2_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK2_BUFF_0]] : memref<20xi32>, 0, 20) +// CHECK: aie.use_lock(%[[LINK2_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_17]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_15]] : memref<20xi32>, 0, 20) -// CHECK: aie.use_lock(%[[VAL_16]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[LINK2_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK2_BUFF_1]] : memref<20xi32>, 0, 20) +// CHECK: aie.use_lock(%[[LINK2_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: 
^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } // CHECK: %mem_3_3 = aie.mem(%{{.*}}tile_3_3) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_13]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_10]] : memref<12xi32>, 0, 12) -// CHECK: aie.use_lock(%[[VAL_12]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[LINK3_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK3_BUFF_0]] : memref<12xi32>, 0, 12) +// CHECK: aie.use_lock(%[[LINK3_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_13]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_11]] : memref<12xi32>, 0, 12) -// CHECK: aie.use_lock(%[[VAL_12]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[LINK3_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK3_BUFF_1]] : memref<12xi32>, 0, 12) +// CHECK: aie.use_lock(%[[LINK3_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } // CHECK: aie.shim_dma_allocation @link4_shim_alloc(%shim_noc_tile_2_0, S2MM, 0) diff --git a/test/objectFifo-stateful-transform/debug_features/disable_synchronization_test_distribute.mlir b/test/objectFifo-stateful-transform/debug_features/disable_synchronization_test_distribute.mlir index f27b4c17ac7..5f970410db4 100644 --- a/test/objectFifo-stateful-transform/debug_features/disable_synchronization_test_distribute.mlir +++ b/test/objectFifo-stateful-transform/debug_features/disable_synchronization_test_distribute.mlir @@ -12,62 +12,62 @@ // CHECK: module @disable_sync { // CHECK: aie.device(xcve2302) { -// CHECK: %{{.*}}tile_2_0 = aie.tile(2, 0) -// CHECK: %{{.*}}tile_2_1 = aie.tile(2, 1) -// CHECK: %{{.*}}tile_2_2 = aie.tile(2, 2) -// CHECK: %{{.*}}tile_2_3 = aie.tile(2, 3) -// CHECK: %[[VAL_0:.*]] = aie.buffer(%{{.*}}tile_2_1) {sym_name = "link3_buff_0"} : 
memref<36xi32> -// CHECK: %[[VAL_1:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "link2_buff_0"} : memref<20xi32> -// CHECK: %[[VAL_2:.*]] = aie.lock(%{{.*}}tile_2_3, 0) {init = 1 : i32, sym_name = "link2_prod_lock_0"} -// CHECK: %[[VAL_3:.*]] = aie.lock(%{{.*}}tile_2_3, 1) {init = 0 : i32, sym_name = "link2_cons_lock_0"} -// CHECK: %[[VAL_4:.*]] = aie.buffer(%{{.*}}tile_2_2) {sym_name = "link1_buff_0"} : memref<4x4xi32> -// CHECK: %[[VAL_5:.*]] = aie.lock(%{{.*}}tile_2_2, 0) {init = 1 : i32, sym_name = "link1_prod_lock_0"} -// CHECK: %[[VAL_6:.*]] = aie.lock(%{{.*}}tile_2_2, 1) {init = 0 : i32, sym_name = "link1_cons_lock_0"} -// CHECK: aie.flow(%{{.*}}tile_2_2, DMA : 0, %{{.*}}tile_2_1, DMA : 0) -// CHECK: aie.flow(%{{.*}}tile_2_3, DMA : 0, %{{.*}}tile_2_1, DMA : 1) -// CHECK: aie.flow(%{{.*}}tile_2_1, DMA : 0, %{{.*}}tile_2_0, DMA : 0) -// CHECK: %mem_2_2 = aie.mem(%{{.*}}tile_2_2) { -// CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb2) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb1 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_4]] : memref<4x4xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_5]], Release, 1) +// CHECK: %[[SHIM_TILE:.*]] = aie.tile(2, 0) +// CHECK: %[[MEM_TILE:.*]] = aie.tile(2, 1) +// CHECK: %[[TILE_2_2:.*]] = aie.tile(2, 2) +// CHECK: %[[TILE_2_3:.*]] = aie.tile(2, 3) +// CHECK-DAG: %[[LINK3_BUFF:.*]] = aie.buffer(%[[MEM_TILE]]) {sym_name = "link3_buff_0"} : memref<36xi32> +// CHECK-DAG: %[[LINK2_BUFF:.*]] = aie.buffer(%[[TILE_2_3]]) {sym_name = "link2_buff_0"} : memref<20xi32> +// CHECK-DAG: %[[LINK2_PROD_LOCK:.*]] = aie.lock(%[[TILE_2_3]], 0) {init = 1 : i32, sym_name = "link2_prod_lock_0"} +// CHECK-DAG: %[[LINK2_CONS_LOCK:.*]] = aie.lock(%[[TILE_2_3]], 1) {init = 0 : i32, sym_name = "link2_cons_lock_0"} +// CHECK-DAG: %[[LINK1_BUFF:.*]] = aie.buffer(%[[TILE_2_2]]) {sym_name = "link1_buff_0"} : memref<4x4xi32> +// CHECK-DAG: %[[LINK1_PROD_LOCK:.*]] = aie.lock(%[[TILE_2_2]], 0) {init = 1 : i32, sym_name = 
"link1_prod_lock_0"} +// CHECK-DAG: %[[LINK1_CONS_LOCK:.*]] = aie.lock(%[[TILE_2_2]], 1) {init = 0 : i32, sym_name = "link1_cons_lock_0"} +// CHECK-DAG: aie.flow(%[[TILE_2_2]], DMA : 0, %[[MEM_TILE]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_2_3]], DMA : 0, %[[MEM_TILE]], DMA : 1) +// CHECK-DAG: aie.flow(%[[MEM_TILE]], DMA : 0, %[[SHIM_TILE]], DMA : 0) +// CHECK: %{{.*}} = aie.mem(%[[TILE_2_2]]) { +// CHECK: %{{.*}} = aie.dma_start(MM2S, 0, ^bb1, ^bb2) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[LINK1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK1_BUFF]] : memref<4x4xi32>, 0, 16) +// CHECK: aie.use_lock(%[[LINK1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb2: // pred: ^bb0 +// CHECK: ^bb2: // CHECK: aie.end // CHECK: } -// CHECK: %memtile_dma_2_1 = aie.memtile_dma(%{{.*}}tile_2_1) { -// CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb2) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb1 -// CHECK: aie.dma_bd(%[[VAL_0]] : memref<36xi32>, 0, 16) +// CHECK: %{{.*}} = aie.memtile_dma(%[[MEM_TILE]]) { +// CHECK: %{{.*}} = aie.dma_start(S2MM, 0, ^bb1, ^bb2) +// CHECK: ^bb1: +// CHECK: aie.dma_bd(%[[LINK3_BUFF]] : memref<36xi32>, 0, 16) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb2: // pred: ^bb0 -// CHECK: %1 = aie.dma_start(S2MM, 1, ^bb3, ^bb4) -// CHECK: ^bb3: // 2 preds: ^bb2, ^bb3 -// CHECK: aie.dma_bd(%[[VAL_0]] : memref<36xi32>, 16, 20) +// CHECK: ^bb2: +// CHECK: %{{.*}} = aie.dma_start(S2MM, 1, ^bb3, ^bb4) +// CHECK: ^bb3: +// CHECK: aie.dma_bd(%[[LINK3_BUFF]] : memref<36xi32>, 16, 20) // CHECK: aie.next_bd ^bb3 -// CHECK: ^bb4: // pred: ^bb2 -// CHECK: %2 = aie.dma_start(MM2S, 0, ^bb5, ^bb7) -// CHECK: ^bb5: // 2 preds: ^bb4, ^bb6 -// CHECK: aie.dma_bd(%[[VAL_0]] : memref<36xi32>, 0, 16) +// CHECK: ^bb4: +// CHECK: %{{.*}} = aie.dma_start(MM2S, 0, ^bb5, ^bb7) +// CHECK: ^bb5: +// CHECK: aie.dma_bd(%[[LINK3_BUFF]] : memref<36xi32>, 0, 16) // CHECK: aie.next_bd ^bb6 -// CHECK: ^bb6: // pred: ^bb5 -// CHECK: aie.dma_bd(%[[VAL_0]] : 
memref<36xi32>, 16, 20) +// CHECK: ^bb6: +// CHECK: aie.dma_bd(%[[LINK3_BUFF]] : memref<36xi32>, 16, 20) // CHECK: aie.next_bd ^bb5 -// CHECK: ^bb7: // pred: ^bb4 +// CHECK: ^bb7: // CHECK: aie.end // CHECK: } -// CHECK: %mem_2_3 = aie.mem(%{{.*}}tile_2_3) { -// CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb2) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb1 -// CHECK: aie.use_lock(%[[VAL_3]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_1]] : memref<20xi32>, 0, 20) -// CHECK: aie.use_lock(%[[VAL_2]], Release, 1) +// CHECK: %{{.*}} = aie.mem(%[[TILE_2_3]]) { +// CHECK: %{{.*}} = aie.dma_start(MM2S, 0, ^bb1, ^bb2) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[LINK2_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[LINK2_BUFF]] : memref<20xi32>, 0, 20) +// CHECK: aie.use_lock(%[[LINK2_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb2: // pred: ^bb0 +// CHECK: ^bb2: // CHECK: aie.end // CHECK: } -// CHECK: aie.shim_dma_allocation @link3_shim_alloc(%shim_noc_tile_2_0, S2MM, 0) +// CHECK: aie.shim_dma_allocation @link3_shim_alloc(%{{.*}}, S2MM, 0) // CHECK: } // CHECK: } diff --git a/test/objectFifo-stateful-transform/dma_transformations/memtile_padding_test.mlir b/test/objectFifo-stateful-transform/dma_transformations/memtile_padding_test.mlir index b3b2a1d7bbd..2d6e247bbcc 100644 --- a/test/objectFifo-stateful-transform/dma_transformations/memtile_padding_test.mlir +++ b/test/objectFifo-stateful-transform/dma_transformations/memtile_padding_test.mlir @@ -9,132 +9,136 @@ //===----------------------------------------------------------------------===// // RUN: aie-opt --aie-objectFifo-stateful-transform %s | FileCheck %s - // CHECK: %{{.*}}tile_0_0 = aie.tile(0, 0) - // CHECK: %{{.*}}tile_0_1 = aie.tile(0, 1) - // CHECK: %{{.*}}tile_0_2 = aie.tile(0, 2) - // CHECK: %[[VAL_2:.*]] = aie.buffer(%{{.*}}tile_0_1) {sym_name = "objFifo_out1_cons_buff_0"} : memref<64x64xi8> - // CHECK: %[[VAL_3:.*]] = aie.buffer(%{{.*}}tile_0_1) {sym_name = 
"objFifo_out1_cons_buff_1"} : memref<64x64xi8> - // CHECK: %[[VAL_4:.*]] = aie.lock(%{{.*}}tile_0_1, 2) {init = 2 : i32, sym_name = "objFifo_out1_cons_prod_lock_0"} - // CHECK: %[[VAL_5:.*]] = aie.lock(%{{.*}}tile_0_1, 3) {init = 0 : i32, sym_name = "objFifo_out1_cons_cons_lock_0"} - // CHECK: %[[VAL_6:.*]] = aie.buffer(%{{.*}}tile_0_2) {sym_name = "objFifo_out1_buff_0"} : memref<64x64xi8> - // CHECK: %[[VAL_7:.*]] = aie.buffer(%{{.*}}tile_0_2) {sym_name = "objFifo_out1_buff_1"} : memref<64x64xi8> - // CHECK: %[[VAL_8:.*]] = aie.lock(%{{.*}}tile_0_2, 2) {init = 2 : i32, sym_name = "objFifo_out1_prod_lock_0"} - // CHECK: %[[VAL_9:.*]] = aie.lock(%{{.*}}tile_0_2, 3) {init = 0 : i32, sym_name = "objFifo_out1_cons_lock_0"} - // CHECK: %[[VAL_10:.*]] = aie.buffer(%{{.*}}tile_0_2) {sym_name = "objFifo_in1_cons_buff_0"} : memref<64x64xi8> - // CHECK: %[[VAL_11:.*]] = aie.buffer(%{{.*}}tile_0_2) {sym_name = "objFifo_in1_cons_buff_1"} : memref<64x64xi8> - // CHECK: %[[VAL_12:.*]] = aie.lock(%{{.*}}tile_0_2, 0) {init = 2 : i32, sym_name = "objFifo_in1_cons_prod_lock_0"} - // CHECK: %[[VAL_13:.*]] = aie.lock(%{{.*}}tile_0_2, 1) {init = 0 : i32, sym_name = "objFifo_in1_cons_cons_lock_0"} - // CHECK: %[[VAL_14:.*]] = aie.buffer(%{{.*}}tile_0_1) {sym_name = "objFifo_in1_buff_0"} : memref<64x64xi8> - // CHECK: %[[VAL_15:.*]] = aie.buffer(%{{.*}}tile_0_1) {sym_name = "objFifo_in1_buff_1"} : memref<64x64xi8> - // CHECK: %[[VAL_16:.*]] = aie.lock(%{{.*}}tile_0_1, 0) {init = 2 : i32, sym_name = "objFifo_in1_prod_lock_0"} - // CHECK: %[[VAL_17:.*]] = aie.lock(%{{.*}}tile_0_1, 1) {init = 0 : i32, sym_name = "objFifo_in1_cons_lock_0"} - // CHECK: aie.flow(%{{.*}}tile_0_0, DMA : 0, %{{.*}}tile_0_1, DMA : 0) - // CHECK: aie.flow(%{{.*}}tile_0_1, DMA : 0, %{{.*}}tile_0_2, DMA : 0) - // CHECK: aie.flow(%{{.*}}tile_0_2, DMA : 0, %{{.*}}tile_0_1, DMA : 1) - // CHECK: aie.flow(%{{.*}}tile_0_1, DMA : 1, %{{.*}}tile_0_0, DMA : 0) - // CHECK: %core_0_2 = aie.core(%{{.*}}tile_0_2) { - // CHECK: 
aie.use_lock(%[[VAL_13]], AcquireGreaterEqual, 1) - // CHECK: aie.use_lock(%[[VAL_8]], AcquireGreaterEqual, 1) - // CHECK: %c0 = arith.constant 0 : index - // CHECK: %c1 = arith.constant 1 : index - // CHECK: %c64 = arith.constant 64 : index - // CHECK: %c12_i8 = arith.constant 12 : i8 - // CHECK: scf.for %arg0 = %c0 to %c64 step %c1 { - // CHECK: scf.for %arg1 = %c0 to %c64 step %c1 { - // CHECK: %0 = memref.load %[[VAL_10]][%arg0, %arg1] : memref<64x64xi8> - // CHECK: %1 = arith.addi %0, %c12_i8 : i8 - // CHECK: memref.store %1, %[[VAL_10]][%arg0, %arg1] : memref<64x64xi8> - // CHECK: } - // CHECK: } - // CHECK: aie.use_lock(%[[VAL_12]], Release, 1) - // CHECK: aie.use_lock(%[[VAL_9]], Release, 1) - // CHECK: aie.end - // CHECK: } - // CHECK: aie.runtime_sequence(%arg0: memref<61x56xi8>, %arg1: memref<32xi8>, %arg2: memref<64x64xi8>) { - // CHECK: aiex.npu.dma_memcpy_nd(%arg0[0, 0, 0, 0][1, 1, 61, 56][0, 0, 56, 1]) {id = 0 : i64, metadata = @objFifo_in0_shim_alloc} : memref<61x56xi8> - // CHECK: aiex.npu.dma_memcpy_nd(%arg2[0, 0, 0, 0][1, 1, 64, 64][0, 0, 64, 1]) {id = 1 : i64, issue_token = true, metadata = @objFifo_out0_shim_alloc} : memref<64x64xi8> - // CHECK: aiex.npu.dma_wait {symbol = @objFifo_out0_shim_alloc} - // CHECK: } - // CHECK: aie.shim_dma_allocation @objFifo_in0_shim_alloc(%shim_noc_tile_0_0, MM2S, 0) - // CHECK: %memtile_dma_0_1 = aie.memtile_dma(%{{.*}}tile_0_1) { - // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3) - // CHECK: ^bb1: - // CHECK: aie.use_lock(%[[VAL_16]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_14]] : memref<64x64xi8>, 0, 4096) - // CHECK: aie.use_lock(%[[VAL_17]], Release, 1) - // CHECK: aie.next_bd ^bb2 - // CHECK: ^bb2: - // CHECK: aie.use_lock(%[[VAL_16]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_15]] : memref<64x64xi8>, 0, 4096) - // CHECK: aie.use_lock(%[[VAL_17]], Release, 1) - // CHECK: aie.next_bd ^bb1 - // CHECK: ^bb3: - // CHECK: %1 = aie.dma_start(MM2S, 0, ^bb4, ^bb6) - // CHECK: ^bb4: - // 
CHECK: aie.use_lock(%[[VAL_17]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_14]] : memref<64x64xi8>, 0, 4096) - // CHECK: aie.use_lock(%[[VAL_16]], Release, 1) - // CHECK: aie.next_bd ^bb5 - // CHECK: ^bb5: - // CHECK: aie.use_lock(%[[VAL_17]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_15]] : memref<64x64xi8>, 0, 4096) - // CHECK: aie.use_lock(%[[VAL_16]], Release, 1) - // CHECK: aie.next_bd ^bb4 - // CHECK: ^bb6: - // CHECK: %2 = aie.dma_start(S2MM, 1, ^bb7, ^bb9) - // CHECK: ^bb7: - // CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_2]] : memref<64x64xi8>, 0, 4096) - // CHECK: aie.use_lock(%[[VAL_5]], Release, 1) - // CHECK: aie.next_bd ^bb8 - // CHECK: ^bb8: - // CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_3]] : memref<64x64xi8>, 0, 4096) - // CHECK: aie.use_lock(%[[VAL_5]], Release, 1) - // CHECK: aie.next_bd ^bb7 - // CHECK: ^bb9: - // CHECK: %3 = aie.dma_start(MM2S, 1, ^bb10, ^bb12) - // CHECK: ^bb10: - // CHECK: aie.use_lock(%[[VAL_5]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_2]] : memref<64x64xi8>, 0, 4096, [, ], [, ]) - // CHECK: aie.use_lock(%[[VAL_4]], Release, 1) - // CHECK: aie.next_bd ^bb11 - // CHECK: ^bb11: - // CHECK: aie.use_lock(%[[VAL_5]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_3]] : memref<64x64xi8>, 0, 4096, [, ], [, ]) - // CHECK: aie.use_lock(%[[VAL_4]], Release, 1) - // CHECK: aie.next_bd ^bb10 - // CHECK: ^bb12: - // CHECK: aie.end - // CHECK: } - // CHECK: %mem_0_2 = aie.mem(%{{.*}}tile_0_2) { - // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3) - // CHECK: ^bb1: - // CHECK: aie.use_lock(%[[VAL_12]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_10]] : memref<64x64xi8>, 0, 4096) - // CHECK: aie.use_lock(%[[VAL_13]], Release, 1) - // CHECK: aie.next_bd ^bb2 - // CHECK: ^bb2: - // CHECK: aie.use_lock(%[[VAL_12]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_11]] : memref<64x64xi8>, 0, 4096) - // 
CHECK: aie.use_lock(%[[VAL_13]], Release, 1) - // CHECK: aie.next_bd ^bb1 - // CHECK: ^bb3: - // CHECK: %1 = aie.dma_start(MM2S, 0, ^bb4, ^bb6) - // CHECK: ^bb4: - // CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_6]] : memref<64x64xi8>, 0, 4096) - // CHECK: aie.use_lock(%[[VAL_8]], Release, 1) - // CHECK: aie.next_bd ^bb5 - // CHECK: ^bb5: - // CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) - // CHECK: aie.dma_bd(%[[VAL_7]] : memref<64x64xi8>, 0, 4096) - // CHECK: aie.use_lock(%[[VAL_8]], Release, 1) - // CHECK: aie.next_bd ^bb4 - // CHECK: ^bb6: - // CHECK: aie.end - // CHECK: } - // CHECK: aie.shim_dma_allocation @objFifo_out0_shim_alloc(%shim_noc_tile_0_0, S2MM, 0) + // CHECK: %[[SHIM_TILE:.*]] = aie.tile(0, 0) + // CHECK: %[[MEM_TILE:.*]] = aie.tile(0, 1) + // CHECK: %[[COMP_TILE:.*]] = aie.tile(0, 2) + // CHECK-DAG: %[[OUT1_CONS_BUFF_0:.*]] = aie.buffer(%[[MEM_TILE]]) {sym_name = "objFifo_out1_cons_buff_0"} : memref<64x64xi8> + // CHECK-DAG: %[[OUT1_CONS_BUFF_1:.*]] = aie.buffer(%[[MEM_TILE]]) {sym_name = "objFifo_out1_cons_buff_1"} : memref<64x64xi8> + // CHECK-DAG: %[[OUT1_CONS_PROD_LOCK:.*]] = aie.lock(%[[MEM_TILE]], 2) {init = 2 : i32, sym_name = "objFifo_out1_cons_prod_lock_0"} + // CHECK-DAG: %[[OUT1_CONS_CONS_LOCK:.*]] = aie.lock(%[[MEM_TILE]], 3) {init = 0 : i32, sym_name = "objFifo_out1_cons_cons_lock_0"} + // CHECK-DAG: %[[OUT1_BUFF_0:.*]] = aie.buffer(%[[COMP_TILE]]) {sym_name = "objFifo_out1_buff_0"} : memref<64x64xi8> + // CHECK-DAG: %[[OUT1_BUFF_1:.*]] = aie.buffer(%[[COMP_TILE]]) {sym_name = "objFifo_out1_buff_1"} : memref<64x64xi8> + // CHECK-DAG: %[[OUT1_PROD_LOCK:.*]] = aie.lock(%[[COMP_TILE]], 2) {init = 2 : i32, sym_name = "objFifo_out1_prod_lock_0"} + // CHECK-DAG: %[[OUT1_CONS_LOCK:.*]] = aie.lock(%[[COMP_TILE]], 3) {init = 0 : i32, sym_name = "objFifo_out1_cons_lock_0"} + // CHECK-DAG: %[[IN1_CONS_BUFF_0:.*]] = aie.buffer(%[[COMP_TILE]]) {sym_name = "objFifo_in1_cons_buff_0"} : 
memref<64x64xi8> + // CHECK-DAG: %[[IN1_CONS_BUFF_1:.*]] = aie.buffer(%[[COMP_TILE]]) {sym_name = "objFifo_in1_cons_buff_1"} : memref<64x64xi8> + // CHECK-DAG: %[[IN1_CONS_PROD_LOCK:.*]] = aie.lock(%[[COMP_TILE]], 0) {init = 2 : i32, sym_name = "objFifo_in1_cons_prod_lock_0"} + // CHECK-DAG: %[[IN1_CONS_CONS_LOCK:.*]] = aie.lock(%[[COMP_TILE]], 1) {init = 0 : i32, sym_name = "objFifo_in1_cons_cons_lock_0"} + // CHECK-DAG: %[[IN1_BUFF_0:.*]] = aie.buffer(%[[MEM_TILE]]) {sym_name = "objFifo_in1_buff_0"} : memref<64x64xi8> + // CHECK-DAG: %[[IN1_BUFF_1:.*]] = aie.buffer(%[[MEM_TILE]]) {sym_name = "objFifo_in1_buff_1"} : memref<64x64xi8> + // CHECK-DAG: %[[IN1_PROD_LOCK:.*]] = aie.lock(%[[MEM_TILE]], 0) {init = 2 : i32, sym_name = "objFifo_in1_prod_lock_0"} + // CHECK-DAG: %[[IN1_CONS_LOCK:.*]] = aie.lock(%[[MEM_TILE]], 1) {init = 0 : i32, sym_name = "objFifo_in1_cons_lock_0"} + // CHECK-DAG: %[[OUT0_CONS_PROD_LOCK:.*]] = aie.lock(%[[SHIM_TILE]], 2) + // CHECK-DAG: %[[OUT0_CONS_CONS_LOCK:.*]] = aie.lock(%[[SHIM_TILE]], 3) + // CHECK-DAG: %[[IN0_PROD_LOCK:.*]] = aie.lock(%[[SHIM_TILE]], 0) + // CHECK-DAG: %[[IN0_CONS_LOCK:.*]] = aie.lock(%[[SHIM_TILE]], 1) + // CHECK-DAG: aie.flow(%[[SHIM_TILE]], DMA : 0, %[[MEM_TILE]], DMA : 0) + // CHECK-DAG: aie.flow(%[[MEM_TILE]], DMA : 0, %[[COMP_TILE]], DMA : 0) + // CHECK-DAG: aie.flow(%[[COMP_TILE]], DMA : 0, %[[MEM_TILE]], DMA : 1) + // CHECK-DAG: aie.flow(%[[MEM_TILE]], DMA : 1, %[[SHIM_TILE]], DMA : 0) + // CHECK: %core_0_2 = aie.core(%[[COMP_TILE]]) { + // CHECK: aie.use_lock(%[[IN1_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.use_lock(%[[OUT1_PROD_LOCK]], AcquireGreaterEqual, 1) + // CHECK: %c0 = arith.constant 0 : index + // CHECK: %c1 = arith.constant 1 : index + // CHECK: %c64 = arith.constant 64 : index + // CHECK: %c12_i8 = arith.constant 12 : i8 + // CHECK: scf.for %arg0 = %c0 to %c64 step %c1 { + // CHECK: scf.for %arg1 = %c0 to %c64 step %c1 { + // CHECK: %0 = memref.load %[[IN1_CONS_BUFF_0]][%arg0, 
%arg1] : memref<64x64xi8> + // CHECK: %1 = arith.addi %0, %c12_i8 : i8 + // CHECK: memref.store %1, %[[IN1_CONS_BUFF_0]][%arg0, %arg1] : memref<64x64xi8> + // CHECK: } + // CHECK: } + // CHECK: aie.use_lock(%[[IN1_CONS_PROD_LOCK]], Release, 1) + // CHECK: aie.use_lock(%[[OUT1_CONS_LOCK]], Release, 1) + // CHECK: aie.end + // CHECK: } + // CHECK: aie.runtime_sequence(%arg0: memref<61x56xi8>, %arg1: memref<32xi8>, %arg2: memref<64x64xi8>) { + // CHECK: aiex.npu.dma_memcpy_nd(%arg0[0, 0, 0, 0][1, 1, 61, 56][0, 0, 56, 1]) {id = 0 : i64, metadata = @objFifo_in0_shim_alloc} : memref<61x56xi8> + // CHECK: aiex.npu.dma_memcpy_nd(%arg2[0, 0, 0, 0][1, 1, 64, 64][0, 0, 64, 1]) {id = 1 : i64, issue_token = true, metadata = @objFifo_out0_shim_alloc} : memref<64x64xi8> + // CHECK: aiex.npu.dma_wait {symbol = @objFifo_out0_shim_alloc} + // CHECK: } + // CHECK: aie.shim_dma_allocation @objFifo_in0_shim_alloc(%shim_noc_tile_0_0, MM2S, 0) + // CHECK: %memtile_dma_0_1 = aie.memtile_dma(%[[MEM_TILE]]) { + // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3) + // CHECK: ^bb1: + // CHECK: aie.use_lock(%[[IN1_PROD_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[IN1_BUFF_0]] : memref<64x64xi8>, 0, 4096) + // CHECK: aie.use_lock(%[[IN1_CONS_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb2 + // CHECK: ^bb2: + // CHECK: aie.use_lock(%[[IN1_PROD_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[IN1_BUFF_1]] : memref<64x64xi8>, 0, 4096) + // CHECK: aie.use_lock(%[[IN1_CONS_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb1 + // CHECK: ^bb3: + // CHECK: %1 = aie.dma_start(MM2S, 0, ^bb4, ^bb6) + // CHECK: ^bb4: + // CHECK: aie.use_lock(%[[IN1_CONS_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[IN1_BUFF_0]] : memref<64x64xi8>, 0, 4096) + // CHECK: aie.use_lock(%[[IN1_PROD_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb5 + // CHECK: ^bb5: + // CHECK: aie.use_lock(%[[IN1_CONS_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[IN1_BUFF_1]] : memref<64x64xi8>, 0, 4096) 
+ // CHECK: aie.use_lock(%[[IN1_PROD_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb4 + // CHECK: ^bb6: + // CHECK: %2 = aie.dma_start(S2MM, 1, ^bb7, ^bb9) + // CHECK: ^bb7: + // CHECK: aie.use_lock(%[[OUT1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[OUT1_CONS_BUFF_0]] : memref<64x64xi8>, 0, 4096) + // CHECK: aie.use_lock(%[[OUT1_CONS_CONS_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb8 + // CHECK: ^bb8: + // CHECK: aie.use_lock(%[[OUT1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[OUT1_CONS_BUFF_1]] : memref<64x64xi8>, 0, 4096) + // CHECK: aie.use_lock(%[[OUT1_CONS_CONS_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb7 + // CHECK: ^bb9: + // CHECK: %3 = aie.dma_start(MM2S, 1, ^bb10, ^bb12) + // CHECK: ^bb10: + // CHECK: aie.use_lock(%[[OUT1_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[OUT1_CONS_BUFF_0]] : memref<64x64xi8>, 0, 4096, [, ], [, ]) + // CHECK: aie.use_lock(%[[OUT1_CONS_PROD_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb11 + // CHECK: ^bb11: + // CHECK: aie.use_lock(%[[OUT1_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[OUT1_CONS_BUFF_1]] : memref<64x64xi8>, 0, 4096, [, ], [, ]) + // CHECK: aie.use_lock(%[[OUT1_CONS_PROD_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb10 + // CHECK: ^bb12: + // CHECK: aie.end + // CHECK: } + // CHECK: %mem_0_2 = aie.mem(%[[COMP_TILE]]) { + // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3) + // CHECK: ^bb1: + // CHECK: aie.use_lock(%[[IN1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[IN1_CONS_BUFF_0]] : memref<64x64xi8>, 0, 4096) + // CHECK: aie.use_lock(%[[IN1_CONS_CONS_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb2 + // CHECK: ^bb2: + // CHECK: aie.use_lock(%[[IN1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[IN1_CONS_BUFF_1]] : memref<64x64xi8>, 0, 4096) + // CHECK: aie.use_lock(%[[IN1_CONS_CONS_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb1 + // CHECK: ^bb3: + // CHECK: %1 = 
aie.dma_start(MM2S, 0, ^bb4, ^bb6) + // CHECK: ^bb4: + // CHECK: aie.use_lock(%[[OUT1_CONS_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[OUT1_BUFF_0]] : memref<64x64xi8>, 0, 4096) + // CHECK: aie.use_lock(%[[OUT1_PROD_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb5 + // CHECK: ^bb5: + // CHECK: aie.use_lock(%[[OUT1_CONS_LOCK]], AcquireGreaterEqual, 1) + // CHECK: aie.dma_bd(%[[OUT1_BUFF_1]] : memref<64x64xi8>, 0, 4096) + // CHECK: aie.use_lock(%[[OUT1_PROD_LOCK]], Release, 1) + // CHECK: aie.next_bd ^bb4 + // CHECK: ^bb6: + // CHECK: aie.end + // CHECK: } + // CHECK: aie.shim_dma_allocation @objFifo_out0_shim_alloc(%shim_noc_tile_0_0, S2MM, 0) module { aie.device(npu1_1col) { diff --git a/test/objectFifo-stateful-transform/dma_transformations/nd_dma_fromStream_join.mlir b/test/objectFifo-stateful-transform/dma_transformations/nd_dma_fromStream_join.mlir index d485de3fc2a..183a749c67c 100644 --- a/test/objectFifo-stateful-transform/dma_transformations/nd_dma_fromStream_join.mlir +++ b/test/objectFifo-stateful-transform/dma_transformations/nd_dma_fromStream_join.mlir @@ -12,123 +12,123 @@ // CHECK: module @ndDMAObjFifoAIE2 { // CHECK: aie.device(xcve2302) { -// CHECK: %{{.*}}tile_1_1 = aie.tile(1, 1) -// CHECK: %{{.*}}tile_1_2 = aie.tile(1, 2) -// CHECK: %{{.*}}tile_2_3 = aie.tile(2, 3) -// CHECK: %{{.*}}tile_3_3 = aie.tile(3, 3) -// CHECK: %[[VAL_0:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "of2_cons_buff_0"} : memref<256xi32> -// CHECK: %[[VAL_1:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "of2_cons_buff_1"} : memref<256xi32> -// CHECK: %[[VAL_2:.*]] = aie.lock(%{{.*}}tile_2_3, 0) {init = 2 : i32, sym_name = "of2_cons_prod_lock_0"} -// CHECK: %[[VAL_3:.*]] = aie.lock(%{{.*}}tile_2_3, 1) {init = 0 : i32, sym_name = "of2_cons_cons_lock_0"} -// CHECK: %[[VAL_4:.*]] = aie.buffer(%{{.*}}tile_1_1) {sym_name = "of2_buff_0"} : memref<256xi32> -// CHECK: %[[VAL_5:.*]] = aie.buffer(%{{.*}}tile_1_1) {sym_name = "of2_buff_1"} : memref<256xi32> -// CHECK: 
%[[VAL_6:.*]] = aie.lock(%{{.*}}tile_1_1, 0) {init = 2 : i32, sym_name = "of2_prod_lock_0"} -// CHECK: %[[VAL_7:.*]] = aie.lock(%{{.*}}tile_1_1, 1) {init = 0 : i32, sym_name = "of2_cons_lock_0"} -// CHECK: %[[VAL_8:.*]] = aie.lock(%{{.*}}tile_1_1, 2) {init = 2 : i32, sym_name = "of2_prod_lock_1"} -// CHECK: %[[VAL_9:.*]] = aie.lock(%{{.*}}tile_1_1, 3) {init = 0 : i32, sym_name = "of2_cons_lock_1"} -// CHECK: %[[VAL_10:.*]] = aie.buffer(%{{.*}}tile_3_3) {sym_name = "of1_buff_0"} : memref<128xi32> -// CHECK: %[[VAL_11:.*]] = aie.buffer(%{{.*}}tile_3_3) {sym_name = "of1_buff_1"} : memref<128xi32> -// CHECK: %[[VAL_12:.*]] = aie.lock(%{{.*}}tile_3_3, 0) {init = 2 : i32, sym_name = "of1_prod_lock_0"} -// CHECK: %[[VAL_13:.*]] = aie.lock(%{{.*}}tile_3_3, 1) {init = 0 : i32, sym_name = "of1_cons_lock_0"} -// CHECK: %[[VAL_14:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of0_buff_0"} : memref<128xi32> -// CHECK: %[[VAL_15:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of0_buff_1"} : memref<128xi32> -// CHECK: %[[VAL_16:.*]] = aie.lock(%{{.*}}tile_1_2, 0) {init = 2 : i32, sym_name = "of0_prod_lock_0"} -// CHECK: %[[VAL_17:.*]] = aie.lock(%{{.*}}tile_1_2, 1) {init = 0 : i32, sym_name = "of0_cons_lock_0"} -// CHECK: aie.flow(%{{.*}}tile_1_2, DMA : 0, %{{.*}}tile_1_1, DMA : 0) -// CHECK: aie.flow(%{{.*}}tile_3_3, DMA : 0, %{{.*}}tile_1_1, DMA : 1) -// CHECK: aie.flow(%{{.*}}tile_1_1, DMA : 0, %{{.*}}tile_2_3, DMA : 0) -// CHECK: %mem_1_2 = aie.mem(%{{.*}}tile_1_2) { +// CHECK-DAG: %[[TILE_1_1:.*]] = aie.tile(1, 1) +// CHECK-DAG: %[[TILE_1_2:.*]] = aie.tile(1, 2) +// CHECK-DAG: %[[TILE_2_3:.*]] = aie.tile(2, 3) +// CHECK-DAG: %[[TILE_3_3:.*]] = aie.tile(3, 3) +// CHECK-DAG: %[[OF2_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_2_3]]) {sym_name = "of2_cons_buff_0"} : memref<256xi32> +// CHECK-DAG: %[[OF2_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_2_3]]) {sym_name = "of2_cons_buff_1"} : memref<256xi32> +// CHECK-DAG: %[[OF2_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_2_3]], 0) {init = 2 : i32, 
sym_name = "of2_cons_prod_lock_0"} +// CHECK-DAG: %[[OF2_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_2_3]], 1) {init = 0 : i32, sym_name = "of2_cons_cons_lock_0"} +// CHECK-DAG: %[[OF2_BUFF_0:.*]] = aie.buffer(%[[TILE_1_1]]) {sym_name = "of2_buff_0"} : memref<256xi32> +// CHECK-DAG: %[[OF2_BUFF_1:.*]] = aie.buffer(%[[TILE_1_1]]) {sym_name = "of2_buff_1"} : memref<256xi32> +// CHECK-DAG: %[[OF2_PROD_LOCK_0:.*]] = aie.lock(%[[TILE_1_1]], 0) {init = 2 : i32, sym_name = "of2_prod_lock_0"} +// CHECK-DAG: %[[OF2_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_1_1]], 1) {init = 0 : i32, sym_name = "of2_cons_lock_0"} +// CHECK-DAG: %[[OF2_PROD_LOCK_1:.*]] = aie.lock(%[[TILE_1_1]], 2) {init = 2 : i32, sym_name = "of2_prod_lock_1"} +// CHECK-DAG: %[[OF2_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_1_1]], 3) {init = 0 : i32, sym_name = "of2_cons_lock_1"} +// CHECK-DAG: %[[OF1_BUFF_0:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "of1_buff_0"} : memref<128xi32> +// CHECK-DAG: %[[OF1_BUFF_1:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "of1_buff_1"} : memref<128xi32> +// CHECK-DAG: %[[OF1_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 2 : i32, sym_name = "of1_prod_lock_0"} +// CHECK-DAG: %[[OF1_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "of1_cons_lock_0"} +// CHECK-DAG: %[[OF0_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of0_buff_0"} : memref<128xi32> +// CHECK-DAG: %[[OF0_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of0_buff_1"} : memref<128xi32> +// CHECK-DAG: %[[OF0_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 2 : i32, sym_name = "of0_prod_lock_0"} +// CHECK-DAG: %[[OF0_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of0_cons_lock_0"} +// CHECK-DAG: aie.flow(%[[TILE_1_2]], DMA : 0, %[[TILE_1_1]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_3_3]], DMA : 0, %[[TILE_1_1]], DMA : 1) +// CHECK-DAG: aie.flow(%[[TILE_1_1]], DMA : 0, %[[TILE_2_3]], DMA : 0) +// CHECK: %mem_1_2 = aie.mem(%[[TILE_1_2]]) { // CHECK: %0 = 
aie.dma_start(MM2S, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_17]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_14]] : memref<128xi32>, 0, 128) -// CHECK: aie.use_lock(%[[VAL_16]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF0_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_BUFF_0]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF0_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_17]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_15]] : memref<128xi32>, 0, 128) -// CHECK: aie.use_lock(%[[VAL_16]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF0_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_BUFF_1]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF0_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } -// CHECK: %memtile_dma_1_1 = aie.memtile_dma(%{{.*}}tile_1_1) { +// CHECK: %memtile_dma_1_1 = aie.memtile_dma(%[[TILE_1_1]]) { // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_4]] : memref<256xi32>, 0, 128, []) -// CHECK: aie.use_lock(%[[VAL_7]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<256xi32>, 0, 128, []) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_5]] : memref<256xi32>, 0, 128, []) -// CHECK: aie.use_lock(%[[VAL_7]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<256xi32>, 0, 128, []) +// CHECK: 
aie.use_lock(%[[OF2_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: %1 = aie.dma_start(S2MM, 1, ^bb4, ^bb6) -// CHECK: ^bb4: // 2 preds: ^bb3, ^bb5 -// CHECK: aie.use_lock(%[[VAL_8]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_4]] : memref<256xi32>, 128, 128, []) -// CHECK: aie.use_lock(%[[VAL_9]], Release, 1) +// CHECK: ^bb4: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<256xi32>, 128, 128, []) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb5 -// CHECK: ^bb5: // pred: ^bb4 -// CHECK: aie.use_lock(%[[VAL_8]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_5]] : memref<256xi32>, 128, 128, []) -// CHECK: aie.use_lock(%[[VAL_9]], Release, 1) +// CHECK: ^bb5: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<256xi32>, 128, 128, []) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb4 -// CHECK: ^bb6: // pred: ^bb3 +// CHECK: ^bb6: // CHECK: %2 = aie.dma_start(MM2S, 0, ^bb7, ^bb11) -// CHECK: ^bb7: // 2 preds: ^bb6, ^bb10 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_4]] : memref<256xi32>, 0, 128) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb7: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<256xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb8 -// CHECK: ^bb8: // pred: ^bb7 -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_4]] : memref<256xi32>, 128, 128) -// CHECK: aie.use_lock(%[[VAL_8]], Release, 1) +// CHECK: ^bb8: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<256xi32>, 128, 128) +// CHECK: 
aie.use_lock(%[[OF2_PROD_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb9 -// CHECK: ^bb9: // pred: ^bb8 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_5]] : memref<256xi32>, 0, 128) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb9: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<256xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb10 -// CHECK: ^bb10: // pred: ^bb9 -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_5]] : memref<256xi32>, 128, 128) -// CHECK: aie.use_lock(%[[VAL_8]], Release, 1) +// CHECK: ^bb10: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<256xi32>, 128, 128) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb7 -// CHECK: ^bb11: // pred: ^bb6 +// CHECK: ^bb11: // CHECK: aie.end // CHECK: } -// CHECK: %mem_3_3 = aie.mem(%{{.*}}tile_3_3) { +// CHECK: %mem_3_3 = aie.mem(%[[TILE_3_3]]) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_13]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_10]] : memref<128xi32>, 0, 128) -// CHECK: aie.use_lock(%[[VAL_12]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_BUFF_0]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_13]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_11]] : memref<128xi32>, 0, 128) -// CHECK: aie.use_lock(%[[VAL_12]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_BUFF_1]] : memref<128xi32>, 0, 128) +// CHECK: 
aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } -// CHECK: %mem_2_3 = aie.mem(%{{.*}}tile_2_3) { +// CHECK: %mem_2_3 = aie.mem(%[[TILE_2_3]]) { // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_2]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_0]] : memref<256xi32>, 0, 256) -// CHECK: aie.use_lock(%[[VAL_3]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF2_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_CONS_BUFF_0]] : memref<256xi32>, 0, 256) +// CHECK: aie.use_lock(%[[OF2_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_2]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_1]] : memref<256xi32>, 0, 256) -// CHECK: aie.use_lock(%[[VAL_3]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF2_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_CONS_BUFF_1]] : memref<256xi32>, 0, 256) +// CHECK: aie.use_lock(%[[OF2_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } // CHECK: } diff --git a/test/objectFifo-stateful-transform/init_values/init_values_join_input_test.mlir b/test/objectFifo-stateful-transform/init_values/init_values_join_input_test.mlir index 9120fb6ccc8..116b498a4c8 100644 --- a/test/objectFifo-stateful-transform/init_values/init_values_join_input_test.mlir +++ b/test/objectFifo-stateful-transform/init_values/init_values_join_input_test.mlir @@ -12,107 +12,107 @@ // CHECK: module @init_join_input { // CHECK: aie.device(xcve2302) { -// CHECK: %[[VAL_0:.*]] = aie.tile(1, 0) -// CHECK: %{{.*}}tile_1_1 = aie.tile(1, 1) -// CHECK: %{{.*}}tile_1_2 = aie.tile(1, 2) -// CHECK: %{{.*}}tile_2_3 = aie.tile(2, 3) -// CHECK: %[[VAL_2:.*]] = aie.buffer(%{{.*}}tile_1_1) 
{sym_name = "of2_buff_0"} : memref<8xi32> -// CHECK: %[[VAL_3:.*]] = aie.buffer(%{{.*}}tile_1_1) {sym_name = "of2_buff_1"} : memref<8xi32> -// CHECK: %[[VAL_4:.*]] = aie.lock(%{{.*}}tile_1_1, 0) {init = 2 : i32, sym_name = "of2_prod_lock_0"} -// CHECK: %[[VAL_5:.*]] = aie.lock(%{{.*}}tile_1_1, 1) {init = 0 : i32, sym_name = "of2_cons_lock_0"} -// CHECK: %[[VAL_6:.*]] = aie.lock(%{{.*}}tile_1_1, 2) {init = 2 : i32, sym_name = "of2_prod_lock_1"} -// CHECK: %[[VAL_7:.*]] = aie.lock(%{{.*}}tile_1_1, 3) {init = 0 : i32, sym_name = "of2_cons_lock_1"} -// CHECK: %[[VAL_8:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "of1_buff_0"} : memref<2x2xi32> = dense<{{\[}}[0, 1], [2, 3]]> -// CHECK: %[[VAL_9:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "of1_buff_1"} : memref<2x2xi32> = dense<{{\[}}[4, 5], [6, 7]]> -// CHECK: %[[VAL_10:.*]] = aie.lock(%{{.*}}tile_2_3, 0) {init = 0 : i32, sym_name = "of1_prod_lock_0"} -// CHECK: %[[VAL_11:.*]] = aie.lock(%{{.*}}tile_2_3, 1) {init = 2 : i32, sym_name = "of1_cons_lock_0"} -// CHECK: %[[VAL_12:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of0_buff_0"} : memref<2x2xi32> = dense<{{\[}}[0, 1], [2, 3]]> -// CHECK: %[[VAL_13:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of0_buff_1"} : memref<2x2xi32> = dense<{{\[}}[4, 5], [6, 7]]> -// CHECK: %[[VAL_14:.*]] = aie.lock(%{{.*}}tile_1_2, 0) {init = 0 : i32, sym_name = "of0_prod_lock_0"} -// CHECK: %[[VAL_15:.*]] = aie.lock(%{{.*}}tile_1_2, 1) {init = 2 : i32, sym_name = "of0_cons_lock_0"} -// CHECK: aie.flow(%{{.*}}tile_1_2, DMA : 0, %{{.*}}tile_1_1, DMA : 0) -// CHECK: aie.flow(%{{.*}}tile_2_3, DMA : 0, %{{.*}}tile_1_1, DMA : 1) -// CHECK: aie.flow(%{{.*}}tile_1_1, DMA : 0, %{{.*}}tile_1_0, DMA : 0) -// CHECK: %mem_1_2 = aie.mem(%{{.*}}tile_1_2) { +// CHECK: %[[SHIM_TILE:.*]] = aie.tile(1, 0) +// CHECK: %[[MEM_TILE:.*]] = aie.tile(1, 1) +// CHECK: %[[TILE_1_2:.*]] = aie.tile(1, 2) +// CHECK: %[[TILE_2_3:.*]] = aie.tile(2, 3) +// CHECK-DAG: %[[OF2_BUFF_0:.*]] = aie.buffer(%[[MEM_TILE]]) 
{sym_name = "of2_buff_0"} : memref<8xi32> +// CHECK-DAG: %[[OF2_BUFF_1:.*]] = aie.buffer(%[[MEM_TILE]]) {sym_name = "of2_buff_1"} : memref<8xi32> +// CHECK-DAG: %[[OF2_PROD_LOCK_0:.*]] = aie.lock(%[[MEM_TILE]], 0) {init = 2 : i32, sym_name = "of2_prod_lock_0"} +// CHECK-DAG: %[[OF2_CONS_LOCK_0:.*]] = aie.lock(%[[MEM_TILE]], 1) {init = 0 : i32, sym_name = "of2_cons_lock_0"} +// CHECK-DAG: %[[OF2_PROD_LOCK_1:.*]] = aie.lock(%[[MEM_TILE]], 2) {init = 2 : i32, sym_name = "of2_prod_lock_1"} +// CHECK-DAG: %[[OF2_CONS_LOCK_1:.*]] = aie.lock(%[[MEM_TILE]], 3) {init = 0 : i32, sym_name = "of2_cons_lock_1"} +// CHECK-DAG: %[[OF1_BUFF_0:.*]] = aie.buffer(%[[TILE_2_3]]) {sym_name = "of1_buff_0"} : memref<2x2xi32> = dense<{{\[}}[0, 1], [2, 3]]> +// CHECK-DAG: %[[OF1_BUFF_1:.*]] = aie.buffer(%[[TILE_2_3]]) {sym_name = "of1_buff_1"} : memref<2x2xi32> = dense<{{\[}}[4, 5], [6, 7]]> +// CHECK-DAG: %[[OF1_PROD_LOCK:.*]] = aie.lock(%[[TILE_2_3]], 0) {init = 0 : i32, sym_name = "of1_prod_lock_0"} +// CHECK-DAG: %[[OF1_CONS_LOCK:.*]] = aie.lock(%[[TILE_2_3]], 1) {init = 2 : i32, sym_name = "of1_cons_lock_0"} +// CHECK-DAG: %[[OF0_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of0_buff_0"} : memref<2x2xi32> = dense<{{\[}}[0, 1], [2, 3]]> +// CHECK-DAG: %[[OF0_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of0_buff_1"} : memref<2x2xi32> = dense<{{\[}}[4, 5], [6, 7]]> +// CHECK-DAG: %[[OF0_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "of0_prod_lock_0"} +// CHECK-DAG: %[[OF0_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 2 : i32, sym_name = "of0_cons_lock_0"} +// CHECK-DAG: aie.flow(%[[TILE_1_2]], DMA : 0, %[[MEM_TILE]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_2_3]], DMA : 0, %[[MEM_TILE]], DMA : 1) +// CHECK-DAG: aie.flow(%[[MEM_TILE]], DMA : 0, %[[SHIM_TILE]], DMA : 0) +// CHECK: %mem_1_2 = aie.mem(%[[TILE_1_2]]) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_15]], 
AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_12]] : memref<2x2xi32>, 0, 4) -// CHECK: aie.use_lock(%[[VAL_14]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF0_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_BUFF_0]] : memref<2x2xi32>, 0, 4) +// CHECK: aie.use_lock(%[[OF0_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_15]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_13]] : memref<2x2xi32>, 0, 4) -// CHECK: aie.use_lock(%[[VAL_14]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF0_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_BUFF_1]] : memref<2x2xi32>, 0, 4) +// CHECK: aie.use_lock(%[[OF0_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } -// CHECK: %memtile_dma_1_1 = aie.memtile_dma(%{{.*}}tile_1_1) { +// CHECK: %memtile_dma_1_1 = aie.memtile_dma(%[[MEM_TILE]]) { // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<8xi32>, 0, 4) -// CHECK: aie.use_lock(%[[VAL_5]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<8xi32>, 0, 4) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<8xi32>, 0, 4) -// CHECK: aie.use_lock(%[[VAL_5]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<8xi32>, 0, 4) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: %1 = aie.dma_start(S2MM, 
1, ^bb4, ^bb6) -// CHECK: ^bb4: // 2 preds: ^bb3, ^bb5 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<8xi32>, 4, 4) -// CHECK: aie.use_lock(%[[VAL_7]], Release, 1) +// CHECK: ^bb4: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<8xi32>, 4, 4) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb5 -// CHECK: ^bb5: // pred: ^bb4 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<8xi32>, 4, 4) -// CHECK: aie.use_lock(%[[VAL_7]], Release, 1) +// CHECK: ^bb5: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<8xi32>, 4, 4) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb4 -// CHECK: ^bb6: // pred: ^bb3 +// CHECK: ^bb6: // CHECK: %2 = aie.dma_start(MM2S, 0, ^bb7, ^bb11) -// CHECK: ^bb7: // 2 preds: ^bb6, ^bb10 -// CHECK: aie.use_lock(%[[VAL_5]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<8xi32>, 0, 4) -// CHECK: aie.use_lock(%[[VAL_4]], Release, 1) +// CHECK: ^bb7: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<8xi32>, 0, 4) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb8 -// CHECK: ^bb8: // pred: ^bb7 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<8xi32>, 4, 4) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb8: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<8xi32>, 4, 4) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb9 -// CHECK: ^bb9: // pred: ^bb8 -// CHECK: aie.use_lock(%[[VAL_5]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<8xi32>, 0, 4) -// 
CHECK: aie.use_lock(%[[VAL_4]], Release, 1) +// CHECK: ^bb9: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<8xi32>, 0, 4) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb10 -// CHECK: ^bb10: // pred: ^bb9 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<8xi32>, 4, 4) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb10: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<8xi32>, 4, 4) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb7 -// CHECK: ^bb11: // pred: ^bb6 +// CHECK: ^bb11: // CHECK: aie.end // CHECK: } -// CHECK: %mem_2_3 = aie.mem(%{{.*}}tile_2_3) { +// CHECK: %mem_2_3 = aie.mem(%[[TILE_2_3]]) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_11]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_8]] : memref<2x2xi32>, 0, 4) -// CHECK: aie.use_lock(%[[VAL_10]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_BUFF_0]] : memref<2x2xi32>, 0, 4) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_11]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_9]] : memref<2x2xi32>, 0, 4) -// CHECK: aie.use_lock(%[[VAL_10]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_BUFF_1]] : memref<2x2xi32>, 0, 4) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } -// CHECK: aie.shim_dma_allocation @of2_shim_alloc(%[[VAL_0]], S2MM, 0) +// CHECK: aie.shim_dma_allocation 
@of2_shim_alloc(%[[SHIM_TILE]], S2MM, 0) // CHECK: } // CHECK: } diff --git a/test/objectFifo-stateful-transform/init_values/init_values_join_output_test.mlir b/test/objectFifo-stateful-transform/init_values/init_values_join_output_test.mlir index 08aa95de12b..7dc63568254 100644 --- a/test/objectFifo-stateful-transform/init_values/init_values_join_output_test.mlir +++ b/test/objectFifo-stateful-transform/init_values/init_values_join_output_test.mlir @@ -16,100 +16,100 @@ // CHECK: %{{.*}}tile_1_1 = aie.tile(1, 1) // CHECK: %{{.*}}tile_1_2 = aie.tile(1, 2) // CHECK: %{{.*}}tile_2_3 = aie.tile(2, 3) -// CHECK: %[[VAL_2:.*]] = aie.buffer(%{{.*}}tile_1_1) {sym_name = "of2_buff_0"} : memref<4xi32> = dense<[0, 1, 2, 3]> -// CHECK: %[[VAL_3:.*]] = aie.buffer(%{{.*}}tile_1_1) {sym_name = "of2_buff_1"} : memref<4xi32> = dense<[4, 5, 6, 7]> -// CHECK: %[[VAL_4:.*]] = aie.lock(%{{.*}}tile_1_1, 0) {init = 0 : i32, sym_name = "of2_prod_lock_0"} -// CHECK: %[[VAL_5:.*]] = aie.lock(%{{.*}}tile_1_1, 1) {init = 2 : i32, sym_name = "of2_cons_lock_0"} -// CHECK: %[[VAL_6:.*]] = aie.lock(%{{.*}}tile_1_1, 2) {init = 0 : i32, sym_name = "of2_prod_lock_1"} -// CHECK: %[[VAL_7:.*]] = aie.lock(%{{.*}}tile_1_1, 3) {init = 2 : i32, sym_name = "of2_cons_lock_1"} -// CHECK: %[[VAL_8:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "of1_buff_0"} : memref<2xi32> -// CHECK: %[[VAL_9:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "of1_buff_1"} : memref<2xi32> -// CHECK: %[[VAL_10:.*]] = aie.lock(%{{.*}}tile_2_3, 0) {init = 2 : i32, sym_name = "of1_prod_lock_0"} -// CHECK: %[[VAL_11:.*]] = aie.lock(%{{.*}}tile_2_3, 1) {init = 0 : i32, sym_name = "of1_cons_lock_0"} -// CHECK: %[[VAL_12:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of0_buff_0"} : memref<2xi32> -// CHECK: %[[VAL_13:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of0_buff_1"} : memref<2xi32> -// CHECK: %[[VAL_14:.*]] = aie.lock(%{{.*}}tile_1_2, 0) {init = 2 : i32, sym_name = "of0_prod_lock_0"} -// CHECK: %[[VAL_15:.*]] = 
aie.lock(%{{.*}}tile_1_2, 1) {init = 0 : i32, sym_name = "of0_cons_lock_0"} -// CHECK: aie.flow(%{{.*}}tile_1_2, DMA : 0, %{{.*}}tile_1_1, DMA : 0) -// CHECK: aie.flow(%{{.*}}tile_2_3, DMA : 0, %{{.*}}tile_1_1, DMA : 1) -// CHECK: aie.flow(%{{.*}}tile_1_1, DMA : 0, %{{.*}}tile_1_0, DMA : 0) +// CHECK-DAG: %[[OF2_BUFF_0:.*]] = aie.buffer(%{{.*}}tile_1_1) {sym_name = "of2_buff_0"} : memref<4xi32> = dense<[0, 1, 2, 3]> +// CHECK-DAG: %[[OF2_BUFF_1:.*]] = aie.buffer(%{{.*}}tile_1_1) {sym_name = "of2_buff_1"} : memref<4xi32> = dense<[4, 5, 6, 7]> +// CHECK-DAG: %[[OF2_PROD_LOCK_0:.*]] = aie.lock(%{{.*}}tile_1_1, 0) {init = 0 : i32, sym_name = "of2_prod_lock_0"} +// CHECK-DAG: %[[OF2_CONS_LOCK_0:.*]] = aie.lock(%{{.*}}tile_1_1, 1) {init = 2 : i32, sym_name = "of2_cons_lock_0"} +// CHECK-DAG: %[[OF2_PROD_LOCK_1:.*]] = aie.lock(%{{.*}}tile_1_1, 2) {init = 0 : i32, sym_name = "of2_prod_lock_1"} +// CHECK-DAG: %[[OF2_CONS_LOCK_1:.*]] = aie.lock(%{{.*}}tile_1_1, 3) {init = 2 : i32, sym_name = "of2_cons_lock_1"} +// CHECK-DAG: %[[OF1_BUFF_0:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "of1_buff_0"} : memref<2xi32> +// CHECK-DAG: %[[OF1_BUFF_1:.*]] = aie.buffer(%{{.*}}tile_2_3) {sym_name = "of1_buff_1"} : memref<2xi32> +// CHECK-DAG: %[[OF1_PROD_LOCK:.*]] = aie.lock(%{{.*}}tile_2_3, 0) {init = 2 : i32, sym_name = "of1_prod_lock_0"} +// CHECK-DAG: %[[OF1_CONS_LOCK:.*]] = aie.lock(%{{.*}}tile_2_3, 1) {init = 0 : i32, sym_name = "of1_cons_lock_0"} +// CHECK-DAG: %[[OF0_BUFF_0:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of0_buff_0"} : memref<2xi32> +// CHECK-DAG: %[[OF0_BUFF_1:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of0_buff_1"} : memref<2xi32> +// CHECK-DAG: %[[OF0_PROD_LOCK:.*]] = aie.lock(%{{.*}}tile_1_2, 0) {init = 2 : i32, sym_name = "of0_prod_lock_0"} +// CHECK-DAG: %[[OF0_CONS_LOCK:.*]] = aie.lock(%{{.*}}tile_1_2, 1) {init = 0 : i32, sym_name = "of0_cons_lock_0"} +// CHECK-DAG: aie.flow(%{{.*}}tile_1_2, DMA : 0, %{{.*}}tile_1_1, DMA : 0) +// CHECK-DAG: 
aie.flow(%{{.*}}tile_2_3, DMA : 0, %{{.*}}tile_1_1, DMA : 1) +// CHECK-DAG: aie.flow(%{{.*}}tile_1_1, DMA : 0, %{{.*}}tile_1_0, DMA : 0) // CHECK: %mem_1_2 = aie.mem(%{{.*}}tile_1_2) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_15]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_12]] : memref<2xi32>, 0, 2) -// CHECK: aie.use_lock(%[[VAL_14]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF0_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_BUFF_0]] : memref<2xi32>, 0, 2) +// CHECK: aie.use_lock(%[[OF0_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_15]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_13]] : memref<2xi32>, 0, 2) -// CHECK: aie.use_lock(%[[VAL_14]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF0_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_BUFF_1]] : memref<2xi32>, 0, 2) +// CHECK: aie.use_lock(%[[OF0_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } // CHECK: %memtile_dma_1_1 = aie.memtile_dma(%{{.*}}tile_1_1) { // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<4xi32>, 0, 2) -// CHECK: aie.use_lock(%[[VAL_5]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<4xi32>, 0, 2) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<4xi32>, 0, 2) -// CHECK: aie.use_lock(%[[VAL_5]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], 
AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<4xi32>, 0, 2) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: %1 = aie.dma_start(S2MM, 1, ^bb4, ^bb6) -// CHECK: ^bb4: // 2 preds: ^bb3, ^bb5 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<4xi32>, 2, 2) -// CHECK: aie.use_lock(%[[VAL_7]], Release, 1) +// CHECK: ^bb4: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<4xi32>, 2, 2) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb5 -// CHECK: ^bb5: // pred: ^bb4 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<4xi32>, 2, 2) -// CHECK: aie.use_lock(%[[VAL_7]], Release, 1) +// CHECK: ^bb5: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<4xi32>, 2, 2) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb4 -// CHECK: ^bb6: // pred: ^bb3 +// CHECK: ^bb6: // CHECK: %2 = aie.dma_start(MM2S, 0, ^bb7, ^bb11) -// CHECK: ^bb7: // 2 preds: ^bb6, ^bb10 -// CHECK: aie.use_lock(%[[VAL_5]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<4xi32>, 0, 2) -// CHECK: aie.use_lock(%[[VAL_4]], Release, 1) +// CHECK: ^bb7: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<4xi32>, 0, 2) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb8 -// CHECK: ^bb8: // pred: ^bb7 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<4xi32>, 2, 2) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb8: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : 
memref<4xi32>, 2, 2) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb9 -// CHECK: ^bb9: // pred: ^bb8 -// CHECK: aie.use_lock(%[[VAL_5]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<4xi32>, 0, 2) -// CHECK: aie.use_lock(%[[VAL_4]], Release, 1) +// CHECK: ^bb9: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<4xi32>, 0, 2) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb10 -// CHECK: ^bb10: // pred: ^bb9 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_3]] : memref<4xi32>, 2, 2) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb10: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_1]] : memref<4xi32>, 2, 2) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb7 -// CHECK: ^bb11: // pred: ^bb6 +// CHECK: ^bb11: // CHECK: aie.end // CHECK: } // CHECK: %mem_2_3 = aie.mem(%{{.*}}tile_2_3) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb3) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 -// CHECK: aie.use_lock(%[[VAL_11]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_8]] : memref<2xi32>, 0, 2) -// CHECK: aie.use_lock(%[[VAL_10]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_BUFF_0]] : memref<2xi32>, 0, 2) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_11]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_9]] : memref<2xi32>, 0, 2) -// CHECK: aie.use_lock(%[[VAL_10]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_BUFF_1]] : memref<2xi32>, 0, 2) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd 
^bb1 -// CHECK: ^bb3: // pred: ^bb0 +// CHECK: ^bb3: // CHECK: aie.end // CHECK: } // CHECK: aie.shim_dma_allocation @of2_shim_alloc(%[[VAL_0]], S2MM, 0) diff --git a/test/objectFifo-stateful-transform/repeat_count/link_join_repeat_count_test.mlir b/test/objectFifo-stateful-transform/repeat_count/link_join_repeat_count_test.mlir index 91c5e401984..35714e0793e 100644 --- a/test/objectFifo-stateful-transform/repeat_count/link_join_repeat_count_test.mlir +++ b/test/objectFifo-stateful-transform/repeat_count/link_join_repeat_count_test.mlir @@ -12,94 +12,96 @@ // CHECK: module @memtileRepeat { // CHECK: aie.device(npu1) { -// CHECK: %{{.*}}tile_1_0 = aie.tile(1, 0) -// CHECK: %{{.*}}tile_1_1 = aie.tile(1, 1) -// CHECK: %{{.*}}tile_1_2 = aie.tile(1, 2) -// CHECK: %{{.*}}tile_3_3 = aie.tile(3, 3) -// CHECK: %[[VAL_2:.*]] = aie.buffer(%{{.*}}tile_1_1) {sym_name = "of2_buff_0"} : memref<32xi32> -// CHECK: %[[VAL_3:.*]] = aie.lock(%{{.*}}tile_1_1, 0) {init = 1 : i32, sym_name = "of2_prod_lock_0"} -// CHECK: %[[VAL_4:.*]] = aie.lock(%{{.*}}tile_1_1, 1) {init = 0 : i32, sym_name = "of2_cons_lock_0"} -// CHECK: %[[VAL_5:.*]] = aie.lock(%{{.*}}tile_1_1, 2) {init = 1 : i32, sym_name = "of2_prod_lock_1"} -// CHECK: %[[VAL_6:.*]] = aie.lock(%{{.*}}tile_1_1, 3) {init = 0 : i32, sym_name = "of2_cons_lock_1"} -// CHECK: %[[VAL_7:.*]] = aie.buffer(%{{.*}}tile_3_3) {sym_name = "of1_buff_0"} : memref<16xi32> -// CHECK: %[[VAL_8:.*]] = aie.lock(%{{.*}}tile_3_3, 0) {init = 3 : i32, sym_name = "of1_prod_lock_0"} -// CHECK: %[[VAL_9:.*]] = aie.lock(%{{.*}}tile_3_3, 1) {init = 0 : i32, sym_name = "of1_cons_lock_0"} -// CHECK: %[[VAL_10:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of0_buff_0"} : memref<16xi32> -// CHECK: %[[VAL_11:.*]] = aie.lock(%{{.*}}tile_1_2, 0) {init = 3 : i32, sym_name = "of0_prod_lock_0"} -// CHECK: %[[VAL_12:.*]] = aie.lock(%{{.*}}tile_1_2, 1) {init = 0 : i32, sym_name = "of0_cons_lock_0"} -// CHECK: aie.flow(%{{.*}}tile_1_2, DMA : 0, %{{.*}}tile_1_1, DMA : 0) -// 
CHECK: aie.flow(%{{.*}}tile_3_3, DMA : 0, %{{.*}}tile_1_1, DMA : 1) -// CHECK: aie.flow(%{{.*}}tile_1_1, DMA : 0, %{{.*}}tile_1_0, DMA : 0) -// CHECK: %mem_1_2 = aie.mem(%{{.*}}tile_1_2) { +// CHECK-DAG: %[[SHIM_TILE:.*]] = aie.tile(1, 0) +// CHECK-DAG: %[[MEM_TILE:.*]] = aie.tile(1, 1) +// CHECK-DAG: %[[TILE_1_2:.*]] = aie.tile(1, 2) +// CHECK-DAG: %[[TILE_3_3:.*]] = aie.tile(3, 3) +// CHECK-DAG: %[[OF2_CONS_PROD_LOCK:.*]] = aie.lock(%[[SHIM_TILE]], 0) {init = 0 : i32, sym_name = "of2_cons_prod_lock_0"} +// CHECK-DAG: %[[OF2_CONS_CONS_LOCK:.*]] = aie.lock(%[[SHIM_TILE]], 1) {init = 0 : i32, sym_name = "of2_cons_cons_lock_0"} +// CHECK-DAG: %[[OF2_BUFF:.*]] = aie.buffer(%[[MEM_TILE]]) {sym_name = "of2_buff_0"} : memref<32xi32> +// CHECK-DAG: %[[OF2_PROD_LOCK_0:.*]] = aie.lock(%[[MEM_TILE]], 0) {init = 1 : i32, sym_name = "of2_prod_lock_0"} +// CHECK-DAG: %[[OF2_CONS_LOCK_0:.*]] = aie.lock(%[[MEM_TILE]], 1) {init = 0 : i32, sym_name = "of2_cons_lock_0"} +// CHECK-DAG: %[[OF2_PROD_LOCK_1:.*]] = aie.lock(%[[MEM_TILE]], 2) {init = 1 : i32, sym_name = "of2_prod_lock_1"} +// CHECK-DAG: %[[OF2_CONS_LOCK_1:.*]] = aie.lock(%[[MEM_TILE]], 3) {init = 0 : i32, sym_name = "of2_cons_lock_1"} +// CHECK-DAG: %[[OF1_BUFF:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "of1_buff_0"} : memref<16xi32> +// CHECK-DAG: %[[OF1_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 3 : i32, sym_name = "of1_prod_lock_0"} +// CHECK-DAG: %[[OF1_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "of1_cons_lock_0"} +// CHECK-DAG: %[[OF0_BUFF:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of0_buff_0"} : memref<16xi32> +// CHECK-DAG: %[[OF0_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 3 : i32, sym_name = "of0_prod_lock_0"} +// CHECK-DAG: %[[OF0_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of0_cons_lock_0"} +// CHECK-DAG: aie.flow(%[[TILE_1_2]], DMA : 0, %[[MEM_TILE]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_3_3]], DMA : 0, %[[MEM_TILE]], DMA : 1) 
+// CHECK-DAG: aie.flow(%[[MEM_TILE]], DMA : 0, %[[SHIM_TILE]], DMA : 0) +// CHECK: %mem_1_2 = aie.mem(%[[TILE_1_2]]) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb4) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 -// CHECK: aie.use_lock(%[[VAL_12]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_10]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_11]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF0_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_BUFF]] : memref<16xi32>, 0, 16) +// CHECK: aie.use_lock(%[[OF0_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_12]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_10]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_11]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF0_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_BUFF]] : memref<16xi32>, 0, 16) +// CHECK: aie.use_lock(%[[OF0_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb3 -// CHECK: ^bb3: // pred: ^bb2 -// CHECK: aie.use_lock(%[[VAL_12]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_10]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_11]], Release, 1) +// CHECK: ^bb3: +// CHECK: aie.use_lock(%[[OF0_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_BUFF]] : memref<16xi32>, 0, 16) +// CHECK: aie.use_lock(%[[OF0_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb4: // pred: ^bb0 +// CHECK: ^bb4: // CHECK: aie.end // CHECK: } -// CHECK: %memtile_dma_1_1 = aie.memtile_dma(%{{.*}}tile_1_1) { +// CHECK: %memtile_dma_1_1 = aie.memtile_dma(%[[MEM_TILE]]) { // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb2) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb1 -// CHECK: aie.use_lock(%[[VAL_3]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<32xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_4]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], 
AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF]] : memref<32xi32>, 0, 16) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb2: // pred: ^bb0 +// CHECK: ^bb2: // CHECK: %1 = aie.dma_start(S2MM, 1, ^bb3, ^bb4) -// CHECK: ^bb3: // 2 preds: ^bb2, ^bb3 -// CHECK: aie.use_lock(%[[VAL_5]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<32xi32>, 16, 16) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb3: +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF]] : memref<32xi32>, 16, 16) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb3 -// CHECK: ^bb4: // pred: ^bb2 +// CHECK: ^bb4: // CHECK: %2 = aie.dma_start(MM2S, 0, ^bb5, ^bb7) -// CHECK: ^bb5: // 2 preds: ^bb4, ^bb6 -// CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<32xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_3]], Release, 1) +// CHECK: ^bb5: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF]] : memref<32xi32>, 0, 16) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb6 -// CHECK: ^bb6: // pred: ^bb5 -// CHECK: aie.use_lock(%[[VAL_6]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<32xi32>, 16, 16) -// CHECK: aie.use_lock(%[[VAL_5]], Release, 1) +// CHECK: ^bb6: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF]] : memref<32xi32>, 16, 16) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_1]], Release, 1) // CHECK: aie.next_bd ^bb5 -// CHECK: ^bb7: // pred: ^bb4 +// CHECK: ^bb7: // CHECK: aie.end // CHECK: } -// CHECK: %mem_3_3 = aie.mem(%{{.*}}tile_3_3) { +// CHECK: %mem_3_3 = aie.mem(%[[TILE_3_3]]) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb4) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 
1) -// CHECK: aie.dma_bd(%[[VAL_7]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_8]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_BUFF]] : memref<16xi32>, 0, 16) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_7]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_8]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_BUFF]] : memref<16xi32>, 0, 16) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb3 -// CHECK: ^bb3: // pred: ^bb2 -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_7]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_8]], Release, 1) +// CHECK: ^bb3: +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_BUFF]] : memref<16xi32>, 0, 16) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb4: // pred: ^bb0 +// CHECK: ^bb4: // CHECK: aie.end // CHECK: } -// CHECK: aie.shim_dma_allocation @of2_shim_alloc(%shim_noc_tile_1_0, S2MM, 0) +// CHECK: aie.shim_dma_allocation @of2_shim_alloc(%{{.*}}, S2MM, 0) // CHECK: } // CHECK: } diff --git a/test/objectFifo-stateful-transform/repeat_count/link_repeat_count_test.mlir b/test/objectFifo-stateful-transform/repeat_count/link_repeat_count_test.mlir index 9fd49fdf62b..a7cd926c2f5 100644 --- a/test/objectFifo-stateful-transform/repeat_count/link_repeat_count_test.mlir +++ b/test/objectFifo-stateful-transform/repeat_count/link_repeat_count_test.mlir @@ -12,100 +12,100 @@ // CHECK: module @memtileRepeat { // CHECK: aie.device(npu1) { -// CHECK: %{{.*}}tile_1_0 = aie.tile(1, 0) -// CHECK: %{{.*}}tile_1_1 = aie.tile(1, 1) -// CHECK: 
%{{.*}}tile_2_1 = aie.tile(2, 1) -// CHECK: %{{.*}}tile_1_2 = aie.tile(1, 2) -// CHECK: %{{.*}}tile_3_3 = aie.tile(3, 3) -// CHECK: %[[VAL_2:.*]] = aie.buffer(%{{.*}}tile_2_1) {sym_name = "of2_cons_buff_0"} : memref<32xi32> -// CHECK: %[[VAL_3:.*]] = aie.lock(%{{.*}}tile_2_1, 0) {init = 1 : i32, sym_name = "of2_cons_prod_lock_0"} -// CHECK: %[[VAL_4:.*]] = aie.lock(%{{.*}}tile_2_1, 1) {init = 0 : i32, sym_name = "of2_cons_cons_lock_0"} -// CHECK: %[[VAL_5:.*]] = aie.buffer(%{{.*}}tile_3_3) {sym_name = "of2_buff_0"} : memref<32xi32> -// CHECK: %[[VAL_6:.*]] = aie.lock(%{{.*}}tile_3_3, 0) {init = 3 : i32, sym_name = "of2_prod_lock_0"} -// CHECK: %[[VAL_7:.*]] = aie.lock(%{{.*}}tile_3_3, 1) {init = 0 : i32, sym_name = "of2_cons_lock_0"} -// CHECK: %[[VAL_8:.*]] = aie.buffer(%{{.*}}tile_1_2) {sym_name = "of1_cons_buff_0"} : memref<16xi32> -// CHECK: %[[VAL_9:.*]] = aie.lock(%{{.*}}tile_1_2, 0) {init = 1 : i32, sym_name = "of1_cons_prod_lock_0"} -// CHECK: %[[VAL_10:.*]] = aie.lock(%{{.*}}tile_1_2, 1) {init = 0 : i32, sym_name = "of1_cons_cons_lock_0"} -// CHECK: %[[VAL_11:.*]] = aie.buffer(%{{.*}}tile_1_1) {sym_name = "of0_cons_buff_0"} : memref<32xi32> -// CHECK: %[[VAL_12:.*]] = aie.lock(%{{.*}}tile_1_1, 0) {init = 3 : i32, sym_name = "of0_cons_prod_lock_0"} -// CHECK: %[[VAL_13:.*]] = aie.lock(%{{.*}}tile_1_1, 1) {init = 0 : i32, sym_name = "of0_cons_cons_lock_0"} -// CHECK: aie.flow(%{{.*}}tile_1_0, DMA : 0, %{{.*}}tile_1_1, DMA : 0) -// CHECK: aie.flow(%{{.*}}tile_1_1, DMA : 0, %{{.*}}tile_1_2, DMA : 0) -// CHECK: aie.flow(%{{.*}}tile_3_3, DMA : 0, %{{.*}}tile_2_1, DMA : 0) -// CHECK: aie.flow(%{{.*}}tile_2_1, DMA : 0, %{{.*}}tile_1_0, DMA : 0) -// CHECK: aie.shim_dma_allocation @of0_shim_alloc(%shim_noc_tile_1_0, MM2S, 0) -// CHECK: %memtile_dma_1_1 = aie.memtile_dma(%{{.*}}tile_1_1) { +// CHECK-DAG: %[[TILE_1_0:.*]] = aie.tile(1, 0) +// CHECK-DAG: %[[TILE_1_1:.*]] = aie.tile(1, 1) +// CHECK-DAG: %[[TILE_2_1:.*]] = aie.tile(2, 1) +// CHECK-DAG: %[[TILE_1_2:.*]] = 
aie.tile(1, 2) +// CHECK-DAG: %[[TILE_3_3:.*]] = aie.tile(3, 3) +// CHECK-DAG: %[[OF2_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_2_1]]) {sym_name = "of2_cons_buff_0"} : memref<32xi32> +// CHECK-DAG: %[[OF2_CONS_PROD_LOCK_0:.*]] = aie.lock(%[[TILE_2_1]], 0) {init = 1 : i32, sym_name = "of2_cons_prod_lock_0"} +// CHECK-DAG: %[[OF2_CONS_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_2_1]], 1) {init = 0 : i32, sym_name = "of2_cons_cons_lock_0"} +// CHECK-DAG: %[[OF2_BUFF_0:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "of2_buff_0"} : memref<32xi32> +// CHECK-DAG: %[[OF2_PROD_LOCK_0:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 3 : i32, sym_name = "of2_prod_lock_0"} +// CHECK-DAG: %[[OF2_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "of2_cons_lock_0"} +// CHECK-DAG: %[[OF1_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of1_cons_buff_0"} : memref<16xi32> +// CHECK-DAG: %[[OF1_CONS_PROD_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 1 : i32, sym_name = "of1_cons_prod_lock_0"} +// CHECK-DAG: %[[OF1_CONS_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of1_cons_cons_lock_0"} +// CHECK-DAG: %[[OF0_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_1_1]]) {sym_name = "of0_cons_buff_0"} : memref<32xi32> +// CHECK-DAG: %[[OF0_CONS_PROD_LOCK_0:.*]] = aie.lock(%[[TILE_1_1]], 0) {init = 3 : i32, sym_name = "of0_cons_prod_lock_0"} +// CHECK-DAG: %[[OF0_CONS_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_1_1]], 1) {init = 0 : i32, sym_name = "of0_cons_cons_lock_0"} +// CHECK-DAG: aie.flow(%[[TILE_1_0]], DMA : 0, %[[TILE_1_1]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_1_1]], DMA : 0, %[[TILE_1_2]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_3_3]], DMA : 0, %[[TILE_2_1]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_2_1]], DMA : 0, %[[TILE_1_0]], DMA : 0) +// CHECK-DAG: aie.shim_dma_allocation @of0_shim_alloc(%shim_noc_tile_1_0, MM2S, 0) +// CHECK: %memtile_dma_1_1 = aie.memtile_dma(%[[TILE_1_1]]) { // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb2) -// CHECK: ^bb1: // 
2 preds: ^bb0, ^bb1 -// CHECK: aie.use_lock(%[[VAL_12]], AcquireGreaterEqual, 3) -// CHECK: aie.dma_bd(%[[VAL_11]] : memref<32xi32>, 0, 32) -// CHECK: aie.use_lock(%[[VAL_13]], Release, 3) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK_0]], AcquireGreaterEqual, 3) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_0]] : memref<32xi32>, 0, 32) +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK_0]], Release, 3) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb2: // pred: ^bb0 +// CHECK: ^bb2: // CHECK: %1 = aie.dma_start(MM2S, 0, ^bb3, ^bb6) -// CHECK: ^bb3: // 2 preds: ^bb2, ^bb5 -// CHECK: aie.use_lock(%[[VAL_13]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_11]] : memref<32xi32>, 0, 32) -// CHECK: aie.use_lock(%[[VAL_12]], Release, 1) +// CHECK: ^bb3: +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_0]] : memref<32xi32>, 0, 32) +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb4 -// CHECK: ^bb4: // pred: ^bb3 -// CHECK: aie.use_lock(%[[VAL_13]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_11]] : memref<32xi32>, 0, 32) -// CHECK: aie.use_lock(%[[VAL_12]], Release, 1) +// CHECK: ^bb4: +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_0]] : memref<32xi32>, 0, 32) +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb5 -// CHECK: ^bb5: // pred: ^bb4 -// CHECK: aie.use_lock(%[[VAL_13]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_11]] : memref<32xi32>, 0, 32) -// CHECK: aie.use_lock(%[[VAL_12]], Release, 1) +// CHECK: ^bb5: +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_0]] : memref<32xi32>, 0, 32) +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb3 -// CHECK: ^bb6: // pred: ^bb2 +// CHECK: ^bb6: // CHECK: aie.end // CHECK: } -// CHECK: %mem_1_2 = 
aie.mem(%{{.*}}tile_1_2) { +// CHECK: %mem_1_2 = aie.mem(%[[TILE_1_2]]) { // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb2) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb1 -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_8]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[VAL_10]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF1_CONS_PROD_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_CONS_BUFF_0]] : memref<16xi32>, 0, 16) +// CHECK: aie.use_lock(%[[OF1_CONS_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb2: // pred: ^bb0 +// CHECK: ^bb2: // CHECK: aie.end // CHECK: } -// CHECK: %mem_3_3 = aie.mem(%{{.*}}tile_3_3) { +// CHECK: %mem_3_3 = aie.mem(%[[TILE_3_3]]) { // CHECK: %0 = aie.dma_start(MM2S, 0, ^bb1, ^bb4) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_5]] : memref<32xi32>, 0, 32) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<32xi32>, 0, 32) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb2 -// CHECK: ^bb2: // pred: ^bb1 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_5]] : memref<32xi32>, 0, 32) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] : memref<32xi32>, 0, 32) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb3 -// CHECK: ^bb3: // pred: ^bb2 -// CHECK: aie.use_lock(%[[VAL_7]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_5]] : memref<32xi32>, 0, 32) -// CHECK: aie.use_lock(%[[VAL_6]], Release, 1) +// CHECK: ^bb3: +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_BUFF_0]] 
: memref<32xi32>, 0, 32) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb4: // pred: ^bb0 +// CHECK: ^bb4: // CHECK: aie.end // CHECK: } -// CHECK: %memtile_dma_2_1 = aie.memtile_dma(%{{.*}}tile_2_1) { +// CHECK: %memtile_dma_2_1 = aie.memtile_dma(%[[TILE_2_1]]) { // CHECK: %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb2) -// CHECK: ^bb1: // 2 preds: ^bb0, ^bb1 -// CHECK: aie.use_lock(%[[VAL_3]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<32xi32>, 0, 32) -// CHECK: aie.use_lock(%[[VAL_4]], Release, 1) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF2_CONS_PROD_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_CONS_BUFF_0]] : memref<32xi32>, 0, 32) +// CHECK: aie.use_lock(%[[OF2_CONS_CONS_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb1 -// CHECK: ^bb2: // pred: ^bb0 +// CHECK: ^bb2: // CHECK: %1 = aie.dma_start(MM2S, 0, ^bb3, ^bb4) -// CHECK: ^bb3: // 2 preds: ^bb2, ^bb3 -// CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_2]] : memref<32xi32>, 0, 32) -// CHECK: aie.use_lock(%[[VAL_3]], Release, 1) +// CHECK: ^bb3: +// CHECK: aie.use_lock(%[[OF2_CONS_CONS_LOCK_0]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_CONS_BUFF_0]] : memref<32xi32>, 0, 32) +// CHECK: aie.use_lock(%[[OF2_CONS_PROD_LOCK_0]], Release, 1) // CHECK: aie.next_bd ^bb3 -// CHECK: ^bb4: // pred: ^bb2 +// CHECK: ^bb4: // CHECK: aie.end // CHECK: } // CHECK: aie.shim_dma_allocation @of3_shim_alloc(%shim_noc_tile_1_0, S2MM, 0)