diff --git a/mlir/include/air/Conversion/AIRToAIESchedulingUtils.h b/mlir/include/air/Conversion/AIRToAIESchedulingUtils.h index a16581896..6d4ad6446 100644 --- a/mlir/include/air/Conversion/AIRToAIESchedulingUtils.h +++ b/mlir/include/air/Conversion/AIRToAIESchedulingUtils.h @@ -201,8 +201,7 @@ class ShimDMAAllocator : public DMAAllocator { FailureOr allocNewDmaChannel(air::MemcpyInterface &memcpyOp, int col, int row, - std::vector &dma_ops, - std::string colAllocConstraint = "same_column"); + std::vector &dma_ops); FailureOr allocNewDmaChannel(air::MemcpyInterface &memcpyOp, diff --git a/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp b/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp index b65a97c02..73f8f296e 100644 --- a/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp +++ b/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp @@ -1014,9 +1014,10 @@ air::ShimDMAAllocator::ShimDMAAllocator(AIE::DeviceOp device) } } -FailureOr air::ShimDMAAllocator::allocNewDmaChannel( - air::MemcpyInterface &memcpyOp, int col, int row, - std::vector &dma_ops, std::string colAllocConstraint) { +FailureOr +air::ShimDMAAllocator::allocNewDmaChannel(air::MemcpyInterface &memcpyOp, + int col, int row, + std::vector &dma_ops) { auto isMM2S = isTileOutbound(memcpyOp, dmaMemorySpace); if (failed(isMM2S)) return failure(); @@ -1043,12 +1044,6 @@ FailureOr air::ShimDMAAllocator::allocNewDmaChannel( } AIE::TileOp tile = nullptr; int colIdx = 0; - if (colAllocConstraint == "same_column") { - // Attempt to use shim dma channels within the same column. - auto it = find(dma_columns.begin(), dma_columns.end(), col); - if (it != dma_columns.end()) - colIdx = it - dma_columns.begin(); - } int dma_col = dma_columns[colIdx]; // For packet-flow ops, reuse an existing physical channel on this shim tile diff --git a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_npu.mlir b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_npu.mlir index e5c723abb..5ce2fd831 100644 --- a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_npu.mlir +++ b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_npu.mlir @@ -782,8 +782,6 @@ func.func @func11(%arg0: memref<128xbf16>, %arg1: memref<128xbf16>) { // CHECK: aie.device(npu1) // CHECK: %[[tile_0_0:.*]] = aie.tile(0, 0) // CHECK: %[[tile_1_0:.*]] = aie.tile(1, 0) -// CHECK: %[[tile_2_0:.*]] = aie.tile(2, 0) -// CHECK: %[[tile_3_0:.*]] = aie.tile(3, 0) // CHECK: %[[tile_0_1:.*]] = aie.tile(0, 1) // CHECK: %[[tile_1_1:.*]] = aie.tile(1, 1) // CHECK: %[[tile_2_1:.*]] = aie.tile(2, 1) @@ -841,9 +839,9 @@ func.func @func11(%arg0: memref<128xbf16>, %arg1: memref<128xbf16>) { // CHECK: aie.core(%[[tile_1_2]]) // CHECK: aie.core(%[[tile_0_2]]) // CHECK: aie.flow(%[[tile_0_1]], DMA : 0, %[[tile_0_0]], DMA : 0) -// CHECK: aie.flow(%[[tile_1_1]], DMA : 0, %[[tile_1_0]], DMA : 0) -// CHECK: aie.flow(%[[tile_2_1]], DMA : 0, %[[tile_2_0]], DMA : 0) -// CHECK: aie.flow(%[[tile_3_1]], DMA : 0, %[[tile_3_0]], DMA : 0) +// CHECK: aie.flow(%[[tile_1_1]], DMA : 0, %[[tile_0_0]], DMA : 1) +// CHECK: aie.flow(%[[tile_2_1]], DMA : 0, %[[tile_1_0]], DMA : 0) +// CHECK: aie.flow(%[[tile_3_1]], DMA : 0, %[[tile_1_0]], DMA : 1) // CHECK: aie.flow(%[[tile_0_2]], DMA : 0, %[[tile_0_1]], DMA : 0) // CHECK: aie.flow(%[[tile_0_3]], DMA : 0, %[[tile_0_1]], DMA : 1) // CHECK: aie.flow(%[[tile_0_4]], DMA : 0, %[[tile_0_1]], DMA : 2) @@ -1082,12 +1080,12 @@ module { // CHECK: %[[tile_0_3:.*]] = aie.tile(0, 3) // CHECK: %[[tile_1_3:.*]] = aie.tile(1, 3) // CHECK: aie.flow(%[[tile_0_0]], DMA : 0, %[[tile_0_2]], DMA : 0) -// CHECK: aie.flow(%[[tile_1_0]], DMA : 0, %[[tile_1_2]], DMA : 0) -// CHECK: aie.flow(%[[tile_0_0]], DMA : 1, %[[tile_0_3]], DMA : 0) +// CHECK: aie.flow(%[[tile_0_0]], DMA : 1, %[[tile_1_2]], DMA : 0) +// CHECK: aie.flow(%[[tile_1_0]], DMA : 0, %[[tile_0_3]], DMA : 0) // CHECK: aie.flow(%[[tile_1_0]], DMA : 1, %[[tile_1_3]], DMA : 0) // CHECK: aie.shim_dma_allocation @air_channel_0_0(%[[tile_0_0]], MM2S, 0) -// CHECK: aie.shim_dma_allocation @air_channel_0_1(%[[tile_1_0]], MM2S, 0) -// CHECK: aie.shim_dma_allocation @air_channel_0_2(%[[tile_0_0]], MM2S, 1) +// CHECK: aie.shim_dma_allocation @air_channel_0_1(%[[tile_0_0]], MM2S, 1) +// CHECK: aie.shim_dma_allocation @air_channel_0_2(%[[tile_1_0]], MM2S, 0) // CHECK: aie.shim_dma_allocation @air_channel_0_3(%[[tile_1_0]], MM2S, 1) // CHECK: func.func @func14 // CHECK: air.channel.put @channel_0{{.*}} metadataArray = [{base = "air_channel_0_0", index = 0 : i32}, {base = "air_channel_0_1", index = 1 : i32}, {base = "air_channel_0_2", index = 2 : i32}, {base = "air_channel_0_3", index = 3 : i32}]} : (memref<32x16xi32>) diff --git a/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_npu.mlir b/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_npu.mlir index 764deb0e4..e481c298d 100644 --- a/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_npu.mlir +++ b/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_npu.mlir @@ -9,7 +9,6 @@ // CHECK-LABEL: aie.device(npu1) @segment_0 { // CHECK: %[[tile_0_0:.*]] = aie.tile(0, 0) -// CHECK: %[[tile_1_0:.*]] = aie.tile(1, 0) // CHECK: %[[tile_0_1:.*]] = aie.tile(0, 1) // CHECK: %[[tile_1_1:.*]] = aie.tile(1, 1) // CHECK: %[[tile_0_2:.*]] = aie.tile(0, 2) @@ -29,7 +28,7 @@ // CHECK-DAG: aie.buffer(%[[tile_1_1]]) {{{.*}}} : memref<128x64xi32, 1> // CHECK-COUNT-20: aie.buffer({{.*}}) {{{.*}}} : memref<32x32xi32, 2> // CHECK: aie.flow(%[[tile_0_0]], DMA : 0, %[[tile_0_1]], DMA : 0) -// CHECK: aie.flow(%[[tile_1_0]], DMA : 0, %[[tile_1_1]], DMA : 0) +// CHECK: aie.flow(%[[tile_0_0]], DMA : 1, %[[tile_1_1]], DMA : 0) // CHECK: aie.flow(%[[tile_0_1]], DMA : 0, %[[tile_0_0]], DMA : 0) // CHECK: aie.flow(%[[tile_0_1]], DMA : 1, %[[tile_0_2]], DMA : 0) // CHECK: aie.flow(%[[tile_0_1]], DMA : 2, %[[tile_0_3]], DMA : 0) diff --git a/mlir/test/Conversion/AIRToAIE/good_shim_packet_flow_npu_4col.mlir b/mlir/test/Conversion/AIRToAIE/good_shim_packet_flow_npu_4col.mlir index ac6af7d8a..3c8aba862 100644 --- a/mlir/test/Conversion/AIRToAIE/good_shim_packet_flow_npu_4col.mlir +++ b/mlir/test/Conversion/AIRToAIE/good_shim_packet_flow_npu_4col.mlir @@ -10,10 +10,10 @@ // 4x4 NPU1 array. // WHOLEARRAY: %[[shim_noc_tile_0_0:.*]] = aie.tile(0, 0) -// WHOLEARRAY: %[[shim_noc_tile_1_0:.*]] = aie.tile(1, 0) -// WHOLEARRAY: %[[shim_noc_tile_2_0:.*]] = aie.tile(2, 0) -// WHOLEARRAY: %[[shim_noc_tile_3_0:.*]] = aie.tile(3, 0) // WHOLEARRAY: aie.shim_dma_allocation @air_channel_2_0(%[[shim_noc_tile_0_0]], MM2S, 0) +// WHOLEARRAY: aie.shim_dma_allocation @air_channel_2_1(%[[shim_noc_tile_0_0]], MM2S, 0) +// WHOLEARRAY: aie.shim_dma_allocation @air_channel_2_2(%[[shim_noc_tile_0_0]], MM2S, 0) +// WHOLEARRAY: aie.shim_dma_allocation @air_channel_2_3(%[[shim_noc_tile_0_0]], MM2S, 0) #map = affine_map<()[s0] -> (s0 * 256)>