diff --git a/mlir/include/air/Conversion/AIRToAIESchedulingUtils.h b/mlir/include/air/Conversion/AIRToAIESchedulingUtils.h
index a16581896..6d4ad6446 100644
--- a/mlir/include/air/Conversion/AIRToAIESchedulingUtils.h
+++ b/mlir/include/air/Conversion/AIRToAIESchedulingUtils.h
@@ -201,8 +201,7 @@ class ShimDMAAllocator : public DMAAllocator {
 
   FailureOr<allocation_info_t>
   allocNewDmaChannel(air::MemcpyInterface &memcpyOp, int col, int row,
-                     std::vector<Operation *> &dma_ops,
-                     std::string colAllocConstraint = "same_column");
+                     std::vector<Operation *> &dma_ops);
 
   FailureOr<allocation_info_t>
   allocNewDmaChannel(air::MemcpyInterface &memcpyOp,
diff --git a/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp b/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp
index b65a97c02..73f8f296e 100644
--- a/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp
+++ b/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp
@@ -1014,9 +1014,10 @@ air::ShimDMAAllocator::ShimDMAAllocator(AIE::DeviceOp device)
   }
 }
 
-FailureOr<air::allocation_info_t> air::ShimDMAAllocator::allocNewDmaChannel(
-    air::MemcpyInterface &memcpyOp, int col, int row,
-    std::vector<Operation *> &dma_ops, std::string colAllocConstraint) {
+FailureOr<air::allocation_info_t>
+air::ShimDMAAllocator::allocNewDmaChannel(air::MemcpyInterface &memcpyOp,
+                                          int col, int row,
+                                          std::vector<Operation *> &dma_ops) {
   auto isMM2S = isTileOutbound(memcpyOp, dmaMemorySpace);
   if (failed(isMM2S))
     return failure();
@@ -1043,12 +1044,6 @@ FailureOr<air::allocation_info_t> air::ShimDMAAllocator::allocNewDmaChannel(
   }
   AIE::TileOp tile = nullptr;
   int colIdx = 0;
-  if (colAllocConstraint == "same_column") {
-    // Attempt to use shim dma channels within the same column.
-    auto it = find(dma_columns.begin(), dma_columns.end(), col);
-    if (it != dma_columns.end())
-      colIdx = it - dma_columns.begin();
-  }
   int dma_col = dma_columns[colIdx];
 
   // For packet-flow ops, reuse an existing physical channel on this shim tile
diff --git a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_npu.mlir b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_npu.mlir
index e5c723abb..5ce2fd831 100644
--- a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_npu.mlir
+++ b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_npu.mlir
@@ -782,8 +782,6 @@ func.func @func11(%arg0: memref<128xbf16>, %arg1: memref<128xbf16>) {
 // CHECK: aie.device(npu1)
 // CHECK: %[[tile_0_0:.*]] = aie.tile(0, 0)
 // CHECK: %[[tile_1_0:.*]] = aie.tile(1, 0)
-// CHECK: %[[tile_2_0:.*]] = aie.tile(2, 0)
-// CHECK: %[[tile_3_0:.*]] = aie.tile(3, 0)
 // CHECK: %[[tile_0_1:.*]] = aie.tile(0, 1)
 // CHECK: %[[tile_1_1:.*]] = aie.tile(1, 1)
 // CHECK: %[[tile_2_1:.*]] = aie.tile(2, 1)
@@ -841,9 +839,9 @@ func.func @func11(%arg0: memref<128xbf16>, %arg1: memref<128xbf16>) {
 // CHECK: aie.core(%[[tile_1_2]])
 // CHECK: aie.core(%[[tile_0_2]])
 // CHECK: aie.flow(%[[tile_0_1]], DMA : 0, %[[tile_0_0]], DMA : 0)
-// CHECK: aie.flow(%[[tile_1_1]], DMA : 0, %[[tile_1_0]], DMA : 0)
-// CHECK: aie.flow(%[[tile_2_1]], DMA : 0, %[[tile_2_0]], DMA : 0)
-// CHECK: aie.flow(%[[tile_3_1]], DMA : 0, %[[tile_3_0]], DMA : 0)
+// CHECK: aie.flow(%[[tile_1_1]], DMA : 0, %[[tile_0_0]], DMA : 1)
+// CHECK: aie.flow(%[[tile_2_1]], DMA : 0, %[[tile_1_0]], DMA : 0)
+// CHECK: aie.flow(%[[tile_3_1]], DMA : 0, %[[tile_1_0]], DMA : 1)
 // CHECK: aie.flow(%[[tile_0_2]], DMA : 0, %[[tile_0_1]], DMA : 0)
 // CHECK: aie.flow(%[[tile_0_3]], DMA : 0, %[[tile_0_1]], DMA : 1)
 // CHECK: aie.flow(%[[tile_0_4]], DMA : 0, %[[tile_0_1]], DMA : 2)
@@ -1082,12 +1080,12 @@ module {
 // CHECK: %[[tile_0_3:.*]] = aie.tile(0, 3)
 // CHECK: %[[tile_1_3:.*]] = aie.tile(1, 3)
 // CHECK: aie.flow(%[[tile_0_0]], DMA : 0, %[[tile_0_2]], DMA : 0)
-// CHECK: aie.flow(%[[tile_1_0]], DMA : 0, %[[tile_1_2]], DMA : 0)
-// CHECK: aie.flow(%[[tile_0_0]], DMA : 1, %[[tile_0_3]], DMA : 0)
+// CHECK: aie.flow(%[[tile_0_0]], DMA : 1, %[[tile_1_2]], DMA : 0)
+// CHECK: aie.flow(%[[tile_1_0]], DMA : 0, %[[tile_0_3]], DMA : 0)
 // CHECK: aie.flow(%[[tile_1_0]], DMA : 1, %[[tile_1_3]], DMA : 0)
 // CHECK: aie.shim_dma_allocation @air_channel_0_0(%[[tile_0_0]], MM2S, 0)
-// CHECK: aie.shim_dma_allocation @air_channel_0_1(%[[tile_1_0]], MM2S, 0)
-// CHECK: aie.shim_dma_allocation @air_channel_0_2(%[[tile_0_0]], MM2S, 1)
+// CHECK: aie.shim_dma_allocation @air_channel_0_1(%[[tile_0_0]], MM2S, 1)
+// CHECK: aie.shim_dma_allocation @air_channel_0_2(%[[tile_1_0]], MM2S, 0)
 // CHECK: aie.shim_dma_allocation @air_channel_0_3(%[[tile_1_0]], MM2S, 1)
 // CHECK: func.func @func14
 // CHECK: air.channel.put  @channel_0{{.*}} metadataArray = [{base = "air_channel_0_0", index = 0 : i32}, {base = "air_channel_0_1", index = 1 : i32}, {base = "air_channel_0_2", index = 2 : i32}, {base = "air_channel_0_3", index = 3 : i32}]} : (memref<32x16xi32>)
diff --git a/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_npu.mlir b/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_npu.mlir
index 764deb0e4..e481c298d 100644
--- a/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_npu.mlir
+++ b/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_npu.mlir
@@ -9,7 +9,6 @@
 
 // CHECK-LABEL:   aie.device(npu1) @segment_0 {
 // CHECK:   %[[tile_0_0:.*]] = aie.tile(0, 0)
-// CHECK:   %[[tile_1_0:.*]] = aie.tile(1, 0)
 // CHECK:   %[[tile_0_1:.*]] = aie.tile(0, 1)
 // CHECK:   %[[tile_1_1:.*]] = aie.tile(1, 1)
 // CHECK:   %[[tile_0_2:.*]] = aie.tile(0, 2)
@@ -29,7 +28,7 @@
 // CHECK-DAG:    aie.buffer(%[[tile_1_1]]) {{{.*}}} : memref<128x64xi32, 1>
 // CHECK-COUNT-20:    aie.buffer({{.*}}) {{{.*}}} : memref<32x32xi32, 2>
 // CHECK:    aie.flow(%[[tile_0_0]], DMA : 0, %[[tile_0_1]], DMA : 0)
-// CHECK:    aie.flow(%[[tile_1_0]], DMA : 0, %[[tile_1_1]], DMA : 0)
+// CHECK:    aie.flow(%[[tile_0_0]], DMA : 1, %[[tile_1_1]], DMA : 0)
 // CHECK:    aie.flow(%[[tile_0_1]], DMA : 0, %[[tile_0_0]], DMA : 0)
 // CHECK:    aie.flow(%[[tile_0_1]], DMA : 1, %[[tile_0_2]], DMA : 0)
 // CHECK:    aie.flow(%[[tile_0_1]], DMA : 2, %[[tile_0_3]], DMA : 0)
diff --git a/mlir/test/Conversion/AIRToAIE/good_shim_packet_flow_npu_4col.mlir b/mlir/test/Conversion/AIRToAIE/good_shim_packet_flow_npu_4col.mlir
index ac6af7d8a..3c8aba862 100644
--- a/mlir/test/Conversion/AIRToAIE/good_shim_packet_flow_npu_4col.mlir
+++ b/mlir/test/Conversion/AIRToAIE/good_shim_packet_flow_npu_4col.mlir
@@ -10,10 +10,10 @@
 // 4x4 NPU1 array.
 
 // WHOLEARRAY: %[[shim_noc_tile_0_0:.*]] = aie.tile(0, 0)
-// WHOLEARRAY: %[[shim_noc_tile_1_0:.*]] = aie.tile(1, 0)
-// WHOLEARRAY: %[[shim_noc_tile_2_0:.*]] = aie.tile(2, 0)
-// WHOLEARRAY: %[[shim_noc_tile_3_0:.*]] = aie.tile(3, 0)
 // WHOLEARRAY: aie.shim_dma_allocation @air_channel_2_0(%[[shim_noc_tile_0_0]], MM2S, 0)
+// WHOLEARRAY: aie.shim_dma_allocation @air_channel_2_1(%[[shim_noc_tile_0_0]], MM2S, 0)
+// WHOLEARRAY: aie.shim_dma_allocation @air_channel_2_2(%[[shim_noc_tile_0_0]], MM2S, 0)
+// WHOLEARRAY: aie.shim_dma_allocation @air_channel_2_3(%[[shim_noc_tile_0_0]], MM2S, 0)
 
 
 #map = affine_map<()[s0] -> (s0 * 256)>