Xilinx · yenjames · May 26, 2026 · May 26, 2026 · May 27, 2026
@@ -756,10 +756,16 @@ struct AIEObjectFifoStatefulTransformPass
               }
             }
 
-            // try to allocate on neighbor tiles
+            // Try neighbor with more remaining capacity first to avoid
+            // blocking adjacent MemTiles that also need spill space.
             if (!neighborTiles.empty()) {
+              llvm::stable_sort(neighborTiles, [&](TileOp a, TileOp b) {
+                return calculateCurrentUsedMemory(a, state.buffersPerFifo,
+                                                  buffers) <
+                       calculateCurrentUsedMemory(b, state.buffersPerFifo,
+                                                  buffers);
+              });
               for (auto &tile : neighborTiles) {
-                // Try to allocate on this neighbor tile
                 int neighborUsedMemory = calculateCurrentUsedMemory(
                     tile, state.buffersPerFifo, buffers);
                 if (static_cast<int>(neighborUsedMemory + totalSizeBytes) <=
@@ -1977,7 +1983,39 @@ struct AIEObjectFifoStatefulTransformPass
     //   the acquires/releases (uses of the FIFO).
     // - Global release counter tracker to keep track of the objectFifo state
     //===------------------------------------------------------------------===//
-    for (auto createOp : device.getOps<ObjectFifoCreateOp>()) {
+    // Process MemTile ObjectFifos largest-first so large buffers get
+    // priority for home placement and spill targets are chosen before
+    // smaller fifos consume neighbor capacity.
+    SmallVector<ObjectFifoCreateOp> sortedCreateOps(
+        device.getOps<ObjectFifoCreateOp>());
+    if (!sortedCreateOps.empty()) {
+      DataLayout dataLayout = DataLayout::closest(sortedCreateOps[0]);
+      // Sort only among MemTile-producer fifos by buffer size descending.
+      // Non-MemTile fifos keep their IR-order positions undisturbed.
+      auto getBufSize = [&](ObjectFifoCreateOp op) -> int64_t {
+        auto fifoType = llvm::cast<AIEObjectFifoType>(op.getElemType());
+        auto elemType = llvm::cast<MemRefType>(fifoType.getElementType());
+        int64_t bits = dataLayout.getTypeSizeInBits(elemType.getElementType());
+        return elemType.getNumElements() * bits / 8;
+      };
+      SmallVector<size_t> memTileSlots;
+      SmallVector<ObjectFifoCreateOp> memTileFifos;
+      for (size_t i = 0; i < sortedCreateOps.size(); i++) {
+        auto prodTile = dyn_cast<TileOp>(
+            sortedCreateOps[i].getProducerTile().getDefiningOp());
+        if (prodTile && prodTile.isMemTile()) {
+          memTileSlots.push_back(i);
+          memTileFifos.push_back(sortedCreateOps[i]);
+        }
+      }
+      llvm::stable_sort(memTileFifos,
+                        [&](ObjectFifoCreateOp a, ObjectFifoCreateOp b) {
+                          return getBufSize(a) > getBufSize(b);
+                        });
+      for (size_t i = 0; i < memTileSlots.size(); i++)
+        sortedCreateOps[memTileSlots[i]] = memTileFifos[i];
+    }
+    for (auto createOp : sortedCreateOps) {
 
       int share_direction = 0;
       bool shared = !requiresDMAs(createOp, share_direction, state);

@@ -0,0 +1,64 @@
+//===- memtile_spill_order.mlir ----------------------------------*- MLIR -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (C) 2026, Advanced Micro Devices, Inc.
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: aie-opt --aie-objectFifo-stateful-transform %s | FileCheck %s
+
+// Three MemTile ObjectFifos where IR-order allocation fails but
+// size-sorted (large-first) allocation succeeds.
+//
+// @block at col 3 (depth 1, 524000B): fills col 3 so @large can't spill right.
+// @small at col 1 (depth 2, 260000B): fits locally (520000 < 524288).
+// @large at col 2 (depth 2, 400000B): must spill one buffer.
+//
+// Without large-first sort (IR order: block, small, large):
+//   @small fills col 1 (4288B remaining), @large can't spill left or right → FAIL.
+//
+// With large-first sort (block 524000, large 400000, small 260000):
+//   @large spills to col 1 (empty), @small spills to col 0 → SUCCESS.
+
+// large_cons_buff_0 on col 2 (home), large_cons_buff_1 spills to col 1
+// CHECK-DAG: aie.buffer(%mem_tile_2_1) {sym_name = "large_cons_buff_0"}
+// CHECK-DAG: aie.buffer(%mem_tile_1_1) {sym_name = "large_cons_buff_1"}
+// small buffers both spill to col 0
+// CHECK-DAG: aie.buffer(%mem_tile_0_1) {sym_name = "small_cons_buff_0"}
+// CHECK-DAG: aie.buffer(%mem_tile_0_1) {sym_name = "small_cons_buff_1"}
+// block buffer on col 3 (home)
+// CHECK-DAG: aie.buffer(%mem_tile_3_1) {sym_name = "block_cons_buff_0"}
+
+module {
+  aie.device(npu2) {
+    %shim0 = aie.tile(0, 0)
+    %shim1 = aie.tile(1, 0)
+    %shim2 = aie.tile(2, 0)
+    %shim3 = aie.tile(3, 0)
+    %mem1 = aie.tile(1, 1)
+    %mem2 = aie.tile(2, 1)
+    %mem3 = aie.tile(3, 1)
+    %core1 = aie.tile(1, 2)
+    %core2 = aie.tile(2, 2)
+    %core3 = aie.tile(3, 2)
+
+    // IR order: block first, then small, then large.
+    aie.objectfifo @block(%shim3, {%mem3}, 1 : i32) : !aie.objectfifo<memref<524000xi8>>
+
+    aie.objectfifo @small(%shim1, {%mem1}, 2 : i32) : !aie.objectfifo<memref<260000xi8>>
+
+    aie.objectfifo @large(%shim2, {%mem2}, 2 : i32) : !aie.objectfifo<memref<400000xi8>>
+
+    aie.objectfifo @small_out(%mem1, {%core1}, 2 : i32) : !aie.objectfifo<memref<260000xi8>>
+    aie.objectfifo.link [@small] -> [@small_out]([] [])
+
+    aie.objectfifo @large_out(%mem2, {%core2}, 2 : i32) : !aie.objectfifo<memref<400000xi8>>
+    aie.objectfifo.link [@large] -> [@large_out]([] [])
+
+    aie.objectfifo @block_out(%mem3, {%core3}, 1 : i32) : !aie.objectfifo<memref<524000xi8>>
+    aie.objectfifo.link [@block] -> [@block_out]([] [])
+  }
+}