diff --git a/include/aie/Dialect/AIE/IR/AIEOps.td b/include/aie/Dialect/AIE/IR/AIEOps.td index 1e41135d5b0..a129bf7cd42 100644 --- a/include/aie/Dialect/AIE/IR/AIEOps.td +++ b/include/aie/Dialect/AIE/IR/AIEOps.td @@ -1834,6 +1834,11 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol] Variadic:$consumerTiles, AIE_ObjectFifo_Depth:$elemNumber, TypeAttrOf:$elemType, + // Optional consumer element type for asymmetric transfer granularity. + // Producer sends elemType-sized transfers, consumer receives + // consumerElemType-sized transfers. Producer element size must be + // an integer multiple of consumer element size. + OptionalAttr>:$consumerElemType, BDDimLayoutArrayAttr:$dimensionsToStream, BDDimLayoutArrayArrayAttr:$dimensionsFromStreamPerConsumer, DefaultValuedAttr:$via_DMA, @@ -1863,6 +1868,7 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol] `,` $elemNumber `)` attr-dict `:` $elemType + custom($consumerElemType) custom(ref($elemNumber), ref($elemType), $initValues) }]; @@ -1886,6 +1892,13 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol] } AIE::BDDimLayoutArrayAttr getDimensionsFromStream(mlir::Value consumerTile); + + /// Returns the consumer element type if set, otherwise the producer type. 
+    mlir::Type getConsumerElemTypeOrDefault() {
+      if (auto ct = getConsumerElemType())
+        return ct.value();
+      return getElemType();
+    }
   }];
 
   let builders = [
diff --git a/lib/Dialect/AIE/IR/AIEDialect.cpp b/lib/Dialect/AIE/IR/AIEDialect.cpp
index 4edfff37418..92df2b0cad2 100644
--- a/lib/Dialect/AIE/IR/AIEDialect.cpp
+++ b/lib/Dialect/AIE/IR/AIEDialect.cpp
@@ -536,6 +536,38 @@ LogicalResult ObjectFifoCreateOp::verify() {
     return emitError("`iter_count` is currently only supported on MemTiles");
   }
 
+  // Asymmetric transfer granularity: a consumer element type, when given,
+  // must be an objectfifo type with the producer's scalar type, and the
+  // producer element count must be a whole multiple of the consumer's.
+  if (getConsumerElemType().has_value()) {
+    auto consType =
+        llvm::dyn_cast(getConsumerElemType().value());
+    if (!consType)
+      return emitError("consumer element type must be an "
+                       "!aie.objectfifo> type");
+    auto prodType = llvm::cast(getElemType());
+    auto prodMemref = prodType.getElementType();
+    auto consMemref = consType.getElementType();
+    if (prodMemref.getElementType() != consMemref.getElementType())
+      return emitError("producer and consumer must have the same scalar "
+                       "element type, but got ")
+             << prodMemref.getElementType() << " vs "
+             << consMemref.getElementType();
+    // getNumElements() is only meaningful for static shapes, so diagnose
+    // dynamic shapes here rather than computing a bogus element count.
+    if (!prodMemref.hasStaticShape() || !consMemref.hasStaticShape())
+      return emitError("producer and consumer element types must have "
+                       "static shapes");
+    int64_t prodSize = prodMemref.getNumElements();
+    int64_t consSize = consMemref.getNumElements();
+    if (consSize <= 0)
+      return emitError("consumer element count must be positive");
+    if (prodSize % consSize != 0)
+      return emitError("producer element size (")
+             << prodSize << ") must be an integer multiple of consumer "
+             << "element size (" << consSize << ")";
+  }
+
   return success();
 }
 
@@ -637,6 +669,30 @@ void xilinx::AIE::printObjectFifoConsumerTiles(
   }
 }
 
+/// Custom printer for the optional consumer element type of an
+/// ObjectFifoCreateOp: emits " -> " followed by the type attribute, or
+/// nothing when the attribute is absent.
+static void printObjectFifoConsumerElemType(OpAsmPrinter &p,
+                                            ObjectFifoCreateOp op,
+                                            TypeAttr consumerElemType) {
+  if (consumerElemType)
+    p << " -> " << consumerElemType;
+}
+
+/// Custom parser matching printObjectFifoConsumerElemType: if an arrow is
+/// present, parses the type after it into `consumerElemType`; otherwise
+/// leaves the attribute unset and succeeds.
+static ParseResult parseObjectFifoConsumerElemType(OpAsmParser &parser,
+                                                   TypeAttr &consumerElemType) {
+  if (failed(parser.parseOptionalArrow()))
+    return success(); // no consumer type
+  Type type;
+  if (parser.parseType(type))
+    return failure();
+  consumerElemType = TypeAttr::get(type);
+  return success();
+}
+
 static void printObjectFifoInitValues(OpAsmPrinter &p, ObjectFifoCreateOp op,
                                       Attribute numElem, TypeAttr type,
                                       Attribute initValues) {
@@ -999,7 +1055,19 @@ LogicalResult ObjectFifoAcquireOp::verify() {
   auto objFifoSubviewElem =
       llvm::cast(getResult().getType())
           .getElementType();
-  if (objFifoElem != objFifoSubviewElem)
+  // For asymmetric ObjectFifos the expected subview element depends on the
+  // port: Consume acquires must use the consumer element type, all other
+  // acquires the producer element type (the two coincide when unset).
+  auto objFifoConsType = llvm::dyn_cast(
+      getObjectFifo().getConsumerElemTypeOrDefault());
+  if (!objFifoConsType)
+    return emitOpError("ObjectFifo consumer element type must be an "
+                       "!aie.objectfifo> type");
+  auto objFifoConsElem = objFifoConsType.getElementType();
+  const bool elemMatches = getPort() == ObjectFifoPort::Consume
+                               ? objFifoConsElem == objFifoSubviewElem
+                               : objFifoElem == objFifoSubviewElem;
+  if (!elemMatches)
     return emitOpError(
         "ObjectFifo element and ObjectFifoSubview element must match.\n");
 
diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp
index fbf8deac919..576f8bb5fa6 100644
--- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp
+++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp
@@ -277,6 +277,9 @@ struct AIEObjectFifoStatefulTransformPass
     if (createOp.getAieStream())
       return true;
 
+    if (createOp.getConsumerElemType().has_value())
+      return true;
+
     if (createOp.getConsumerTiles().size() == 1 &&
         createOp.getDimensionsToStream().empty()) {
 
@@ -1898,7 +1901,9 @@ struct AIEObjectFifoStatefulTransformPass
       }
 
       builder.setInsertionPointAfter(createOp);
-      auto datatype = llvm::cast(createOp.getElemType());
+      // Use consumer element type if specified (asymmetric transfer)
+      auto datatype = llvm::cast(
+          createOp.getConsumerElemTypeOrDefault());
       auto consumerObjFifoSize =
           builder.getIntegerAttr(builder.getI32Type(), consumerDepth);
       // rename and replace split objectFifo
diff --git a/python/dialects/aie.py b/python/dialects/aie.py
index 07b91da6814..130c4f48223
100644 --- a/python/dialects/aie.py +++ b/python/dialects/aie.py @@ -466,8 +466,14 @@ def __init__( padDimensions=None, disable_synchronization=None, iter_count=None, + consumer_datatype=None, ): self.datatype = try_convert_np_type_to_mlir_type(datatype) + self.consumer_datatype = ( + try_convert_np_type_to_mlir_type(consumer_datatype) + if consumer_datatype is not None + else None + ) if not isinstance(consumerTiles, List): consumerTiles = [consumerTiles] if dimensionsFromStreamPerConsumer is None: @@ -475,6 +481,9 @@ def __init__( if dimensionsToStream is None: dimensionsToStream = [] of_Ty = TypeAttr.get(ObjectFifoType.get(self.datatype)) + consumerElemType = None + if self.consumer_datatype is not None: + consumerElemType = TypeAttr.get(ObjectFifoType.get(self.consumer_datatype)) if initValues is not None: values = [] for e in initValues: @@ -498,20 +507,22 @@ def __init__( initValues=initValues, iter_count=iter_count, ) + if consumerElemType is not None: + self.attributes["consumerElemType"] = consumerElemType def acquire(self, port, num_elem): - subview_t = ObjectFifoSubviewType.get(self.datatype) + # Use consumer_datatype for consumer-side acquire if available + dt = self.datatype + if self.consumer_datatype is not None and port == ObjectFifoPort.Consume: + dt = self.consumer_datatype + subview_t = ObjectFifoSubviewType.get(dt) acq = ObjectFifoAcquireOp(subview_t, port, self.sym_name.value, num_elem) objects = [] if acq.size.value == 1: - return ObjectFifoSubviewAccessOp( - self.datatype, acq.subview, acq.size.value - 1 - ).result + return ObjectFifoSubviewAccessOp(dt, acq.subview, acq.size.value - 1).result for i in range(acq.size.value): - objects.append( - ObjectFifoSubviewAccessOp(self.datatype, acq.subview, i).result - ) + objects.append(ObjectFifoSubviewAccessOp(dt, acq.subview, i).result) return objects def release(self, port, num_elem): diff --git a/python/iron/dataflow/objectfifo.py b/python/iron/dataflow/objectfifo.py index 132ece43c05..b79f9f886fc 
100644 --- a/python/iron/dataflow/objectfifo.py +++ b/python/iron/dataflow/objectfifo.py @@ -47,6 +47,7 @@ def __init__( plio: bool = False, pad_dimensions: list[Sequence[int]] | None = None, init_values: list[np.ndarray] | None = None, + consumer_obj_type: type[np.ndarray] | None = None, ): """Construct an ObjectFifo. @@ -58,6 +59,7 @@ def __init__( dims_from_stream_per_cons (list[Sequence[int]] | None, optional): List of data layout transformations applied by each consumer when data is read from the AXI stream, described as pairs of (size, stride) from highest to lowest dimension. Defaults to None. plio (bool, optional): Whether the ObjectFifo uses PLIO connections. Defaults to False. init_values (list[np.ndarray] | None, optional): Per-buffer static initial values for the producer endpoint. One ndarray per producer-side buffer; the producer tile must be able to hold static data at design startup (e.g. a MemTile). Lowers to the ``initValues`` attribute on the underlying ``aie.objectfifo`` op. Defaults to None. + consumer_obj_type (type[np.ndarray] | None, optional): Consumer element type for asymmetric transfer granularity. When set, the producer sends obj_type-sized transfers and the consumer receives consumer_obj_type-sized transfers. Producer element count must be an integer multiple of consumer element count. Defaults to None. Raises: ValueError: If ``depth`` is provided and is less than 1. 
@@ -82,6 +84,7 @@ def __init__( self._resolving = False self._iter_count: int | None = None self._init_values: list[np.ndarray] | None = init_values + self._consumer_obj_type: type[np.ndarray] | None = consumer_obj_type @classmethod def __get_index(cls) -> int: @@ -290,6 +293,11 @@ def resolve( for con in self._cons ] + consumer_datatype = ( + np_ndarray_type_to_memref_type(self._consumer_obj_type) + if self._consumer_obj_type is not None + else None + ) self._op = object_fifo( self.name, self._prod_tile_op(), @@ -302,6 +310,7 @@ def resolve( padDimensions=self._pad_dimensions, iter_count=self._iter_count, initValues=self._init_values, + consumer_datatype=consumer_datatype, ) if isinstance(self._prod.endpoint, ObjectFifoLink): diff --git a/test/dialect/AIE/objectfifo_bad.mlir b/test/dialect/AIE/objectfifo_bad.mlir index 2380e71da22..ec4e8404d41 100644 --- a/test/dialect/AIE/objectfifo_bad.mlir +++ b/test/dialect/AIE/objectfifo_bad.mlir @@ -50,6 +50,39 @@ aie.device(xcve2302) { %tile12 = aie.tile(1, 2) %tile23 = aie.tile(2, 3) - aie.objectfifo @of0 (%tile12, {%tile23}, 2 : i32) : !aie.objectfifo> = [dense<[[4, 5], [6, 7]]> : memref<2x2xi32>, + aie.objectfifo @of0 (%tile12, {%tile23}, 2 : i32) : !aie.objectfifo> = [dense<[[4, 5], [6, 7]]> : memref<2x2xi32>, dense<[[0, 1, 2], [3, 4, 5]]> : memref<2x2xi32>] } + +// ----- + +// CHECK: producer element size (35) must be an integer multiple of consumer element size (10) + +aie.device(npu2) { + %tile01 = aie.tile(0, 1) + %tile02 = aie.tile(0, 2) + + aie.objectfifo @of0 (%tile01, {%tile02}, 1 : i32) : !aie.objectfifo> -> !aie.objectfifo> +} + +// ----- + +// CHECK: producer and consumer must have the same scalar element type + +aie.device(npu2) { + %tile01 = aie.tile(0, 1) + %tile02 = aie.tile(0, 2) + + aie.objectfifo @of0 (%tile01, {%tile02}, 1 : i32) : !aie.objectfifo> -> !aie.objectfifo> +} + +// ----- + +// CHECK: consumer element count must be positive + +aie.device(npu2) { + %tile01 = aie.tile(0, 1) + %tile02 = 
aie.tile(0, 2) + + aie.objectfifo @of0 (%tile01, {%tile02}, 1 : i32) : !aie.objectfifo> -> !aie.objectfifo> +} diff --git a/test/objectFifo-stateful-transform/asymmetric_element_type/asymmetric_element_type.mlir b/test/objectFifo-stateful-transform/asymmetric_element_type/asymmetric_element_type.mlir new file mode 100644 index 00000000000..d1ffa5a1814 --- /dev/null +++ b/test/objectFifo-stateful-transform/asymmetric_element_type/asymmetric_element_type.mlir @@ -0,0 +1,54 @@ +//===- asymmetric_element_type.mlir ------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --aie-objectFifo-stateful-transform %s | FileCheck %s + +// ObjectFifo with asymmetric element types (producer 40xi32, consumer 10xi32). +// Producer sends 40 elements per BD, consumer receives 10 at a time (4:1 ratio). 
+ +// Consumer-side buffer should be the small type +// CHECK-DAG: aie.buffer(%{{.*}}) {sym_name = "test_wts_cons_buff_0"} : memref<10xi32> + +// Producer-side buffer should be the large type +// CHECK-DAG: aie.buffer(%{{.*}}) {sym_name = "test_wts_buff_0"} : memref<40xi32> + +// Flow connecting producer DMA to consumer DMA +// CHECK: aie.flow + +// Producer DMA sends 40 elements per BD +// CHECK: aie.memtile_dma +// CHECK: aie.dma_bd(%{{.*}} : memref<40xi32>, 0, 40) + +// Consumer DMA receives 10 elements per BD +// CHECK: aie.mem +// CHECK: aie.dma_bd(%{{.*}} : memref<10xi32>, 0, 10) + +module { + aie.device(npu2) { + %mt = aie.tile(0, 1) + %ct = aie.tile(0, 2) + + aie.objectfifo @test_wts(%mt, {%ct}, 1 : i32) + : !aie.objectfifo> + -> !aie.objectfifo> + + %c = aie.core(%ct) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + scf.for %i = %c0 to %c4 step %c1 { + %sv = aie.objectfifo.acquire @test_wts(Consume, 1) : !aie.objectfifosubview> + %elem = aie.objectfifo.subview.access %sv[0] : !aie.objectfifosubview> -> memref<10xi32> + aie.objectfifo.release @test_wts(Consume, 1) + } + aie.end + } + } +} diff --git a/test/objectFifo-stateful-transform/asymmetric_element_type/asymmetric_with_initvalues.mlir b/test/objectFifo-stateful-transform/asymmetric_element_type/asymmetric_with_initvalues.mlir new file mode 100644 index 00000000000..60443d2f0c9 --- /dev/null +++ b/test/objectFifo-stateful-transform/asymmetric_element_type/asymmetric_with_initvalues.mlir @@ -0,0 +1,62 @@ +//===- asymmetric_with_initvalues.mlir ---------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --aie-objectFifo-stateful-transform %s | FileCheck %s + +// Asymmetric ObjectFifo with initValues and depth=2. +// Mobilenet weight loading pattern: 2 large buffers with static weight +// data on MemTile, consumer receives small chunks on CoreTile. + +// Consumer buffers use the small type +// CHECK-DAG: aie.buffer(%{{.*}}) {sym_name = "wts_cons_buff_0"} : memref<10xi32> +// CHECK-DAG: aie.buffer(%{{.*}}) {sym_name = "wts_cons_buff_1"} : memref<10xi32> + +// Producer buffers have initial values +// CHECK-DAG: aie.buffer(%{{.*}}) {sym_name = "wts_buff_0"} : memref<40xi32> = dense<1> +// CHECK-DAG: aie.buffer(%{{.*}}) {sym_name = "wts_buff_1"} : memref<40xi32> = dense<2> + +// Producer lock: init = 0 (depth - initValues count = 0) +// CHECK-DAG: aie.lock(%{{.*}}) {init = 0 : i32, sym_name = "wts_prod_lock_0"} +// Consumer lock: init = 2 (initValues count) +// CHECK-DAG: aie.lock(%{{.*}}) {init = 2 : i32, sym_name = "wts_cons_lock_0"} + +// MM2S: 2 BDs for depth=2, each 40 elements +// CHECK: aie.memtile_dma +// CHECK: aie.dma_bd(%{{.*}} : memref<40xi32>, 0, 40) +// CHECK: aie.dma_bd(%{{.*}} : memref<40xi32>, 0, 40) + +// S2MM: 2 BDs (consumer depth=2), 10 elements each +// CHECK: aie.mem +// CHECK: aie.dma_bd(%{{.*}} : memref<10xi32>, 0, 10) +// CHECK: aie.dma_bd(%{{.*}} : memref<10xi32>, 0, 10) + +module { + aie.device(npu2) { + %mt = aie.tile(0, 1) + %ct = aie.tile(0, 2) + + aie.objectfifo @wts(%mt, {%ct}, 2 : i32) + : !aie.objectfifo> + -> !aie.objectfifo> + = [dense<1> : memref<40xi32>, dense<2> : memref<40xi32>] + + %c = aie.core(%ct) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8 = arith.constant 8 : index + scf.for %i = %c0 to %c8 step %c1 { + %sv = aie.objectfifo.acquire @wts(Consume, 1) : !aie.objectfifosubview> + %elem = aie.objectfifo.subview.access %sv[0] : !aie.objectfifosubview> -> memref<10xi32> + aie.objectfifo.release 
@wts(Consume, 1) + } + aie.end + } + } +} diff --git a/test/python/objFifo_asymmetric.py b/test/python/objFifo_asymmetric.py new file mode 100644 index 00000000000..cd8ffb19c9d --- /dev/null +++ b/test/python/objFifo_asymmetric.py @@ -0,0 +1,61 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2026 AMD Inc. + +# RUN: %python %s | FileCheck %s + +import numpy as np + +from aie.iron import ObjectFifo, Program, Runtime, Worker +from aie.iron.controlflow import range_ +from aie.iron.device import NPU2, AnyMemTile +from aie.iron.dataflow.endpoint import ObjectFifoEndpoint + + +# CHECK: aie.objectfifo @wts({{.*}}) : !aie.objectfifo> -> !aie.objectfifo> +def test_objectfifo_asymmetric(): + """ObjectFifo with asymmetric element types: producer sends 40xi32, + consumer receives 10xi32 (4:1 ratio). The DMA hardware handles the + size mismatch via AXI backpressure. + """ + + dev = NPU2() + prod_ty = np.ndarray[(40,), np.dtype[np.int32]] + cons_ty = np.ndarray[(10,), np.dtype[np.int32]] + + wts = ObjectFifo( + prod_ty, + depth=1, + name="wts", + consumer_obj_type=cons_ty, + init_values=[np.ones(40, dtype=np.int32)], + ) + wts.prod().endpoint = ObjectFifoEndpoint(AnyMemTile) + + of_out = ObjectFifo(cons_ty, depth=2, name="of_out") + + def consumer_body(wts_c, of_out_p): + for _ in range_(4): + elem_in = wts_c.acquire(1) + elem_out = of_out_p.acquire(1) + for i in range_(10): + elem_out[i] = elem_in[i] + wts_c.release(1) + of_out_p.release(1) + + cons = Worker(consumer_body, fn_args=[wts.cons(), of_out.prod()]) + + rt = Runtime() + tensor_ty = np.ndarray[(40,), np.dtype[np.int32]] + with rt.sequence(tensor_ty) as a: + rt.start(cons) + rt.drain(of_out.cons(), a, wait=True) + + module = Program(dev, rt).resolve_program() + print(module) + + +if __name__ == "__main__": + test_objectfifo_asymmetric()