Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ __pycache__
**.cmake
include/**/Makefile
lib/**/Makefile
**/*.prj/**

CLAUDE.md
/platforms/vck190_bare/petalinux/build
Expand Down
10 changes: 10 additions & 0 deletions include/aie/Dialect/AIE/IR/AIEDialect.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@ uint32_t getShimBurstLengthBytes(const AIE::AIETargetModel &tm,
uint32_t getShimBurstLengthEncoding(const AIE::AIETargetModel &tm,
uint32_t burstLength);

// Generate a symbol name that is not currently defined in the symbol table of
// `symbolTableOp`. Names are formed as "<prefix><n>" for increasing n, starting
// at the current value of `counter`.
//
// `counter` is an in/out parameter: on return it has been advanced past the
// chosen value, so repeated calls that share the same counter do not re-probe
// earlier candidates and always produce distinct names.
//
// The returned name is not inserted into the symbol table; the caller is
// responsible for creating a symbol with that name. If no symbol is created,
// a later call that starts from a fresh or reset counter may return the same
// name again.
std::string generateUniqueSymbolName(mlir::Operation *symbolTableOp,
llvm::StringRef prefix, unsigned &counter);

mlir::LogicalResult
verifyOffsetSizeAndStrideOp(mlir::OffsetSizeAndStrideOpInterface op);

Expand Down
2 changes: 2 additions & 0 deletions include/aie/Dialect/AIE/IR/AIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1011,6 +1011,8 @@ def AIE_DMABDOp: AIE_Op<"dma_bd", []> {
OptionalAttr<AIEI32Attr>:$bd_id,
OptionalAttr<PacketInfoAttr>:$packet,
DefaultValuedOptionalAttr<AIEI32Attr, "0">:$burst_length,
// if set, the aiex.parameter that will override the BD's address
OptionalAttr<FlatSymbolRefAttr>:$offset_parameter,
// should never be assigned by user...
OptionalAttr<AIEI32Attr>:$next_bd_id
);
Expand Down
14 changes: 14 additions & 0 deletions include/aie/Dialect/AIEX/AIEUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,19 @@ struct SubviewTraceResult {
//
// This function checks that all subviews remain static and contiguous.
std::optional<SubviewTraceResult> traceSubviewToBlockArgument(Value value);

// Emit an `aiex.npu.update_from_scratchpad` op that adds the runtime offset
// (held in the scratchpad slot referenced by `bdOp`'s `offset_parameter` /
// `offset_state_table_idx` attributes, multiplied by the element size of
// `bufType`) into the BD address register at `registerAddr`.
//
// `bdOp` must carry both the `offset_parameter` (FlatSymbolRefAttr pointing at
// an `aiex.parameter`) and `offset_state_table_idx` (IntegerAttr, set by
// `--aie-lower-parameters`) attributes. The referenced parameter must have
// type `i32`.
//
// The result must be checked by the caller.
// NOTE(review): presumably failure is returned when the preconditions above
// are not met -- confirm against the implementation.
LogicalResult emitUpdateBdAddressFromOffsetParameter(OpBuilder &builder,
Operation *bdOp,
BaseMemRefType bufType,
uint64_t registerAddr);
}
} // namespace xilinx
99 changes: 98 additions & 1 deletion include/aie/Dialect/AIEX/IR/AIEX.td
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,9 @@ def AIE_NpuDmaMemcpyNdOp: AIEX_Op<"npu.dma_memcpy_nd", [
DefaultValuedOptionalAttr<I64Attr, "0">:$d0_zero_after,
DefaultValuedOptionalAttr<I64Attr, "0">:$d1_zero_after,
DefaultValuedOptionalAttr<I64Attr, "0">:$d2_zero_after,
DefaultValuedOptionalAttr<I64Attr, "0">:$burst_length
DefaultValuedOptionalAttr<I64Attr, "0">:$burst_length,
// if set, the aiex.parameter that will override the BD's address
OptionalAttr<FlatSymbolRefAttr>:$offset_parameter
);

let assemblyFormat = [{
Expand Down Expand Up @@ -1390,6 +1392,101 @@ def AIEX_SetLockOp: AIEX_Op<"set_lock", [HasParent<"AIE::RuntimeSequenceOp">, Sk
}];
}

//===----------------------------------------------------------------------===//
// Parameter ops
//===----------------------------------------------------------------------===//

def AIEX_ParameterOp: AIEX_Op<"parameter", [Symbol]> {
  let summary = "Declare a scratchpad runtime parameter";
  let description = [{
    Declares a named runtime parameter that can be set from the host by writing to DDR and read by AIE cores using `aiex.read_parameter`.
    Parameters are communicated via the scratchpad memory mechanism (CREATE_SCRATCHPAD + UPDATE_FROM_SCRATCHPAD firmware opcodes).

    `aiex.parameter` ops are declared at **module scope** (outside any `aie.device`).
    The scratchpad is a single hardware resource shared by all PDIs loaded by a
    runtime sequence, so parameters are global to the whole module and may be
    referenced from any device.

    Alternatively, a parameter can be used to offset a BD address by naming it in the `offset_parameter` attribute of `aie.dma_bd` or `aiex.npu.dma_memcpy_nd`.
    The two kinds of use are mutually exclusive: a parameter used as an address offset cannot also be read from the cores.
    When used as an address offset on a BD, the parameter value is expressed in elements; it is multiplied by the BD's element size to obtain the offset added to the BD address.

    Each parameter occupies one StateTable entry (4 bytes in the scratchpad) in DDR.
    The `--aie-lower-parameters` pass assigns the `state_table_idx` and the `kind` attribute (derived from the parameter's usage:
    `core` if read by a core via `aiex.read_parameter`, `addr` if used as a DMA offset via the `offset_parameter` attribute on a DMA op).
    Indices are unique across the entire module.

    The `type` attribute specifies the data type of the parameter (bf16, f32, or an integer type up to i32).
    For `kind == addr`, the `type` must be `i32`.
    The actual encoding uses a 30-bit value range due to the firmware's 2-bit masking.

    Example (at module scope):
    ```mlir
    aiex.parameter @foo : i32
    aiex.parameter @bar : bf16
    aie.device(npu2) { ... }
    ```
  }];
  let arguments = (ins
    SymbolNameAttr:$sym_name,
    TypeAttr:$type,
    // assigned by `--aie-lower-parameters` pass:
    OptionalAttr<UI8Attr>:$state_table_idx,
    OptionalAttr<ParameterKind>:$kind
  );
  let results = (outs);
  let assemblyFormat = [{ $sym_name `:` $type attr-dict }];
}

def AIEX_ReadParameterOp: AIEX_Op<"read_parameter", []> {
  let summary = "Read a scratchpad runtime parameter value on an AIE core";
  let description = [{
    Reads, from within an `aie.core`, a runtime parameter previously declared with `aiex.parameter` at module scope.
    You must first synchronize the scratchpad to the core buffers from the runtime sequence using `aiex.sync_parameters_from_host`.

    The `--aie-lower-parameters` pass creates an `aie.buffer` on the core for each unique parameter read, and then replaces each instance of this op with:
    1. A `memref.load` from that buffer
    2. Arithmetic to decode the value (right-shift by 2 to undo firmware masking)

    Example:
    ```mlir
    %val = aiex.read_parameter @foo : i32
    ```
  }];
  let arguments = (ins
    FlatSymbolRefAttr:$parameter,
    // assigned by `--aie-lower-parameters` pass:
    OptionalAttr<FlatSymbolRefAttr>:$buffer
  );
  let results = (outs AnyType:$result);
  let assemblyFormat = [{ $parameter `:` type($result) attr-dict }];
  let hasVerifier = 1;
}

def AIEX_SyncParametersFromHostOp: AIEX_Op<"sync_parameters_from_host",
    [HasParent<"AIE::RuntimeSequenceOp">]> {
  let summary = "Sync all parameters from host scratchpad to core buffers";
  let description = [{
    Lowers to:
    1. `aiex.npu.create_scratchpad` with size = 4 * num_parameters
    2. For each parameter of kind `core` with a destination aie.buffer on a core:
       a. `aiex.npu.write32` to zero-out the target buffer
       b. `aiex.npu.update_from_scratchpad` with func=incr, func_arg=0
    3. For each parameter of kind `addr`:
       a. `aiex.npu.update_from_scratchpad` that adds the parameter value as an offset to the DMA BD's address

    Can only be used inside `aie.runtime_sequence`.

    The op takes no operands and its assembly format consists of the optional
    attribute dictionary only, so it is written without parentheses.

    Example:
    ```mlir
    aiex.sync_parameters_from_host
    ```
  }];
  let arguments = (ins);
  let results = (outs);
  let assemblyFormat = [{ attr-dict }];
}

// Include CERT operations
include "aie/Dialect/AIEX/IR/CERTOps.td"

Expand Down
9 changes: 9 additions & 0 deletions include/aie/Dialect/AIEX/IR/AIEXAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,13 @@ def StateTableFunc : I32EnumAttr<"StateTableFunc",
let cppNamespace = "::xilinx::AIEX";
}

// Usage kinds of an `aiex.parameter`.
// `core`: the parameter is read by a core via `aiex.read_parameter`.
def ParameterKindCore : I32EnumAttrCase<"Core", 0, "core">;
// `addr`: the parameter is used as a DMA address offset through an
// `offset_parameter` attribute on a DMA op.
def ParameterKindAddr : I32EnumAttrCase<"Addr", 1, "addr">;

// Enum attribute carried by the optional `kind` attribute of `aiex.parameter`;
// generated into the ::xilinx::AIEX C++ namespace.
def ParameterKind : I32EnumAttr<"ParameterKind",
"Usage kind of a scratchpad runtime parameter",
[ParameterKindCore, ParameterKindAddr]> {
let cppNamespace = "::xilinx::AIEX";
}

#endif // AIEX_ATTRS
4 changes: 4 additions & 0 deletions include/aie/Dialect/AIEX/Transforms/AIEXPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
createAIEExpandLoadPdiPass();
std::unique_ptr<mlir::OperationPass<AIE::DeviceOp>>
createAIEXInlineTraceConfigPass();
/// Create the parameter-lowering pass, which operates on an mlir::ModuleOp.
/// This overload takes no options.
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
createAIELowerParametersPass();
/// Same pass, constructed with explicitly provided options.
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
createAIELowerParametersPass(AIELowerParametersOptions options);

/// Generate the code for registering passes.
#define GEN_PASS_REGISTRATION
Expand Down
29 changes: 29 additions & 0 deletions include/aie/Dialect/AIEX/Transforms/AIEXPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,35 @@ def AIELowerSetLock : Pass<"aie-lower-set-lock", "AIE::DeviceOp"> {
];
}

// Pass definition for `--aie-lower-parameters`. It is anchored on
// mlir::ModuleOp (not AIE::DeviceOp) because, per the description below, state
// table indices must be unique across the entire module.
def AIELowerParameters : Pass<"aie-lower-parameters", "mlir::ModuleOp"> {
let summary = "Lower parameter ops to scratchpad + update_from_scratchpad";
let description = [{
Assigns globally-unique state table indices to every `aiex.parameter` in the module,
creates per-core buffers for each `aiex.read_parameter` usage on cores,
lowers `aiex.sync_parameters_from_host` to create_scratchpad + update_from_scratchpad sequences,
and emits a single parameter layout file.

The scratchpad is a single hardware resource shared by all PDIs loaded by a runtime sequence, so all parameters in the module must occupy distinct indices.
The total parameter count is capped at 32.

The pass also sets a `kind` attribute on each `aiex.parameter` based on how it is used: `core` if read by `aiex.read_parameter`, `addr` if used as a DMA `offset_parameter`.
A parameter may not be used as both.
}];

let constructor = "xilinx::AIEX::createAIELowerParametersPass()";
// Dialects the pass declares as dependencies.
let dependentDialects = [
"xilinx::AIE::AIEDialect",
"xilinx::AIEX::AIEXDialect",
"mlir::arith::ArithDialect",
"mlir::memref::MemRefDialect",
];
// `output-params-file` defaults to the empty string.
// NOTE(review): the behavior for an empty path (skip writing vs. error) is not
// visible here -- confirm against the pass implementation.
let options = [
Option<"outputParamsFile", "output-params-file", "std::string",
/*default=*/"\"\"",
"Path to write the parameter layout file to.">
];
}

def AIETransformBfpTypes : Pass<"aie-transform-bfp-types", "AIE::DeviceOp"> {
let summary = "Transform bfp types to standard builtin types";

Expand Down
8 changes: 3 additions & 5 deletions lib/Conversion/AIEToConfiguration/AIEToConfiguration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ static LogicalResult convertTransactionOpsToMLIR(
}
OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointToStart(device.getBody());
int id = 0;
unsigned id = 0;
for (auto &op : operations) {
if (op.cmd.Opcode != XAIE_IO_BLOCKWRITE) {
global_data.push_back(nullptr);
Expand All @@ -659,10 +659,8 @@ static LogicalResult convertTransactionOpsToMLIR(
const uint32_t *d = reinterpret_cast<const uint32_t *>(op.cmd.DataPtr);
std::vector<uint32_t> data32(d, d + size);

std::string name = blockwrite_prefix;
do {
name = blockwrite_prefix + std::to_string(id++);
} while (device.lookupSymbol(name));
std::string name =
AIE::generateUniqueSymbolName(device, blockwrite_prefix, id);

MemRefType memrefType = MemRefType::get({size}, builder.getI32Type());
TensorType tensorType =
Expand Down
15 changes: 13 additions & 2 deletions lib/Dialect/AIE/IR/AIEDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,15 @@ uint32_t xilinx::AIE::getShimBurstLengthEncoding(const AIE::AIETargetModel &tm,
return getShimBurstLength(tm, burstLength).first;
}

// Probe candidate names "<prefix><counter>" until one is found that does not
// resolve in the symbol table of `symbolTableOp`, and return it. `counter` is
// incremented once per candidate tried, so on return it is one past the value
// used in the returned name. The name is only looked up, never inserted.
std::string xilinx::AIE::generateUniqueSymbolName(
    mlir::Operation *symbolTableOp, llvm::StringRef prefix, unsigned &counter) {
  for (;;) {
    std::string candidate = (prefix + llvm::Twine(counter)).str();
    ++counter;
    if (!mlir::SymbolTable::lookupSymbolIn(symbolTableOp, candidate))
      return candidate;
  }
}

LogicalResult
xilinx::AIE::myVerifyOffsetSizeAndStrideOp(OffsetSizeAndStrideOpInterface op) {
std::array<unsigned, 3> maxRanks = op.getArrayAttrMaxRanks();
Expand Down Expand Up @@ -3026,12 +3035,14 @@ LogicalResult RuntimeSequenceOp::verifyBeforeMaterialization() {
!llvm::isa<DeviceOp>(symbolDefOp) &&
!llvm::isa<RuntimeSequenceOp>(symbolDefOp) &&
!llvm::isa<BufferOp>(symbolDefOp) &&
!llvm::isa<memref::GlobalOp>(symbolDefOp)) {
!llvm::isa<memref::GlobalOp>(symbolDefOp) &&
symbolDefOp->getName().getStringRef() != "aiex.parameter") {
op->emitOpError()
<< "references symbol '"
<< symbolRef.getRootReference().getValue()
<< "' which must be either a ShimDMAAllocationOp, DeviceOp, "
"RuntimeSequenceOp, BufferOp or GlobalOp, but got: "
"RuntimeSequenceOp, BufferOp, GlobalOp or ParameterOp, but "
"got: "
<< symbolDefOp->getName().getStringRef();
return WalkResult::interrupt();
}
Expand Down
24 changes: 23 additions & 1 deletion lib/Dialect/AIEX/IR/AIEXDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,8 @@ struct LinearizeContiguousTransfer
op.getIssueTokenAttr(), op.getD0ZeroBeforeAttr(),
op.getD1ZeroBeforeAttr(), op.getD2ZeroBeforeAttr(),
op.getD0ZeroAfterAttr(), op.getD1ZeroAfterAttr(),
op.getD2ZeroAfterAttr(), op.getBurstLengthAttr());
op.getD2ZeroAfterAttr(), op.getBurstLengthAttr(),
op.getOffsetParameterAttr());
return mlir::success();
}
};
Expand Down Expand Up @@ -1117,6 +1118,27 @@ AIE::DeviceOp AIEX::ConfigureOp::getReferencedDeviceOp() {
return referencedDevice;
}

//===----------------------------------------------------------------------===//
// ReadParameterOp
//===----------------------------------------------------------------------===//

// Verifier for `aiex.read_parameter`. Checks, in order, that the op is nested
// in an `aie.device`, that it is nested in an `aie.core`, and that the
// referenced symbol resolves to an `aiex.parameter` in the enclosing module.
LogicalResult AIEX::ReadParameterOp::verify() {
  auto *self = getOperation();

  // Placement checks: the device check comes first so its diagnostic wins
  // when the op is outside both a device and a core.
  if (!self->getParentOfType<AIE::DeviceOp>())
    return emitOpError("must be inside an aie.device");
  if (!self->getParentOfType<AIE::CoreOp>())
    return emitOpError("must be inside an aie.core");

  // The symbol is resolved against the enclosing module rather than the
  // device; a missing module is reported the same way as a missing parameter.
  if (auto enclosingModule = self->getParentOfType<ModuleOp>()) {
    if (enclosingModule.lookupSymbol<AIEX::ParameterOp>(getParameter()))
      return success();
  }

  return emitOpError("references unknown parameter '")
         << getParameter()
         << "' (aiex.parameter ops are declared at module scope)";
}

LogicalResult AIEX::ConfigureOp::verify() {
AIE::DeviceOp parentDev = getOperation()->getParentOfType<AIE::DeviceOp>();
AIE::DeviceOp referencedDev = getReferencedDeviceOp();
Expand Down
4 changes: 3 additions & 1 deletion lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,9 @@ struct AIECtrlPacketToDmaPass
SmallVector<Value>{}, SmallVector<Value>{},
SmallVector<Value>{}, ArrayRef(staticOffsets),
ArrayRef(staticSizes), ArrayRef(staticStrides),
nullptr, metadata, 0, true, 0, 0, 0, 0, 0, 0);
nullptr, metadata, 0, true, 0, 0, 0, 0, 0, 0,
/*burst_length=*/0,
/*offset_parameter=*/FlatSymbolRefAttr());

auto shimRow = builder.getI32IntegerAttr(0);
auto shimCol = builder.getI32IntegerAttr(col);
Expand Down
8 changes: 8 additions & 0 deletions lib/Dialect/AIEX/Transforms/AIEDMATasksToNPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,14 @@ struct AIEDMATasksToNPUPass
/*addr*/ register_addr,
/*arg_idx*/ arg_idx,
/*arg_plus*/ offset);
// If this BD has an offset_parameter, emit update_from_scratchpad to add
// the runtime offset to the BD address register.
if (bd_op.getOffsetParameterAttr()) {
auto bufType = llvm::cast<BaseMemRefType>(bd_op.getBuffer().getType());
if (failed(emitUpdateBdAddressFromOffsetParameter(
builder, bd_op, bufType, register_addr)))
return failure();
}
} else if (AIE::BufferOp buffer =
llvm::dyn_cast<AIE::BufferOp>(buf.getDefiningOp())) {
uint64_t buf_addr;
Expand Down
9 changes: 9 additions & 0 deletions lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,15 @@ struct DmaToNpuPattern : OpConversionPattern<NpuDmaMemcpyNdOp> {
targetModel.getDmaBdAddressOffset(tileCol, tileRow);
NpuAddressPatchOp::create(rewriter, op->getLoc(), addr, arg_idx, offset);

// If this DMA op has an offset_parameter, emit an update_from_scratchpad
// to add the runtime offset to the BD address register.
if (op.getOffsetParameterAttr()) {
auto bufType = cast<BaseMemRefType>(op.getMemref().getType());
if (failed(emitUpdateBdAddressFromOffsetParameter(rewriter, op, bufType,
addr)))
return failure();
}

// push the patched bd onto the dma task queue
NpuPushQueueOp::create(
rewriter, op->getLoc(), column, row, infoOp.getChannelDirAttr(),
Expand Down
Loading
Loading