WIP by hunhoffe · Pull Request #5 · hunhoffe/mlir-aie

hunhoffe · 2026-04-20T17:03:15Z

No description provided.

github-actions

Remaining comments that could not be posted as review comments, in order to avoid hitting the GitHub rate limit:

clang-format

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitDepthPromotion.cpp

Line 330 in 3e39cad

if (tileIt != inferredMap.end() && !tileIt->second.consumerTiles.empty()) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitDmaTaskToConduit.cpp

Lines 86 to 91 in 3e39cad

    
           static void 
        
           dimsToOffsetsStrides(int32_t bdOffset, int64_t len, 
        
                                AIE::BDDimLayoutArrayAttr dimensions, 
        
                                llvm::SmallVectorImpl<int64_t> &offsets, 
        
                                llvm::SmallVectorImpl<int64_t> &sizes, 
        
                                llvm::SmallVectorImpl<int64_t> &strides) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitDmaTaskToConduit.cpp

Lines 146 to 148 in 3e39cad

    
           llvm::StringRef conduitName = 
        
               conduitChannelAttr ? conduitChannelAttr.getValue() 
        
                                  : alloc.getSymName();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitDmaTaskToConduit.cpp

Lines 161 to 164 in 3e39cad

    
           void processRuntimeSequence( 
        
               AIE::RuntimeSequenceOp rtSeq, 
        
               const llvm::StringMap<ShimAllocInfo> &allocMap, 
        
               mlir::OpBuilder &builder) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitDmaTaskToConduit.cpp

Line 210 in 3e39cad

dimsToOffsetsStrides(bdOffset, len, dimensions, offsets, sizes, strides);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitDmaTaskToConduit.cpp

Lines 225 to 226 in 3e39cad

    
           offsetsAttr, sizesAttr, stridesAttr, 
        
           dimensions);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseChannels.cpp

Lines 365 to 366 in 3e39cad

    
           auto nameAttr = 
        
               op->getAttrOfType<mlir::FlatSymbolRefAttr>("name");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseChannels.cpp

Lines 511 to 512 in 3e39cad

    
           auto nameAttr = 
        
               op->getAttrOfType<mlir::FlatSymbolRefAttr>("name");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Line 24 in 3e39cad

    
           //      direct memref.alloc references (L1) or relay buffer references (MemTile).

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Line 127 in 3e39cad

mlir::MLIRContext *ctx) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Line 377 in 3e39cad

mlir::MLIRContext *ctx) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 425 to 426 in 3e39cad

    
           static llvm::SmallVector<std::string> 
        
           getProducedChannels(AIE::CoreOp core) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 447 to 448 in 3e39cad

    
           static llvm::SmallVector<std::string> 
        
           getConsumedChannels(AIE::CoreOp core) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 563 to 565 in 3e39cad

    
           device.walk([&](Create createOp) { 
        
             createMap[createOp.getName()] = createOp; 
        
           });

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 623 to 624 in 3e39cad

    
           std::string intermediateName = 
        
               intermediateConduit.getName().str();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 707 to 709 in 3e39cad

    
           static IntermediateRoute 
        
           decideRoute(Create intermediateConduit, mlir::Value tile, 
        
                       AIE::DeviceOp device) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 741 to 744 in 3e39cad

    
           static std::string emitMemTileRelay(FusableCorePair &pair, 
        
                                                AIE::DeviceOp device, 
        
                                                mlir::OpBuilder &builder, 
        
                                                mlir::MLIRContext *ctx) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 756 to 768 in 3e39cad

    
           builder.create<Create>( 
        
               intermediateConduit.getLoc(), mlir::StringAttr::get(ctx, relayName), 
        
               intermediateConduit.getElementTypeAttr(), 
        
               intermediateConduit.getDepthAttr(), 
        
               /*routing_mode=*/RoutingModeAttr{}, 
        
               /*sync_mode=*/SyncModeAttr{}, 
        
               /*producer_rates=*/nullptr, 
        
               /*consumer_rates=*/nullptr, 
        
               /*fusion_group=*/mlir::StringAttr{}, 
        
               /*bd_repeat=*/nullptr, 
        
               /*dma_repeat=*/nullptr, 
        
               /*producer_dimensions=*/nullptr, 
        
               /*consumer_dimensions=*/nullptr);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 779 to 780 in 3e39cad

    
           mlir::ArrayAttr dstsArr = mlir::ArrayAttr::get( 
        
               ctx, {mlir::FlatSymbolRefAttr::get(ctx, relayName)});

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 791 to 794 in 3e39cad

    
           static mlir::LogicalResult composeCoresBodies( 
        
               FusableCorePair &pair, IntermediateRoute route, 
        
               mlir::OpBuilder &builder, mlir::MLIRContext *ctx, 
        
               AIE::DeviceOp device) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Line 857 in 3e39cad

mlir::Value allocVal; // L1 route only.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 942 to 943 in 3e39cad

    
           mapping.map(consumerFor.getInductionVar(), 
        
                       producerFor.getInductionVar());

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 960 to 961 in 3e39cad

    
           defOp != consumerFor.getOperation() && 
        
           !mapping.contains(operand) && alreadyCloned.insert(defOp).second) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 1050 to 1051 in 3e39cad

    
           mlir::Value deadToken, 
        
           mlir::OpBuilder &builder) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Line 1066 in 3e39cad

resultToken.getUsers().end());

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Line 1094 in 3e39cad

mlir::OpBuilder &builder) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Line 1102 in 3e39cad

token.getUsers().end());

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 1113 to 1114 in 3e39cad

    
           mlir::OpBuilder &builder, 
        
           const std::string &consumerChannelName) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseCoreBodyPass.cpp

Lines 1315 to 1316 in 3e39cad

    
           std::string channelName = 
        
               pair.intermediateConduit.getName().str();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Line 139 in 3e39cad

    
           bool hasShimConsumer = tileIt != inferredMap.end() && !tileIt->second.shimConsumerTiles.empty();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 263 to 264 in 3e39cad

    
           static llvm::SmallVector<ArgGroup> 
        
           buildArgGroupsFromSeq(mlir::Block &seqBody) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 276 to 277 in 3e39cad

    
           int64_t offset = 
        
               (offsetsAttr && !offsetsAttr.empty()) ? offsetsAttr[0] : 0;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 621 to 622 in 3e39cad

    
           auto nameAttr = 
        
               op->getAttrOfType<mlir::FlatSymbolRefAttr>("name");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 653 to 654 in 3e39cad

    
           Key k = {alloc.getTile(), 
        
                    static_cast<int>(alloc.getChannelDir())};

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 664 to 665 in 3e39cad

    
           if (allocs[idx].getChannelIndex() != 
        
               static_cast<int64_t>(idx))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 912 to 915 in 3e39cad

    
           auto computeDeadArgs = []( 
        
               const llvm::SmallVector<ArgGroup> &groups, 
        
               const llvm::StringSet<> &erasedChannels, 
        
               unsigned numOrigArgs) -> llvm::DenseSet<unsigned> {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 934 to 939 in 3e39cad

    
           llvm::DenseSet<unsigned> deadA = computeDeadArgs( 
        
               argGroupsA, erasedChannelsA, 
        
               static_cast<unsigned>(origTypesA.size())); 
        
           llvm::DenseSet<unsigned> deadB = computeDeadArgs( 
        
               argGroupsB, erasedChannelsB, 
        
               static_cast<unsigned>(origTypesB.size()));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitInferModes.cpp

Lines 246 to 247 in 3e39cad

    
           op.setRoutingModeAttr( 
        
               RoutingModeAttr::get(module.getContext(), RoutingMode::SharedMemory));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitInferModes.cpp

Lines 308 to 309 in 3e39cad

    
           unsigned numConsumers = consCoords.empty() ? 1 
        
                                                        : consCoords.size();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitPlaceBuffers.cpp

Line 64 in 3e39cad

    
           if (rest.empty() || rest.find_first_not_of("0123456789") != llvm::StringRef::npos)

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitTileInference.cpp

Lines 335 to 336 in 3e39cad

    
           if (!createOp->getAttrOfType<mlir::StringAttr>( 
        
                   "dma_channel_group"))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAAlloc.cpp

Line 512 in 3e39cad

// Multi-device: allocate into the device that owns the producer tile.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAAlloc.cpp

Lines 666 to 667 in 3e39cad

    
           if (targetModel.isMemTile(consCol, consRow) && 
        
               info.putCount > 1 && info.dmaRepeat == 0) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAAlloc.cpp

Lines 763 to 764 in 3e39cad

    
           auto consLocks = 
        
               state.allocateLockPair(consTileVal, consPrefix, consNBufs, prodInit);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Lines 137 to 138 in 3e39cad

    
           info.producerDimensions = 
        
               mlir::cast<AIE::BDDimLayoutArrayAttr>(*dims);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Lines 140 to 141 in 3e39cad

    
           auto arrayOfArrays = 
        
               mlir::cast<AIE::BDDimLayoutArrayArrayAttr>(*dims);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Lines 234 to 235 in 3e39cad

    
           for (int devIdx = 0; 
        
                devIdx < static_cast<int>(state.deviceOps.size()); ++devIdx) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Lines 421 to 422 in 3e39cad

    
           auto arrayOfArrays = 
        
               mlir::cast<AIE::BDDimLayoutArrayArrayAttr>(*dims);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Line 532 in 3e39cad

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 128 to 129 in 3e39cad

    
           << count << " IDs (aligned to " << p 
        
           << ") but only " << (unsigned)(limit - next) << " IDs remain";

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 672 to 673 in 3e39cad

    
           llvm::DenseMap<mlir::Value, 
        
                          std::pair<mlir::Value, mlir::Value>> pktTileS2MMLock;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 744 to 745 in 3e39cad

    
           if (activeDevIdx >= 0 && 
        
               activeDevIdx < static_cast<int>(deviceOps.size()))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 877 to 879 in 3e39cad

    
           uint32_t maxLocks = targetModel->getNumLocks( 
        
               static_cast<int>(tileOp.getCol()), 
        
               static_cast<int>(tileOp.getRow()));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 881 to 883 in 3e39cad

    
           int currentUsed = lockIdCounter.count(tileVal) 
        
                                 ? lockIdCounter[tileVal] 
        
                                 : 0;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 891 to 892 in 3e39cad

    
           llvm::Twine(static_cast<int>(maxLocks) - currentUsed) + 
        
           " of " + llvm::Twine(maxLocks) + " remain");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 945 to 946 in 3e39cad

    
           ConduitInfo *lookupConduit(mlir::StringRef name, 
        
                                      mlir::Operation *contextOp) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 469 to 471 in 3e39cad

    
           // perBufLen: number of elements per physical buffer for this source conduit. 
        
           // Prefer numElems (from put_memref_async descriptors), then derive from 
        
           // elemType (e.g. memref<48xi32> → 48), otherwise fall back to 1.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 517 to 518 in 3e39cad

    
           if (ConduitInfo *dstInfoR = 
        
                   state.lookupConduit(dstNameR, linkOp.op))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 840 to 841 in 3e39cad

    
           state.pktTileS2MMLock[consTileVal] = { 
        
               locks.first.getResult(), locks.second.getResult()};

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 893 to 894 in 3e39cad

    
           srcPort = 
        
               state.tileNextMM2SChannel[srcProdTile.getResult()]++;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 1232 to 1233 in 3e39cad

    
           if (ConduitInfo *dstInfo = 
        
                   state.lookupConduit(dstName2, linkOp.op)) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 1856 to 1857 in 3e39cad

    
           std::string qFG = state.qualifyFuseGroup(info.fuseGroup, 
        
                                                     info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 1916 to 1917 in 3e39cad

    
           bdTermBlock = 
        
               (info.dmaRepeat > 0) ? addBlock() : nullptr;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2382 to 2383 in 3e39cad

    
           // first conduit emits a dma_start.  Subsequent conduits append BD blocks 
        
           // and the post-pass links all BD chains into a single combined ring.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Line 2398 in 3e39cad

std::to_string(consRow) + "_ch" + std::to_string(s2mmChannel),

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2439 to 2440 in 3e39cad

    
           // Linear chain condition: either dma_repeat>0 (finite DMA task queue), 
        
           // or putCount>1 with no dmaRepeat (N sequential puts merged by

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2442 to 2444 in 3e39cad

    
           isLinearChain = 
        
               (info.dmaRepeat > 0) || 
        
               (info.putCount > 1 && info.dmaRepeat == 0);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2508 to 2509 in 3e39cad

    
           builder.create<AIE::NextBDOp>(state.deviceOp.getLoc(), 
        
                                         bdBlocks[0]);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2529 to 2530 in 3e39cad

    
           std::string bdKey = 
        
               name + "__s2mm_" + std::to_string(consIdx);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2537 to 2540 in 3e39cad

    
           std::string bdKey = 
        
               name + "__s2mm_" + std::to_string(consIdx); 
        
           std::string qS2MM = state.qualifyFuseGroup(info.fuseGroupS2MM, 
        
                                                       info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 729 to 730 in 3e39cad

    
           auto ctrKey = 
        
               std::make_tuple(conduitName.str(), col, row, false);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 741 to 742 in 3e39cad

    
           initBuilder.create<mlir::arith::ConstantIndexOp>( 
        
               loc, rotationBufSlot);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 755 to 756 in 3e39cad

    
           auto ctrKey = 
        
               std::make_tuple(conduitName.str(), col, row, true);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 760 to 761 in 3e39cad

    
           initBuilder.setInsertionPointAfterValue( 
        
               resolvedProducerRotationBuf);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 770 to 772 in 3e39cad

    
           initBuilder.create<mlir::memref::StoreOp>( 
        
               loc, zero, resolvedProducerRotationBuf, 
        
               mlir::ValueRange{slotIdx});

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1044 to 1047 in 3e39cad

    
           // This is the reverse of --dma-task-to-conduit. After --conduit-fuse-operators 
        
           // merges runtime_sequences and eliminates dead block args, the remaining 
        
           // put/get ops correspond 1:1 (positionally) to the runtime_sequence block 
        
           // args.  Each Nth conduit.put_memref/get_memref maps to block arg N.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1063 to 1064 in 3e39cad

    
           if (auto cc = alloc->getAttrOfType<mlir::FlatSymbolRefAttr>( 
        
                   "conduit_channel"))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1105 to 1106 in 3e39cad

    
           bool isS2MM = 
        
               (conduitToDir[conduitName] == AIE::DMAChannelDir::S2MM);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1117 to 1119 in 3e39cad

    
           if (auto dimsAttr = 
        
                   op->getAttrOfType<AIE::BDDimLayoutArrayAttr>( 
        
                       "producer_dimensions"))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1126 to 1129 in 3e39cad

    
           mlir::OperationState configState(loc, 
        
                                            "aiex.dma_configure_task_for"); 
        
           configState.addAttribute( 
        
               "alloc", mlir::FlatSymbolRefAttr::get(ctx, allocSym));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1131 to 1132 in 3e39cad

    
           configState.addAttribute("issue_token", 
        
                                    builder.getBoolAttr(true));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1144 to 1146 in 3e39cad

    
           builder.create<AIE::DMABDOp>( 
        
               loc, bufArg, /*offset=*/0, 
        
               static_cast<int>(numElems), dims);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1148 to 1150 in 3e39cad

    
           builder.create<AIE::DMABDOp>( 
        
               loc, bufArg, /*offset=*/0, 
        
               static_cast<int>(numElems));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1152 to 1153 in 3e39cad

    
           bdBlock->back().setAttr("burst_length", 
        
                                   builder.getI32IntegerAttr(0));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1175 to 1176 in 3e39cad

    
           mlir::OperationState awaitState(rtSeq.getLoc(), 
        
                                           "aiex.dma_await_task");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1183 to 1184 in 3e39cad

    
           mlir::OperationState freeState(rtSeq.getLoc(), 
        
                                          "aiex.dma_free_task");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAPass.cpp

Lines 330 to 339 in 3e39cad

    
           // Skip cores that already have link_with or link_files set. 
        
           if (coreOp.getLinkWith() || coreOp.getLinkFiles()) 
        
             return; 
        
           // Check if any func.call inside this core references a function 
        
           // with link_with. 
        
           std::string linkWithValue; 
        
           coreOp.walk([&](mlir::func::CallOp callOp) { 
        
             auto it = funcLinkWith.find(callOp.getCallee()); 
        
             if (it != funcLinkWith.end() && linkWithValue.empty()) 
        
               linkWithValue = it->second;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAPass.cpp

Lines 341 to 344 in 3e39cad

    
             if (!linkWithValue.empty()) 
        
               coreOp.setLinkWithAttr( 
        
                   mlir::StringAttr::get(module.getContext(), linkWithValue)); 
        
           });

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 249 in 3e39cad

locks.second.getResult()};

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 434 to 435 in 3e39cad

    
           if (!isAIE2 && !info.externalBuffers.empty() && 
        
               !info.noLocks) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 484 to 485 in 3e39cad

    
           std::string qS2MM = state.qualifyFuseGroup(info.fuseGroupS2MM, 
        
                                                       info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 529 to 530 in 3e39cad

    
           llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
           "): all " + llvm::Twine(maxS2MM_4a) + " channels in use");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 726 in 3e39cad

llvm::StringMap<unsigned> fuseGroupPacketIDNext; // qFG → next member index

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 783 to 786 in 3e39cad

    
           for (auto &[name, info] : state.conduitMap) { 
        
             // Multi-device: ensure tile lookups target the correct device. 
        
             if (state.isMultiDevice()) 
        
               state.switchToDeviceIndex(info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 788 to 790 in 3e39cad

    
           // Pass 0: only packet conduits.  Pass 1: everything else. 
        
           if (flowPass == 0 && info.routingMode != "packet") continue; 
        
           if (flowPass == 1 && info.routingMode == "packet") continue;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 792 to 807 in 3e39cad

    
           if (info.routingMode == "cascade") 
        
             continue; 
        
           if (info.sharedMemory) 
        
             continue; 
        
           if (state.linkSrcNamesEarly.count(name) || 
        
               state.linkJoinSrcNames.count(name)) 
        
             continue; 
        
           // Link destinations: flows are emitted by linkPhase() — skip here to 
        
           // avoid duplicate flows. 
        
           if (state.linkDstNames.count(name)) 
        
             continue; 
        
           auto [prodCol, prodRow] = info.producerTileCoord; 
        
           if (prodCol < 0 || prodRow == 0) 
        
             continue; 
        
           if (info.consumerTileCoords.empty()) 
        
             continue;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 809 to 812 in 3e39cad

    
           AIE::TileOp prodTile = state.lookupTileByCoord(prodCol, prodRow); 
        
           if (!prodTile) 
        
             continue; 
        
           mlir::Value prodTileVal = prodTile.getResult();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 814 to 815 in 3e39cad

    
           if (!info.consumerTileBuffers.count(prodTileVal)) 
        
             continue;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 817 in 3e39cad

builder.setInsertionPoint(state.deviceBody->getTerminator());

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 819 to 825 in 3e39cad

    
           // ---- Determine hardware MM2S channel count for this tile. ---- 
        
           // Used by the mode=any exhaustion check (Step 3.5). 
        
           uint32_t maxMM2S = 2; // hardware default: 2 MM2S per compute tile 
        
           if (state.targetModel) 
        
             maxMM2S = state.targetModel->getNumSourceSwitchboxConnections( 
        
                 static_cast<int>(prodCol), static_cast<int>(prodRow), 
        
                 AIE::WireBundle::DMA);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 827 to 837 in 3e39cad

    
           // ---- Assign MM2S channel (fused groups share a channel). ---- 
        
           // Check if Phase 4b already assigned an MM2S channel for this conduit 
        
           // (happens when the conduit has both compute and shim consumers — the 
        
           // shim consumer flow and the compute consumer flow share the same 
        
           // producer-side MM2S channel as a hardware broadcast). 
        
           int32_t mm2sChannel = -1; 
        
           bool usedPacketFallback = false; 
        
           { 
        
             auto existingIt = state.conduitMM2SChannel.find(name); 
        
             if (existingIt != state.conduitMM2SChannel.end()) { 
        
               mm2sChannel = existingIt->second;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 839 in 3e39cad

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 841 to 876 in 3e39cad

    
           if (mm2sChannel >= 0) { 
        
             // Already assigned by Phase 4b — reuse (broadcast from same MM2S port). 
        
           } else if (!info.fuseGroup.empty()) { 
        
             std::string qFG = state.qualifyFuseGroup(info.fuseGroup, 
        
                                                       info.deviceIndex); 
        
             auto it = state.fuseGroupMM2SChannel.find(qFG); 
        
             if (it != state.fuseGroupMM2SChannel.end()) { 
        
               mm2sChannel = it->second; 
        
             } else { 
        
               mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++; 
        
               state.fuseGroupMM2SChannel[qFG] = mm2sChannel; 
        
             } 
        
             state.fuseGroupMembers[qFG].push_back(name); 
        
             state.conduitMM2SChannel[name] = mm2sChannel; 
        
           } else if (info.routingMode == "any") { 
        
             // mode=any: check whether a circuit DMA channel is available. 
        
             int32_t nextCh = state.tileNextMM2SChannel.count(prodTileVal) 
        
                                  ? state.tileNextMM2SChannel[prodTileVal] 
        
                                  : 0; 
        
             if (static_cast<uint32_t>(nextCh) < maxMM2S) { 
        
               // A free circuit-mode channel exists — use it. 
        
               mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++; 
        
               state.conduitMM2SChannel[name] = mm2sChannel; 
        
             } else { 
        
               // Circuit DMA exhausted; flag that Step 3.5 handles emission below. 
        
               usedPacketFallback = true; 
        
             } 
        
           } else { 
        
             // Check if a circuit DMA channel is available before allocating. 
        
             // If exhausted, fall back to packet-switched DMA for non-cascade/ 
        
             // non-shared-memory channels (extends the mode=any fallback). 
        
             int32_t nextCh = state.tileNextMM2SChannel.count(prodTileVal) 
        
                                  ? state.tileNextMM2SChannel[prodTileVal] 
        
                                  : 0; 
        
             if (static_cast<uint32_t>(nextCh) < maxMM2S) { 
        
               mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 878 to 884 in 3e39cad

    
           // Track packet-mode channel designation for Step 3.5c. 
        
           if (info.routingMode == "packet" && prodTile) { 
        
             auto key = std::make_pair(prodTile.getOperation(), 
        
                                       static_cast<int>(mm2sChannel)); 
        
             state.pktChannelState.isPacketChannel[key] = true; 
        
             // NOTE: usedPacketFallback is NOT set here; explicit packet-mode 
        
             // broadcast is handled below (single multi-dest packet flow).

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 886 to 893 in 3e39cad

    
           } else if (info.routingMode != "cascade" && 
        
                      info.routingMode != "shared_memory" && 
        
                      info.routingMode != "stream") { 
        
             // Circuit DMA exhausted on producer tile — fall back to 
        
             // packet-switched DMA regardless of explicit routing_mode. 
        
             // tryPacketFallback (Step 3.5c) will reuse an existing 
        
             // packet-designated MM2S channel if one exists. 
        
             usedPacketFallback = true;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 895 to 897 in 3e39cad

    
           // Cascade/shared_memory/stream cannot use packet fallback. 
        
           mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++; 
        
           state.conduitMM2SChannel[name] = mm2sChannel;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 899 in 3e39cad

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 901 to 924 in 3e39cad

    
           // ---- Explicit packet-mode broadcast: single multi-dest flow. ---- 
        
           // For routing_mode="packet", emit one aie.packet_flow with all consumer 
        
           // destinations and a single packet ID.  The switchbox hardware broadcasts 
        
           // each packet to all destinations.  This matches the oracle's behavior 
        
           // (AIEObjectFifoStatefulTransform) where one bdPacket ID is used for all 
        
           // producer MM2S BDs and one packet_flow carries multiple packet_dest ops. 
        
           if (info.routingMode == "packet" && mm2sChannel >= 0 && 
        
               !info.consumerTileCoords.empty()) { 
        
             if (!state.packetIDAllocator) { 
        
               state.module.emitError( 
        
                   "internal error: packetIDAllocator not initialized"); 
        
               state.passFailed = true; 
        
               return; 
        
             } 
        
             // Use pre-allocated aligned block ID if this channel belongs to a fuse 
        
             // group with multiple packet members; otherwise fall back to sequential. 
        
             std::optional<uint8_t> pktID; 
        
             std::string qFG; 
        
             if (!info.fuseGroup.empty()) { 
        
               qFG = state.qualifyFuseGroup(info.fuseGroup, info.deviceIndex); 
        
               auto baseIt = fuseGroupPacketIDBase.find(qFG); 
        
               if (baseIt != fuseGroupPacketIDBase.end()) { 
        
                 unsigned idx = fuseGroupPacketIDNext[qFG]++; 
        
                 pktID = static_cast<uint8_t>(baseIt->second + idx);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 926 in 3e39cad

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 927 to 946 in 3e39cad

    
           if (!pktID) { 
        
             mlir::Value pktDomain = state.getMemTileDomain(prodTileVal); 
        
             pktID = state.packetIDAllocator->allocate(pktDomain); 
        
           } 
        
           if (!pktID) { 
        
             state.passFailed = true; 
        
             return; 
        
           } 
        
           state.conduitPacketID[name] = *pktID; 
        
           auto pktFlow = builder.create<AIE::PacketFlowOp>( 
        
               state.deviceOp.getLoc(), static_cast<int8_t>(*pktID), 
        
               /*keep_pkt_header=*/mlir::BoolAttr{}, 
        
               /*priority_route=*/mlir::BoolAttr{}); 
        
           mlir::Region &region = pktFlow.getPorts(); 
        
           mlir::Block *pktBlock = builder.createBlock(&region); 
        
           builder.setInsertionPointToStart(pktBlock); 
        
           builder.create<AIE::PacketSourceOp>(state.deviceOp.getLoc(), prodTileVal, 
        
                                               AIE::WireBundle::DMA, 
        
                                               static_cast<int32_t>(mm2sChannel));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 948 in 3e39cad

for (unsigned consIdx = 0; consIdx < info.consumerTileCoords.size();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 954 in 3e39cad

AIE::TileOp consTile = state.lookupTileByCoord(consCol, consRow);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 957 in 3e39cad

mlir::Value consTileVal = consTile.getResult();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 959 to 988 in 3e39cad

    
           // Allocate S2MM channel on the consumer tile. 
        
           // Only share S2MM ports between packet channels that have the same 
        
           // dma_channel_group.  Independent packet channels (no group) get 
        
           // separate S2MM ports to prevent data crossover (e.g., Q/K vs V in 
        
           // flash attention — same tile, different data). 
        
           // Effective group key: dma_channel_group_s2mm if set, else 
        
           // dma_channel_group. 
        
           std::string s2mmGrp = state.qualifyFuseGroup( 
        
               !info.fuseGroupS2MM.empty() ? info.fuseGroupS2MM 
        
                                           : info.fuseGroup, 
        
               info.deviceIndex); 
        
           int32_t s2mmChannel; 
        
           bool s2mmShared = false; 
        
           if (!s2mmGrp.empty()) { 
        
             auto it = state.fuseGroupS2MMChannel.find(s2mmGrp); 
        
             if (it != state.fuseGroupS2MMChannel.end()) { 
        
               s2mmChannel = it->second; 
        
               s2mmShared = true; 
        
             } 
        
           } 
        
           if (!s2mmShared) { 
        
             uint32_t maxS2MM_pkt = 2; 
        
             if (state.targetModel) 
        
               maxS2MM_pkt = state.targetModel->getNumDestSwitchboxConnections( 
        
                   static_cast<int>(consCol), static_cast<int>(consRow), 
        
                   AIE::WireBundle::DMA); 
        
             int32_t nextS2MM_pkt = state.tileNextS2MMChannel.count(consTileVal) 
        
                                        ? state.tileNextS2MMChannel[consTileVal] 
        
                                        : 0; 
        
             if (static_cast<uint32_t>(nextS2MM_pkt) >= maxS2MM_pkt) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 990 to 993 in 3e39cad

    
           llvm::Twine("conduit-to-dma: S2MM DMA channel exhausted on " 
        
                       "tile (") + 
        
           llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
           "): all " + llvm::Twine(maxS2MM_pkt) + " channels in use");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 997 to 1059 in 3e39cad

    
                 s2mmChannel = state.tileNextS2MMChannel[consTileVal]++; 
        
                 if (!s2mmGrp.empty()) 
        
                   state.fuseGroupS2MMChannel[s2mmGrp] = s2mmChannel; 
        
               } 
        
               state.conduitConsS2MMChannel[{name, consIdx}] = s2mmChannel; 
        
               // Lock sharing: only share locks when channels share an S2MM port 
        
               // (same dma_channel_group).  Independent channels use separate locks. 
        
               if (s2mmShared) { 
        
                 auto lockIt = state.pktTileS2MMLock.find(consTileVal); 
        
                 if (lockIt != state.pktTileS2MMLock.end()) { 
        
                   info.consumerTileLocks[consTileVal] = { 
        
                       lockIt->second.first.getDefiningOp<AIE::LockOp>(), 
        
                       lockIt->second.second.getDefiningOp<AIE::LockOp>()}; 
        
                 } 
        
               } else if (!s2mmGrp.empty()) { 
        
                 // First in group: record locks for future group members. 
        
                 auto &locks = info.consumerTileLocks[consTileVal]; 
        
                 if (locks.first && locks.second) { 
        
                   state.pktTileS2MMLock[consTileVal] = {locks.first.getResult(), 
        
                                                          locks.second.getResult()}; 
        
                 } 
        
               } 
        
               builder.create<AIE::PacketDestOp>(state.deviceOp.getLoc(), consTileVal, 
        
                                                 AIE::WireBundle::DMA, 
        
                                                 static_cast<int32_t>(s2mmChannel)); 
        
               // Record the emitted port pair so that fuse group partners sharing 
        
               // the same MM2S+S2MM ports do not emit a duplicate circuit flow. 
        
               FlowKey fk{prodTileVal.getAsOpaquePointer(), mm2sChannel, 
        
                          consTileVal.getAsOpaquePointer(), s2mmChannel}; 
        
               emittedFlowPorts.insert(fk); 
        
             } 
        
             builder.create<AIE::EndOp>(state.deviceOp.getLoc()); 
        
             builder.setInsertionPointAfter(pktFlow); 
        
             continue; // skip per-consumer circuit/fallback flow loop 
        
           } 
        
           // ---- Emit flows per consumer. ---- 
        
           for (unsigned consIdx = 0; consIdx < info.consumerTileCoords.size(); 
        
                ++consIdx) { 
        
             auto [consCol, consRow] = info.consumerTileCoords[consIdx]; 
        
             if (consRow == 0) 
        
               continue; 
        
             // For single-consumer conduits, adjacent tiles use shared memory 
        
             // (Phase 3c) — no DMA flow needed.  For broadcast (multi-consumer), 
        
             // Phase 3c is skipped; all consumers use DMA, so flows are needed 
        
             // for every consumer regardless of adjacency. 
        
             // Exception: forceDMA forces DMA even for adjacent tiles. 
        
             // Also: when shim consumers exist, the producer needs DMA MM2S 
        
             // regardless (to reach the shim tile via the switchbox network), 
        
             // so the compute consumer flow must also be emitted. 
        
             if (!info.forceDMA && info.consumerTileCoords.size() == 1 && 
        
                 info.shimConsumerTileCoords.empty()) { 
        
               bool explicitSharedMem = (info.routingMode == "shared_memory"); 
        
               bool rightAdj = state.targetModel->isLegalMemAffinity(prodCol, prodRow, 
        
                                                                     consCol, consRow); 
        
               bool leftAdj = state.targetModel->isLegalMemAffinity(consCol, consRow, 
        
                                                                    prodCol, prodRow); 
        
               if (explicitSharedMem || rightAdj || leftAdj)

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1061 to 1087 in 3e39cad

    
           } 
        
           AIE::TileOp consTile = state.lookupTileByCoord(consCol, consRow); 
        
           if (!consTile) 
        
             continue; 
        
           mlir::Value consTileVal = consTile.getResult(); 
        
           if (usedPacketFallback) { 
        
             // Step 3.5: attempt packet DMA fallback. 
        
             bool ok = 
        
                 tryPacketFallback(state, name, info, prodTileVal, prodCol, prodRow, 
        
                                   consTileVal, consCol, consRow, consIdx); 
        
             if (!ok) { 
        
               // Step 4: all modes exhausted — emit a hard error. 
        
               state.deviceOp.emitError( 
        
                   llvm::Twine("conduit-to-dma: no DMA resources available for " 
        
                               "conduit '") + 
        
                   name + "': circuit DMA MM2S channels exhausted on tile (" + 
        
                   llvm::Twine(prodCol) + "," + llvm::Twine(prodRow) + 
        
                   ") and packet DMA fallback is also ineligible " 
        
                   "(check BD budget, lock budget, and packet flow ID budget)"); 
        
               // B-3 fix: return immediately so the outer conduit loop does not 
        
               // continue processing subsequent conduits with broken state after 
        
               // both circuit DMA and packet fallback have been exhausted. 
        
               state.passFailed = true; 
        
               return;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1089 to 1092 in 3e39cad

    
             // tryPacketFallback records conduitMM2SChannel and 
        
             // conduitConsS2MMChannel internally; skip the circuit path below. 
        
             continue; 
        
           }

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1094 to 1102 in 3e39cad

    
           // S2MM fuse group: reuse existing S2MM channel if another conduit 
        
           // in the same fuse group already allocated one on this tile. 
        
           int32_t s2mmChannel; 
        
           if (!info.fuseGroupS2MM.empty()) { 
        
             std::string qS2MM = state.qualifyFuseGroup(info.fuseGroupS2MM, 
        
                                                         info.deviceIndex); 
        
             auto it = state.fuseGroupS2MMChannel.find(qS2MM); 
        
             if (it != state.fuseGroupS2MMChannel.end()) { 
        
               s2mmChannel = it->second;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 1104 in 3e39cad

uint32_t maxS2MM_4c = 2;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1116 to 1117 in 3e39cad

    
           llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
           "): all " + llvm::Twine(maxS2MM_4c) + " channels in use");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1122 to 1143 in 3e39cad

    
               state.fuseGroupS2MMChannel[qS2MM] = s2mmChannel; 
        
             } 
        
             std::string bdKey = name + "__s2mm_" + std::to_string(consIdx); 
        
             state.fuseGroupMembers[qS2MM].push_back(bdKey); 
        
           } else { 
        
             // Bounds-check S2MM channels on the consumer tile. 
        
             uint32_t maxS2MM_4c = 2; 
        
             if (state.targetModel) 
        
               maxS2MM_4c = state.targetModel->getNumDestSwitchboxConnections( 
        
                   static_cast<int>(consCol), static_cast<int>(consRow), 
        
                   AIE::WireBundle::DMA); 
        
             int32_t nextS2MM_4c = state.tileNextS2MMChannel.count(consTileVal) 
        
                                       ? state.tileNextS2MMChannel[consTileVal] 
        
                                       : 0; 
        
             if (static_cast<uint32_t>(nextS2MM_4c) >= maxS2MM_4c) { 
        
               state.deviceOp.emitError( 
        
                   llvm::Twine("conduit-to-dma: S2MM DMA channel exhausted on " 
        
                               "tile (") + 
        
                   llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
                   "): all " + llvm::Twine(maxS2MM_4c) + " channels in use"); 
        
               state.passFailed = true; 
        
               return;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1145 to 1157 in 3e39cad

    
             s2mmChannel = state.tileNextS2MMChannel[consTileVal]++; 
        
           } 
        
           state.conduitConsS2MMChannel[{name, consIdx}] = s2mmChannel; 
        
           // Deduplicate: skip if the same source→dest port pair was already 
        
           // emitted by the packet broadcast path or a fuse group partner. 
        
           // This prevents duplicate packet_flow + aie.flow for the same ports 
        
           // when MM2S fuse group members share the same consumer S2MM channel. 
        
           FlowKey fk{prodTileVal.getAsOpaquePointer(), mm2sChannel, 
        
                      consTileVal.getAsOpaquePointer(), s2mmChannel}; 
        
           if (emittedFlowPorts.count(fk)) 
        
             continue; 
        
           emittedFlowPorts.insert(fk);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1159 to 1161 in 3e39cad

    
           state.emitFlow(info.routingMode, prodTileVal, AIE::WireBundle::DMA, 
        
                          mm2sChannel, consTileVal, AIE::WireBundle::DMA, 
        
                          s2mmChannel);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 1163 in 3e39cad

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ObjectFifoToConduit.cpp

Lines 1053 to 1054 in 3e39cad

    
           loc, elemTy, 
        
           mlir::FlatSymbolRefAttr::get(ctx, name));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ObjectFifoToConduit.cpp

Lines 1271 to 1272 in 3e39cad

    
           loc, storedVal, 
        
           mlir::FlatSymbolRefAttr::get(ctx, name));

github-actions

Remaining comments which cannot be posted as a review comment to avoid GitHub Rate Limit

clang-format

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 263 to 264 in 39c4268

    
           static llvm::SmallVector<ArgGroup> 
        
           buildArgGroupsFromSeq(mlir::Block &seqBody) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 276 to 277 in 39c4268

    
           int64_t offset = 
        
               (offsetsAttr && !offsetsAttr.empty()) ? offsetsAttr[0] : 0;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 621 to 622 in 39c4268

    
           auto nameAttr = 
        
               op->getAttrOfType<mlir::FlatSymbolRefAttr>("name");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 653 to 654 in 39c4268

    
           Key k = {alloc.getTile(), 
        
                    static_cast<int>(alloc.getChannelDir())};

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 664 to 665 in 39c4268

    
           if (allocs[idx].getChannelIndex() != 
        
               static_cast<int64_t>(idx))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 912 to 915 in 39c4268

    
           auto computeDeadArgs = []( 
        
               const llvm::SmallVector<ArgGroup> &groups, 
        
               const llvm::StringSet<> &erasedChannels, 
        
               unsigned numOrigArgs) -> llvm::DenseSet<unsigned> {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitFuseOperators.cpp

Lines 934 to 939 in 39c4268

    
           llvm::DenseSet<unsigned> deadA = computeDeadArgs( 
        
               argGroupsA, erasedChannelsA, 
        
               static_cast<unsigned>(origTypesA.size())); 
        
           llvm::DenseSet<unsigned> deadB = computeDeadArgs( 
        
               argGroupsB, erasedChannelsB, 
        
               static_cast<unsigned>(origTypesB.size()));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitInferModes.cpp

Lines 246 to 247 in 39c4268

    
           op.setRoutingModeAttr( 
        
               RoutingModeAttr::get(module.getContext(), RoutingMode::SharedMemory));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitInferModes.cpp

Lines 308 to 309 in 39c4268

    
           unsigned numConsumers = consCoords.empty() ? 1 
        
                                                        : consCoords.size();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitPlaceBuffers.cpp

Line 64 in 39c4268

    
           if (rest.empty() || rest.find_first_not_of("0123456789") != llvm::StringRef::npos)

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitTileInference.cpp

Lines 335 to 336 in 39c4268

    
           if (!createOp->getAttrOfType<mlir::StringAttr>( 
        
                   "dma_channel_group"))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAAlloc.cpp

Line 512 in 39c4268

// Multi-device: allocate into the device that owns the producer tile.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAAlloc.cpp

Lines 666 to 667 in 39c4268

    
           if (targetModel.isMemTile(consCol, consRow) && 
        
               info.putCount > 1 && info.dmaRepeat == 0) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAAlloc.cpp

Lines 763 to 764 in 39c4268

    
           auto consLocks = 
        
               state.allocateLockPair(consTileVal, consPrefix, consNBufs, prodInit);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Lines 137 to 138 in 39c4268

    
           info.producerDimensions = 
        
               mlir::cast<AIE::BDDimLayoutArrayAttr>(*dims);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Lines 140 to 141 in 39c4268

    
           auto arrayOfArrays = 
        
               mlir::cast<AIE::BDDimLayoutArrayArrayAttr>(*dims);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Lines 234 to 235 in 39c4268

    
           for (int devIdx = 0; 
        
                devIdx < static_cast<int>(state.deviceOps.size()); ++devIdx) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Lines 421 to 422 in 39c4268

    
           auto arrayOfArrays = 
        
               mlir::cast<AIE::BDDimLayoutArrayArrayAttr>(*dims);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACollect.cpp

Line 532 in 39c4268

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 128 to 129 in 39c4268

    
           << count << " IDs (aligned to " << p 
        
           << ") but only " << (unsigned)(limit - next) << " IDs remain";

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 672 to 673 in 39c4268

    
           llvm::DenseMap<mlir::Value, 
        
                          std::pair<mlir::Value, mlir::Value>> pktTileS2MMLock;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 744 to 745 in 39c4268

    
           if (activeDevIdx >= 0 && 
        
               activeDevIdx < static_cast<int>(deviceOps.size()))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 877 to 879 in 39c4268

    
           uint32_t maxLocks = targetModel->getNumLocks( 
        
               static_cast<int>(tileOp.getCol()), 
        
               static_cast<int>(tileOp.getRow()));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 881 to 883 in 39c4268

    
           int currentUsed = lockIdCounter.count(tileVal) 
        
                                 ? lockIdCounter[tileVal] 
        
                                 : 0;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 891 to 892 in 39c4268

    
           llvm::Twine(static_cast<int>(maxLocks) - currentUsed) + 
        
           " of " + llvm::Twine(maxLocks) + " remain");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h

Lines 945 to 946 in 39c4268

    
           ConduitInfo *lookupConduit(mlir::StringRef name, 
        
                                      mlir::Operation *contextOp) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 469 to 471 in 39c4268

    
           // perBufLen: number of elements per physical buffer for this source conduit. 
        
           // Prefer numElems (from put_memref_async descriptors), then derive from 
        
           // elemType (e.g. memref<48xi32> → 48), otherwise fall back to 1.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 517 to 518 in 39c4268

    
           if (ConduitInfo *dstInfoR = 
        
                   state.lookupConduit(dstNameR, linkOp.op))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 840 to 841 in 39c4268

    
           state.pktTileS2MMLock[consTileVal] = { 
        
               locks.first.getResult(), locks.second.getResult()};

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 893 to 894 in 39c4268

    
           srcPort = 
        
               state.tileNextMM2SChannel[srcProdTile.getResult()]++;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 1232 to 1233 in 39c4268

    
           if (ConduitInfo *dstInfo = 
        
                   state.lookupConduit(dstName2, linkOp.op)) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 1856 to 1857 in 39c4268

    
           std::string qFG = state.qualifyFuseGroup(info.fuseGroup, 
        
                                                     info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 1916 to 1917 in 39c4268

    
           bdTermBlock = 
        
               (info.dmaRepeat > 0) ? addBlock() : nullptr;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2382 to 2383 in 39c4268

    
           // first conduit emits a dma_start.  Subsequent conduits append BD blocks 
        
           // and the post-pass links all BD chains into a single combined ring.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Line 2398 in 39c4268

std::to_string(consRow) + "_ch" + std::to_string(s2mmChannel),

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2439 to 2440 in 39c4268

    
           // Linear chain condition: either dma_repeat>0 (finite DMA task queue), 
        
           // or putCount>1 with no dmaRepeat (N sequential puts merged by

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2442 to 2444 in 39c4268

    
           isLinearChain = 
        
               (info.dmaRepeat > 0) || 
        
               (info.putCount > 1 && info.dmaRepeat == 0);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2508 to 2509 in 39c4268

    
           builder.create<AIE::NextBDOp>(state.deviceOp.getLoc(), 
        
                                         bdBlocks[0]);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2529 to 2530 in 39c4268

    
           std::string bdKey = 
        
               name + "__s2mm_" + std::to_string(consIdx);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2537 to 2540 in 39c4268

    
           std::string bdKey = 
        
               name + "__s2mm_" + std::to_string(consIdx); 
        
           std::string qS2MM = state.qualifyFuseGroup(info.fuseGroupS2MM, 
        
                                                       info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 729 to 730 in 39c4268

    
           auto ctrKey = 
        
               std::make_tuple(conduitName.str(), col, row, false);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 741 to 742 in 39c4268

    
           initBuilder.create<mlir::arith::ConstantIndexOp>( 
        
               loc, rotationBufSlot);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 755 to 756 in 39c4268

    
           auto ctrKey = 
        
               std::make_tuple(conduitName.str(), col, row, true);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 760 to 761 in 39c4268

    
           initBuilder.setInsertionPointAfterValue( 
        
               resolvedProducerRotationBuf);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 770 to 772 in 39c4268

    
           initBuilder.create<mlir::memref::StoreOp>( 
        
               loc, zero, resolvedProducerRotationBuf, 
        
               mlir::ValueRange{slotIdx});

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1044 to 1047 in 39c4268

    
           // This is the reverse of --dma-task-to-conduit. After --conduit-fuse-operators 
        
           // merges runtime_sequences and eliminates dead block args, the remaining 
        
           // put/get ops correspond 1:1 (positionally) to the runtime_sequence block 
        
           // args.  Each Nth conduit.put_memref/get_memref maps to block arg N.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1063 to 1064 in 39c4268

    
           if (auto cc = alloc->getAttrOfType<mlir::FlatSymbolRefAttr>( 
        
                   "conduit_channel"))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1105 to 1106 in 39c4268

    
           bool isS2MM = 
        
               (conduitToDir[conduitName] == AIE::DMAChannelDir::S2MM);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1117 to 1119 in 39c4268

    
           if (auto dimsAttr = 
        
                   op->getAttrOfType<AIE::BDDimLayoutArrayAttr>( 
        
                       "producer_dimensions"))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1126 to 1129 in 39c4268

    
           mlir::OperationState configState(loc, 
        
                                            "aiex.dma_configure_task_for"); 
        
           configState.addAttribute( 
        
               "alloc", mlir::FlatSymbolRefAttr::get(ctx, allocSym));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1131 to 1132 in 39c4268

    
           configState.addAttribute("issue_token", 
        
                                    builder.getBoolAttr(true));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1144 to 1146 in 39c4268

    
           builder.create<AIE::DMABDOp>( 
        
               loc, bufArg, /*offset=*/0, 
        
               static_cast<int>(numElems), dims);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1148 to 1150 in 39c4268

    
           builder.create<AIE::DMABDOp>( 
        
               loc, bufArg, /*offset=*/0, 
        
               static_cast<int>(numElems));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1152 to 1153 in 39c4268

    
           bdBlock->back().setAttr("burst_length", 
        
                                   builder.getI32IntegerAttr(0));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1175 to 1176 in 39c4268

    
           mlir::OperationState awaitState(rtSeq.getLoc(), 
        
                                           "aiex.dma_await_task");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1183 to 1184 in 39c4268

    
           mlir::OperationState freeState(rtSeq.getLoc(), 
        
                                          "aiex.dma_free_task");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAPass.cpp

Lines 330 to 339 in 39c4268

    
           // Skip cores that already have link_with or link_files set. 
        
           if (coreOp.getLinkWith() || coreOp.getLinkFiles()) 
        
             return; 
        
           // Check if any func.call inside this core references a function 
        
           // with link_with. 
        
           std::string linkWithValue; 
        
           coreOp.walk([&](mlir::func::CallOp callOp) { 
        
             auto it = funcLinkWith.find(callOp.getCallee()); 
        
             if (it != funcLinkWith.end() && linkWithValue.empty()) 
        
               linkWithValue = it->second;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAPass.cpp

Lines 341 to 344 in 39c4268

    
             if (!linkWithValue.empty()) 
        
               coreOp.setLinkWithAttr( 
        
                   mlir::StringAttr::get(module.getContext(), linkWithValue)); 
        
           });

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 249 in 39c4268

locks.second.getResult()};

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 434 to 435 in 39c4268

    
           if (!isAIE2 && !info.externalBuffers.empty() && 
        
               !info.noLocks) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 484 to 485 in 39c4268

    
           std::string qS2MM = state.qualifyFuseGroup(info.fuseGroupS2MM, 
        
                                                       info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 529 to 530 in 39c4268

    
           llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
           "): all " + llvm::Twine(maxS2MM_4a) + " channels in use");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 726 in 39c4268

llvm::StringMap<unsigned> fuseGroupPacketIDNext; // qFG → next member index

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 783 to 786 in 39c4268

    
           for (auto &[name, info] : state.conduitMap) { 
        
             // Multi-device: ensure tile lookups target the correct device. 
        
             if (state.isMultiDevice()) 
        
               state.switchToDeviceIndex(info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 788 to 790 in 39c4268

    
           // Pass 0: only packet conduits.  Pass 1: everything else. 
        
           if (flowPass == 0 && info.routingMode != "packet") continue; 
        
           if (flowPass == 1 && info.routingMode == "packet") continue;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 792 to 807 in 39c4268

    
           if (info.routingMode == "cascade") 
        
             continue; 
        
           if (info.sharedMemory) 
        
             continue; 
        
           if (state.linkSrcNamesEarly.count(name) || 
        
               state.linkJoinSrcNames.count(name)) 
        
             continue; 
        
           // Link destinations: flows are emitted by linkPhase() — skip here to 
        
           // avoid duplicate flows. 
        
           if (state.linkDstNames.count(name)) 
        
             continue; 
        
           auto [prodCol, prodRow] = info.producerTileCoord; 
        
           if (prodCol < 0 || prodRow == 0) 
        
             continue; 
        
           if (info.consumerTileCoords.empty()) 
        
             continue;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 809 to 812 in 39c4268

    
           AIE::TileOp prodTile = state.lookupTileByCoord(prodCol, prodRow); 
        
           if (!prodTile) 
        
             continue; 
        
           mlir::Value prodTileVal = prodTile.getResult();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 814 to 815 in 39c4268

    
           if (!info.consumerTileBuffers.count(prodTileVal)) 
        
             continue;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 817 in 39c4268

builder.setInsertionPoint(state.deviceBody->getTerminator());

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 819 to 825 in 39c4268

    
           // ---- Determine hardware MM2S channel count for this tile. ---- 
        
           // Used by the mode=any exhaustion check (Step 3.5). 
        
           uint32_t maxMM2S = 2; // hardware default: 2 MM2S per compute tile 
        
           if (state.targetModel) 
        
             maxMM2S = state.targetModel->getNumSourceSwitchboxConnections( 
        
                 static_cast<int>(prodCol), static_cast<int>(prodRow), 
        
                 AIE::WireBundle::DMA);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 827 to 837 in 39c4268

    
           // ---- Assign MM2S channel (fused groups share a channel). ---- 
        
           // Check if Phase 4b already assigned an MM2S channel for this conduit 
        
           // (happens when the conduit has both compute and shim consumers — the 
        
           // shim consumer flow and the compute consumer flow share the same 
        
           // producer-side MM2S channel as a hardware broadcast). 
        
           int32_t mm2sChannel = -1; 
        
           bool usedPacketFallback = false; 
        
           { 
        
             auto existingIt = state.conduitMM2SChannel.find(name); 
        
             if (existingIt != state.conduitMM2SChannel.end()) { 
        
               mm2sChannel = existingIt->second;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 839 in 39c4268

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 841 to 876 in 39c4268

    
           if (mm2sChannel >= 0) { 
        
             // Already assigned by Phase 4b — reuse (broadcast from same MM2S port). 
        
           } else if (!info.fuseGroup.empty()) { 
        
             std::string qFG = state.qualifyFuseGroup(info.fuseGroup, 
        
                                                       info.deviceIndex); 
        
             auto it = state.fuseGroupMM2SChannel.find(qFG); 
        
             if (it != state.fuseGroupMM2SChannel.end()) { 
        
               mm2sChannel = it->second; 
        
             } else { 
        
               mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++; 
        
               state.fuseGroupMM2SChannel[qFG] = mm2sChannel; 
        
             } 
        
             state.fuseGroupMembers[qFG].push_back(name); 
        
             state.conduitMM2SChannel[name] = mm2sChannel; 
        
           } else if (info.routingMode == "any") { 
        
             // mode=any: check whether a circuit DMA channel is available. 
        
             int32_t nextCh = state.tileNextMM2SChannel.count(prodTileVal) 
        
                                  ? state.tileNextMM2SChannel[prodTileVal] 
        
                                  : 0; 
        
             if (static_cast<uint32_t>(nextCh) < maxMM2S) { 
        
               // A free circuit-mode channel exists — use it. 
        
               mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++; 
        
               state.conduitMM2SChannel[name] = mm2sChannel; 
        
             } else { 
        
               // Circuit DMA exhausted; flag that Step 3.5 handles emission below. 
        
               usedPacketFallback = true; 
        
             } 
        
           } else { 
        
             // Check if a circuit DMA channel is available before allocating. 
        
             // If exhausted, fall back to packet-switched DMA for non-cascade/ 
        
             // non-shared-memory channels (extends the mode=any fallback). 
        
             int32_t nextCh = state.tileNextMM2SChannel.count(prodTileVal) 
        
                                  ? state.tileNextMM2SChannel[prodTileVal] 
        
                                  : 0; 
        
             if (static_cast<uint32_t>(nextCh) < maxMM2S) { 
        
               mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 878 to 884 in 39c4268

    
           // Track packet-mode channel designation for Step 3.5c. 
        
           if (info.routingMode == "packet" && prodTile) { 
        
             auto key = std::make_pair(prodTile.getOperation(), 
        
                                       static_cast<int>(mm2sChannel)); 
        
             state.pktChannelState.isPacketChannel[key] = true; 
        
             // NOTE: usedPacketFallback is NOT set here; explicit packet-mode 
        
             // broadcast is handled below (single multi-dest packet flow).

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 886 to 893 in 39c4268

    
           } else if (info.routingMode != "cascade" && 
        
                      info.routingMode != "shared_memory" && 
        
                      info.routingMode != "stream") { 
        
             // Circuit DMA exhausted on producer tile — fall back to 
        
             // packet-switched DMA regardless of explicit routing_mode. 
        
             // tryPacketFallback (Step 3.5c) will reuse an existing 
        
             // packet-designated MM2S channel if one exists. 
        
             usedPacketFallback = true;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 895 to 897 in 39c4268

    
           // Cascade/shared_memory/stream cannot use packet fallback. 
        
           mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++; 
        
           state.conduitMM2SChannel[name] = mm2sChannel;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 899 in 39c4268

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 901 to 924 in 39c4268

    
           // ---- Explicit packet-mode broadcast: single multi-dest flow. ---- 
        
           // For routing_mode="packet", emit one aie.packet_flow with all consumer 
        
           // destinations and a single packet ID.  The switchbox hardware broadcasts 
        
           // each packet to all destinations.  This matches the oracle's behavior 
        
           // (AIEObjectFifoStatefulTransform) where one bdPacket ID is used for all 
        
           // producer MM2S BDs and one packet_flow carries multiple packet_dest ops. 
        
           if (info.routingMode == "packet" && mm2sChannel >= 0 && 
        
               !info.consumerTileCoords.empty()) { 
        
             if (!state.packetIDAllocator) { 
        
               state.module.emitError( 
        
                   "internal error: packetIDAllocator not initialized"); 
        
               state.passFailed = true; 
        
               return; 
        
             } 
        
             // Use pre-allocated aligned block ID if this channel belongs to a fuse 
        
             // group with multiple packet members; otherwise fall back to sequential. 
        
             std::optional<uint8_t> pktID; 
        
             std::string qFG; 
        
             if (!info.fuseGroup.empty()) { 
        
               qFG = state.qualifyFuseGroup(info.fuseGroup, info.deviceIndex); 
        
               auto baseIt = fuseGroupPacketIDBase.find(qFG); 
        
               if (baseIt != fuseGroupPacketIDBase.end()) { 
        
                 unsigned idx = fuseGroupPacketIDNext[qFG]++; 
        
                 pktID = static_cast<uint8_t>(baseIt->second + idx);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 926 in 39c4268

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 927 to 946 in 39c4268

    
           if (!pktID) { 
        
             mlir::Value pktDomain = state.getMemTileDomain(prodTileVal); 
        
             pktID = state.packetIDAllocator->allocate(pktDomain); 
        
           } 
        
           if (!pktID) { 
        
             state.passFailed = true; 
        
             return; 
        
           } 
        
           state.conduitPacketID[name] = *pktID; 
        
           auto pktFlow = builder.create<AIE::PacketFlowOp>( 
        
               state.deviceOp.getLoc(), static_cast<int8_t>(*pktID), 
        
               /*keep_pkt_header=*/mlir::BoolAttr{}, 
        
               /*priority_route=*/mlir::BoolAttr{}); 
        
           mlir::Region &region = pktFlow.getPorts(); 
        
           mlir::Block *pktBlock = builder.createBlock(&region); 
        
           builder.setInsertionPointToStart(pktBlock); 
        
           builder.create<AIE::PacketSourceOp>(state.deviceOp.getLoc(), prodTileVal, 
        
                                               AIE::WireBundle::DMA, 
        
                                               static_cast<int32_t>(mm2sChannel));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 948 in 39c4268

for (unsigned consIdx = 0; consIdx < info.consumerTileCoords.size();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 954 in 39c4268

AIE::TileOp consTile = state.lookupTileByCoord(consCol, consRow);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 957 in 39c4268

mlir::Value consTileVal = consTile.getResult();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 959 to 988 in 39c4268

    
           // Allocate S2MM channel on the consumer tile. 
        
           // Only share S2MM ports between packet channels that have the same 
        
           // dma_channel_group.  Independent packet channels (no group) get 
        
           // separate S2MM ports to prevent data crossover (e.g., Q/K vs V in 
        
           // flash attention — same tile, different data). 
        
           // Effective group key: dma_channel_group_s2mm if set, else 
        
           // dma_channel_group. 
        
           std::string s2mmGrp = state.qualifyFuseGroup( 
        
               !info.fuseGroupS2MM.empty() ? info.fuseGroupS2MM 
        
                                           : info.fuseGroup, 
        
               info.deviceIndex); 
        
           int32_t s2mmChannel; 
        
           bool s2mmShared = false; 
        
           if (!s2mmGrp.empty()) { 
        
             auto it = state.fuseGroupS2MMChannel.find(s2mmGrp); 
        
             if (it != state.fuseGroupS2MMChannel.end()) { 
        
               s2mmChannel = it->second; 
        
               s2mmShared = true; 
        
             } 
        
           } 
        
           if (!s2mmShared) { 
        
             uint32_t maxS2MM_pkt = 2; 
        
             if (state.targetModel) 
        
               maxS2MM_pkt = state.targetModel->getNumDestSwitchboxConnections( 
        
                   static_cast<int>(consCol), static_cast<int>(consRow), 
        
                   AIE::WireBundle::DMA); 
        
             int32_t nextS2MM_pkt = state.tileNextS2MMChannel.count(consTileVal) 
        
                                        ? state.tileNextS2MMChannel[consTileVal] 
        
                                        : 0; 
        
             if (static_cast<uint32_t>(nextS2MM_pkt) >= maxS2MM_pkt) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 990 to 993 in 39c4268

    
           llvm::Twine("conduit-to-dma: S2MM DMA channel exhausted on " 
        
                       "tile (") + 
        
           llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
           "): all " + llvm::Twine(maxS2MM_pkt) + " channels in use");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 997 to 1059 in 39c4268

    
                 s2mmChannel = state.tileNextS2MMChannel[consTileVal]++; 
        
                 if (!s2mmGrp.empty()) 
        
                   state.fuseGroupS2MMChannel[s2mmGrp] = s2mmChannel; 
        
               } 
        
               state.conduitConsS2MMChannel[{name, consIdx}] = s2mmChannel; 
        
               // Lock sharing: only share locks when channels share an S2MM port 
        
               // (same dma_channel_group).  Independent channels use separate locks. 
        
               if (s2mmShared) { 
        
                 auto lockIt = state.pktTileS2MMLock.find(consTileVal); 
        
                 if (lockIt != state.pktTileS2MMLock.end()) { 
        
                   info.consumerTileLocks[consTileVal] = { 
        
                       lockIt->second.first.getDefiningOp<AIE::LockOp>(), 
        
                       lockIt->second.second.getDefiningOp<AIE::LockOp>()}; 
        
                 } 
        
               } else if (!s2mmGrp.empty()) { 
        
                 // First in group: record locks for future group members. 
        
                 auto &locks = info.consumerTileLocks[consTileVal]; 
        
                 if (locks.first && locks.second) { 
        
                   state.pktTileS2MMLock[consTileVal] = {locks.first.getResult(), 
        
                                                          locks.second.getResult()}; 
        
                 } 
        
               } 
        
               builder.create<AIE::PacketDestOp>(state.deviceOp.getLoc(), consTileVal, 
        
                                                 AIE::WireBundle::DMA, 
        
                                                 static_cast<int32_t>(s2mmChannel)); 
        
               // Record the emitted port pair so that fuse group partners sharing 
        
               // the same MM2S+S2MM ports do not emit a duplicate circuit flow. 
        
               FlowKey fk{prodTileVal.getAsOpaquePointer(), mm2sChannel, 
        
                          consTileVal.getAsOpaquePointer(), s2mmChannel}; 
        
               emittedFlowPorts.insert(fk); 
        
             } 
        
             builder.create<AIE::EndOp>(state.deviceOp.getLoc()); 
        
             builder.setInsertionPointAfter(pktFlow); 
        
             continue; // skip per-consumer circuit/fallback flow loop 
        
           } 
        
           // ---- Emit flows per consumer. ---- 
        
           for (unsigned consIdx = 0; consIdx < info.consumerTileCoords.size(); 
        
                ++consIdx) { 
        
             auto [consCol, consRow] = info.consumerTileCoords[consIdx]; 
        
             if (consRow == 0) 
        
               continue; 
        
             // For single-consumer conduits, adjacent tiles use shared memory 
        
             // (Phase 3c) — no DMA flow needed.  For broadcast (multi-consumer), 
        
             // Phase 3c is skipped; all consumers use DMA, so flows are needed 
        
             // for every consumer regardless of adjacency. 
        
             // Exception: forceDMA forces DMA even for adjacent tiles. 
        
             // Also: when shim consumers exist, the producer needs DMA MM2S 
        
             // regardless (to reach the shim tile via the switchbox network), 
        
             // so the compute consumer flow must also be emitted. 
        
             if (!info.forceDMA && info.consumerTileCoords.size() == 1 && 
        
                 info.shimConsumerTileCoords.empty()) { 
        
               bool explicitSharedMem = (info.routingMode == "shared_memory"); 
        
               bool rightAdj = state.targetModel->isLegalMemAffinity(prodCol, prodRow, 
        
                                                                     consCol, consRow); 
        
               bool leftAdj = state.targetModel->isLegalMemAffinity(consCol, consRow, 
        
                                                                    prodCol, prodRow); 
        
               if (explicitSharedMem || rightAdj || leftAdj)

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1061 to 1087 in 39c4268

    
           } 
        
           AIE::TileOp consTile = state.lookupTileByCoord(consCol, consRow); 
        
           if (!consTile) 
        
             continue; 
        
           mlir::Value consTileVal = consTile.getResult(); 
        
           if (usedPacketFallback) { 
        
             // Step 3.5: attempt packet DMA fallback. 
        
             bool ok = 
        
                 tryPacketFallback(state, name, info, prodTileVal, prodCol, prodRow, 
        
                                   consTileVal, consCol, consRow, consIdx); 
        
             if (!ok) { 
        
               // Step 4: all modes exhausted — emit a hard error. 
        
               state.deviceOp.emitError( 
        
                   llvm::Twine("conduit-to-dma: no DMA resources available for " 
        
                               "conduit '") + 
        
                   name + "': circuit DMA MM2S channels exhausted on tile (" + 
        
                   llvm::Twine(prodCol) + "," + llvm::Twine(prodRow) + 
        
                   ") and packet DMA fallback is also ineligible " 
        
                   "(check BD budget, lock budget, and packet flow ID budget)"); 
        
               // B-3 fix: return immediately so the outer conduit loop does not 
        
               // continue processing subsequent conduits with broken state after 
        
               // both circuit DMA and packet fallback have been exhausted. 
        
               state.passFailed = true; 
        
               return;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1089 to 1092 in 39c4268

    
             // tryPacketFallback records conduitMM2SChannel and 
        
             // conduitConsS2MMChannel internally; skip the circuit path below. 
        
             continue; 
        
           }

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1094 to 1102 in 39c4268

    
           // S2MM fuse group: reuse existing S2MM channel if another conduit 
        
           // in the same fuse group already allocated one on this tile. 
        
           int32_t s2mmChannel; 
        
           if (!info.fuseGroupS2MM.empty()) { 
        
             std::string qS2MM = state.qualifyFuseGroup(info.fuseGroupS2MM, 
        
                                                         info.deviceIndex); 
        
             auto it = state.fuseGroupS2MMChannel.find(qS2MM); 
        
             if (it != state.fuseGroupS2MMChannel.end()) { 
        
               s2mmChannel = it->second;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 1104 in 39c4268

uint32_t maxS2MM_4c = 2;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1116 to 1117 in 39c4268

    
           llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
           "): all " + llvm::Twine(maxS2MM_4c) + " channels in use");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1122 to 1143 in 39c4268

    
               state.fuseGroupS2MMChannel[qS2MM] = s2mmChannel; 
        
             } 
        
             std::string bdKey = name + "__s2mm_" + std::to_string(consIdx); 
        
             state.fuseGroupMembers[qS2MM].push_back(bdKey); 
        
           } else { 
        
             // Bounds-check S2MM channels on the consumer tile. 
        
             uint32_t maxS2MM_4c = 2; 
        
             if (state.targetModel) 
        
               maxS2MM_4c = state.targetModel->getNumDestSwitchboxConnections( 
        
                   static_cast<int>(consCol), static_cast<int>(consRow), 
        
                   AIE::WireBundle::DMA); 
        
             int32_t nextS2MM_4c = state.tileNextS2MMChannel.count(consTileVal) 
        
                                       ? state.tileNextS2MMChannel[consTileVal] 
        
                                       : 0; 
        
             if (static_cast<uint32_t>(nextS2MM_4c) >= maxS2MM_4c) { 
        
               state.deviceOp.emitError( 
        
                   llvm::Twine("conduit-to-dma: S2MM DMA channel exhausted on " 
        
                               "tile (") + 
        
                   llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
                   "): all " + llvm::Twine(maxS2MM_4c) + " channels in use"); 
        
               state.passFailed = true; 
        
               return;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1145 to 1157 in 39c4268

    
             s2mmChannel = state.tileNextS2MMChannel[consTileVal]++; 
        
           } 
        
           state.conduitConsS2MMChannel[{name, consIdx}] = s2mmChannel; 
        
           // Deduplicate: skip if the same source→dest port pair was already 
        
           // emitted by the packet broadcast path or a fuse group partner. 
        
           // This prevents duplicate packet_flow + aie.flow for the same ports 
        
           // when MM2S fuse group members share the same consumer S2MM channel. 
        
           FlowKey fk{prodTileVal.getAsOpaquePointer(), mm2sChannel, 
        
                      consTileVal.getAsOpaquePointer(), s2mmChannel}; 
        
           if (emittedFlowPorts.count(fk)) 
        
             continue; 
        
           emittedFlowPorts.insert(fk);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1159 to 1161 in 39c4268

    
           state.emitFlow(info.routingMode, prodTileVal, AIE::WireBundle::DMA, 
        
                          mm2sChannel, consTileVal, AIE::WireBundle::DMA, 
        
                          s2mmChannel);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 1163 in 39c4268

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ObjectFifoToConduit.cpp

Lines 1053 to 1054 in 39c4268

    
           loc, elemTy, 
        
           mlir::FlatSymbolRefAttr::get(ctx, name));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ObjectFifoToConduit.cpp

Lines 1271 to 1272 in 39c4268

    
           loc, storedVal, 
        
           mlir::FlatSymbolRefAttr::get(ctx, name));

github-actions

Remaining comments that could not be posted as review comments, to avoid hitting the GitHub rate limit

clang-format

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 1232 to 1233 in 27b72c1

    
           if (ConduitInfo *dstInfo = 
        
                   state.lookupConduit(dstName2, linkOp.op)) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 1856 to 1857 in 27b72c1

    
           std::string qFG = state.qualifyFuseGroup(info.fuseGroup, 
        
                                                     info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 1916 to 1917 in 27b72c1

    
           bdTermBlock = 
        
               (info.dmaRepeat > 0) ? addBlock() : nullptr;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2382 to 2383 in 27b72c1

    
           // first conduit emits a dma_start.  Subsequent conduits append BD blocks 
        
           // and the post-pass links all BD chains into a single combined ring.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Line 2398 in 27b72c1

std::to_string(consRow) + "_ch" + std::to_string(s2mmChannel),

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2439 to 2440 in 27b72c1

    
           // Linear chain condition: either dma_repeat>0 (finite DMA task queue), 
        
           // or putCount>1 with no dmaRepeat (N sequential puts merged by

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2442 to 2444 in 27b72c1

    
           isLinearChain = 
        
               (info.dmaRepeat > 0) || 
        
               (info.putCount > 1 && info.dmaRepeat == 0);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2508 to 2509 in 27b72c1

    
           builder.create<AIE::NextBDOp>(state.deviceOp.getLoc(), 
        
                                         bdBlocks[0]);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2529 to 2530 in 27b72c1

    
           std::string bdKey = 
        
               name + "__s2mm_" + std::to_string(consIdx);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALink.cpp

Lines 2537 to 2540 in 27b72c1

    
           std::string bdKey = 
        
               name + "__s2mm_" + std::to_string(consIdx); 
        
           std::string qS2MM = state.qualifyFuseGroup(info.fuseGroupS2MM, 
        
                                                       info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 729 to 730 in 27b72c1

    
           auto ctrKey = 
        
               std::make_tuple(conduitName.str(), col, row, false);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 741 to 742 in 27b72c1

    
           initBuilder.create<mlir::arith::ConstantIndexOp>( 
        
               loc, rotationBufSlot);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 755 to 756 in 27b72c1

    
           auto ctrKey = 
        
               std::make_tuple(conduitName.str(), col, row, true);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 760 to 761 in 27b72c1

    
           initBuilder.setInsertionPointAfterValue( 
        
               resolvedProducerRotationBuf);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 770 to 772 in 27b72c1

    
           initBuilder.create<mlir::memref::StoreOp>( 
        
               loc, zero, resolvedProducerRotationBuf, 
        
               mlir::ValueRange{slotIdx});

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1044 to 1047 in 27b72c1

    
           // This is the reverse of --dma-task-to-conduit. After --conduit-fuse-operators 
        
           // merges runtime_sequences and eliminates dead block args, the remaining 
        
           // put/get ops correspond 1:1 (positionally) to the runtime_sequence block 
        
           // args.  Each Nth conduit.put_memref/get_memref maps to block arg N.

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1063 to 1064 in 27b72c1

    
           if (auto cc = alloc->getAttrOfType<mlir::FlatSymbolRefAttr>( 
        
                   "conduit_channel"))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1105 to 1106 in 27b72c1

    
           bool isS2MM = 
        
               (conduitToDir[conduitName] == AIE::DMAChannelDir::S2MM);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1117 to 1119 in 27b72c1

    
           if (auto dimsAttr = 
        
                   op->getAttrOfType<AIE::BDDimLayoutArrayAttr>( 
        
                       "producer_dimensions"))

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1126 to 1129 in 27b72c1

    
           mlir::OperationState configState(loc, 
        
                                            "aiex.dma_configure_task_for"); 
        
           configState.addAttribute( 
        
               "alloc", mlir::FlatSymbolRefAttr::get(ctx, allocSym));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1131 to 1132 in 27b72c1

    
           configState.addAttribute("issue_token", 
        
                                    builder.getBoolAttr(true));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1144 to 1146 in 27b72c1

    
           builder.create<AIE::DMABDOp>( 
        
               loc, bufArg, /*offset=*/0, 
        
               static_cast<int>(numElems), dims);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1148 to 1150 in 27b72c1

    
           builder.create<AIE::DMABDOp>( 
        
               loc, bufArg, /*offset=*/0, 
        
               static_cast<int>(numElems));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1152 to 1153 in 27b72c1

    
           bdBlock->back().setAttr("burst_length", 
        
                                   builder.getI32IntegerAttr(0));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1175 to 1176 in 27b72c1

    
           mlir::OperationState awaitState(rtSeq.getLoc(), 
        
                                           "aiex.dma_await_task");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMALower.cpp

Lines 1183 to 1184 in 27b72c1

    
           mlir::OperationState freeState(rtSeq.getLoc(), 
        
                                          "aiex.dma_free_task");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAPass.cpp

Lines 330 to 339 in 27b72c1

    
           // Skip cores that already have link_with or link_files set. 
        
           if (coreOp.getLinkWith() || coreOp.getLinkFiles()) 
        
             return; 
        
           // Check if any func.call inside this core references a function 
        
           // with link_with. 
        
           std::string linkWithValue; 
        
           coreOp.walk([&](mlir::func::CallOp callOp) { 
        
             auto it = funcLinkWith.find(callOp.getCallee()); 
        
             if (it != funcLinkWith.end() && linkWithValue.empty()) 
        
               linkWithValue = it->second;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMAPass.cpp

Lines 341 to 344 in 27b72c1

    
             if (!linkWithValue.empty()) 
        
               coreOp.setLinkWithAttr( 
        
                   mlir::StringAttr::get(module.getContext(), linkWithValue)); 
        
           });

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 249 in 27b72c1

locks.second.getResult()};

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 434 to 435 in 27b72c1

    
           if (!isAIE2 && !info.externalBuffers.empty() && 
        
               !info.noLocks) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 484 to 485 in 27b72c1

    
           std::string qS2MM = state.qualifyFuseGroup(info.fuseGroupS2MM, 
        
                                                       info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 529 to 530 in 27b72c1

    
           llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
           "): all " + llvm::Twine(maxS2MM_4a) + " channels in use");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 726 in 27b72c1

llvm::StringMap<unsigned> fuseGroupPacketIDNext; // qFG → next member index

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 783 to 786 in 27b72c1

    
           for (auto &[name, info] : state.conduitMap) { 
        
             // Multi-device: ensure tile lookups target the correct device. 
        
             if (state.isMultiDevice()) 
        
               state.switchToDeviceIndex(info.deviceIndex);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 788 to 790 in 27b72c1

    
           // Pass 0: only packet conduits.  Pass 1: everything else. 
        
           if (flowPass == 0 && info.routingMode != "packet") continue; 
        
           if (flowPass == 1 && info.routingMode == "packet") continue;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 792 to 807 in 27b72c1

    
           if (info.routingMode == "cascade") 
        
             continue; 
        
           if (info.sharedMemory) 
        
             continue; 
        
           if (state.linkSrcNamesEarly.count(name) || 
        
               state.linkJoinSrcNames.count(name)) 
        
             continue; 
        
           // Link destinations: flows are emitted by linkPhase() — skip here to 
        
           // avoid duplicate flows. 
        
           if (state.linkDstNames.count(name)) 
        
             continue; 
        
           auto [prodCol, prodRow] = info.producerTileCoord; 
        
           if (prodCol < 0 || prodRow == 0) 
        
             continue; 
        
           if (info.consumerTileCoords.empty()) 
        
             continue;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 809 to 812 in 27b72c1

    
           AIE::TileOp prodTile = state.lookupTileByCoord(prodCol, prodRow); 
        
           if (!prodTile) 
        
             continue; 
        
           mlir::Value prodTileVal = prodTile.getResult();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 814 to 815 in 27b72c1

    
           if (!info.consumerTileBuffers.count(prodTileVal)) 
        
             continue;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 817 in 27b72c1

builder.setInsertionPoint(state.deviceBody->getTerminator());

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 819 to 825 in 27b72c1

    
           // ---- Determine hardware MM2S channel count for this tile. ---- 
        
           // Used by the mode=any exhaustion check (Step 3.5). 
        
           uint32_t maxMM2S = 2; // hardware default: 2 MM2S per compute tile 
        
           if (state.targetModel) 
        
             maxMM2S = state.targetModel->getNumSourceSwitchboxConnections( 
        
                 static_cast<int>(prodCol), static_cast<int>(prodRow), 
        
                 AIE::WireBundle::DMA);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 827 to 837 in 27b72c1

    
           // ---- Assign MM2S channel (fused groups share a channel). ---- 
        
           // Check if Phase 4b already assigned an MM2S channel for this conduit 
        
           // (happens when the conduit has both compute and shim consumers — the 
        
           // shim consumer flow and the compute consumer flow share the same 
        
           // producer-side MM2S channel as a hardware broadcast). 
        
           int32_t mm2sChannel = -1; 
        
           bool usedPacketFallback = false; 
        
           { 
        
             auto existingIt = state.conduitMM2SChannel.find(name); 
        
             if (existingIt != state.conduitMM2SChannel.end()) { 
        
               mm2sChannel = existingIt->second;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 839 in 27b72c1

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 841 to 876 in 27b72c1

    
           if (mm2sChannel >= 0) { 
        
             // Already assigned by Phase 4b — reuse (broadcast from same MM2S port). 
        
           } else if (!info.fuseGroup.empty()) { 
        
             std::string qFG = state.qualifyFuseGroup(info.fuseGroup, 
        
                                                       info.deviceIndex); 
        
             auto it = state.fuseGroupMM2SChannel.find(qFG); 
        
             if (it != state.fuseGroupMM2SChannel.end()) { 
        
               mm2sChannel = it->second; 
        
             } else { 
        
               mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++; 
        
               state.fuseGroupMM2SChannel[qFG] = mm2sChannel; 
        
             } 
        
             state.fuseGroupMembers[qFG].push_back(name); 
        
             state.conduitMM2SChannel[name] = mm2sChannel; 
        
           } else if (info.routingMode == "any") { 
        
             // mode=any: check whether a circuit DMA channel is available. 
        
             int32_t nextCh = state.tileNextMM2SChannel.count(prodTileVal) 
        
                                  ? state.tileNextMM2SChannel[prodTileVal] 
        
                                  : 0; 
        
             if (static_cast<uint32_t>(nextCh) < maxMM2S) { 
        
               // A free circuit-mode channel exists — use it. 
        
               mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++; 
        
               state.conduitMM2SChannel[name] = mm2sChannel; 
        
             } else { 
        
               // Circuit DMA exhausted; flag that Step 3.5 handles emission below. 
        
               usedPacketFallback = true; 
        
             } 
        
           } else { 
        
             // Check if a circuit DMA channel is available before allocating. 
        
             // If exhausted, fall back to packet-switched DMA for non-cascade/ 
        
             // non-shared-memory channels (extends the mode=any fallback). 
        
             int32_t nextCh = state.tileNextMM2SChannel.count(prodTileVal) 
        
                                  ? state.tileNextMM2SChannel[prodTileVal] 
        
                                  : 0; 
        
             if (static_cast<uint32_t>(nextCh) < maxMM2S) { 
        
               mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 878 to 884 in 27b72c1

    
           // Track packet-mode channel designation for Step 3.5c. 
        
           if (info.routingMode == "packet" && prodTile) { 
        
             auto key = std::make_pair(prodTile.getOperation(), 
        
                                       static_cast<int>(mm2sChannel)); 
        
             state.pktChannelState.isPacketChannel[key] = true; 
        
             // NOTE: usedPacketFallback is NOT set here; explicit packet-mode 
        
             // broadcast is handled below (single multi-dest packet flow).

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 886 to 893 in 27b72c1

    
           } else if (info.routingMode != "cascade" && 
        
                      info.routingMode != "shared_memory" && 
        
                      info.routingMode != "stream") { 
        
             // Circuit DMA exhausted on producer tile — fall back to 
        
             // packet-switched DMA regardless of explicit routing_mode. 
        
             // tryPacketFallback (Step 3.5c) will reuse an existing 
        
             // packet-designated MM2S channel if one exists. 
        
             usedPacketFallback = true;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 895 to 897 in 27b72c1

    
           // Cascade/shared_memory/stream cannot use packet fallback. 
        
           mm2sChannel = state.tileNextMM2SChannel[prodTileVal]++; 
        
           state.conduitMM2SChannel[name] = mm2sChannel;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 899 in 27b72c1

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 901 to 924 in 27b72c1

    
           // ---- Explicit packet-mode broadcast: single multi-dest flow. ---- 
        
           // For routing_mode="packet", emit one aie.packet_flow with all consumer 
        
           // destinations and a single packet ID.  The switchbox hardware broadcasts 
        
           // each packet to all destinations.  This matches the oracle's behavior 
        
           // (AIEObjectFifoStatefulTransform) where one bdPacket ID is used for all 
        
           // producer MM2S BDs and one packet_flow carries multiple packet_dest ops. 
        
           if (info.routingMode == "packet" && mm2sChannel >= 0 && 
        
               !info.consumerTileCoords.empty()) { 
        
             if (!state.packetIDAllocator) { 
        
               state.module.emitError( 
        
                   "internal error: packetIDAllocator not initialized"); 
        
               state.passFailed = true; 
        
               return; 
        
             } 
        
             // Use pre-allocated aligned block ID if this channel belongs to a fuse 
        
             // group with multiple packet members; otherwise fall back to sequential. 
        
             std::optional<uint8_t> pktID; 
        
             std::string qFG; 
        
             if (!info.fuseGroup.empty()) { 
        
               qFG = state.qualifyFuseGroup(info.fuseGroup, info.deviceIndex); 
        
               auto baseIt = fuseGroupPacketIDBase.find(qFG); 
        
               if (baseIt != fuseGroupPacketIDBase.end()) { 
        
                 unsigned idx = fuseGroupPacketIDNext[qFG]++; 
        
                 pktID = static_cast<uint8_t>(baseIt->second + idx);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 926 in 27b72c1

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 927 to 946 in 27b72c1

    
           if (!pktID) { 
        
             mlir::Value pktDomain = state.getMemTileDomain(prodTileVal); 
        
             pktID = state.packetIDAllocator->allocate(pktDomain); 
        
           } 
        
           if (!pktID) { 
        
             state.passFailed = true; 
        
             return; 
        
           } 
        
           state.conduitPacketID[name] = *pktID; 
        
           auto pktFlow = builder.create<AIE::PacketFlowOp>( 
        
               state.deviceOp.getLoc(), static_cast<int8_t>(*pktID), 
        
               /*keep_pkt_header=*/mlir::BoolAttr{}, 
        
               /*priority_route=*/mlir::BoolAttr{}); 
        
           mlir::Region &region = pktFlow.getPorts(); 
        
           mlir::Block *pktBlock = builder.createBlock(&region); 
        
           builder.setInsertionPointToStart(pktBlock); 
        
           builder.create<AIE::PacketSourceOp>(state.deviceOp.getLoc(), prodTileVal, 
        
                                               AIE::WireBundle::DMA, 
        
                                               static_cast<int32_t>(mm2sChannel));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 948 in 27b72c1

for (unsigned consIdx = 0; consIdx < info.consumerTileCoords.size();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 954 in 27b72c1

AIE::TileOp consTile = state.lookupTileByCoord(consCol, consRow);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 957 in 27b72c1

mlir::Value consTileVal = consTile.getResult();

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 959 to 988 in 27b72c1

    
           // Allocate S2MM channel on the consumer tile. 
        
           // Only share S2MM ports between packet channels that have the same 
        
           // dma_channel_group.  Independent packet channels (no group) get 
        
           // separate S2MM ports to prevent data crossover (e.g., Q/K vs V in 
        
           // flash attention — same tile, different data). 
        
           // Effective group key: dma_channel_group_s2mm if set, else 
        
           // dma_channel_group. 
        
           std::string s2mmGrp = state.qualifyFuseGroup( 
        
               !info.fuseGroupS2MM.empty() ? info.fuseGroupS2MM 
        
                                           : info.fuseGroup, 
        
               info.deviceIndex); 
        
           int32_t s2mmChannel; 
        
           bool s2mmShared = false; 
        
           if (!s2mmGrp.empty()) { 
        
             auto it = state.fuseGroupS2MMChannel.find(s2mmGrp); 
        
             if (it != state.fuseGroupS2MMChannel.end()) { 
        
               s2mmChannel = it->second; 
        
               s2mmShared = true; 
        
             } 
        
           } 
        
           if (!s2mmShared) { 
        
             uint32_t maxS2MM_pkt = 2; 
        
             if (state.targetModel) 
        
               maxS2MM_pkt = state.targetModel->getNumDestSwitchboxConnections( 
        
                   static_cast<int>(consCol), static_cast<int>(consRow), 
        
                   AIE::WireBundle::DMA); 
        
             int32_t nextS2MM_pkt = state.tileNextS2MMChannel.count(consTileVal) 
        
                                        ? state.tileNextS2MMChannel[consTileVal] 
        
                                        : 0; 
        
             if (static_cast<uint32_t>(nextS2MM_pkt) >= maxS2MM_pkt) {

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 990 to 993 in 27b72c1

    
           llvm::Twine("conduit-to-dma: S2MM DMA channel exhausted on " 
        
                       "tile (") + 
        
           llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
           "): all " + llvm::Twine(maxS2MM_pkt) + " channels in use");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 997 to 1059 in 27b72c1

    
                 s2mmChannel = state.tileNextS2MMChannel[consTileVal]++; 
        
                 if (!s2mmGrp.empty()) 
        
                   state.fuseGroupS2MMChannel[s2mmGrp] = s2mmChannel; 
        
               } 
        
               state.conduitConsS2MMChannel[{name, consIdx}] = s2mmChannel; 
        
               // Lock sharing: only share locks when channels share an S2MM port 
        
               // (same dma_channel_group).  Independent channels use separate locks. 
        
               if (s2mmShared) { 
        
                 auto lockIt = state.pktTileS2MMLock.find(consTileVal); 
        
                 if (lockIt != state.pktTileS2MMLock.end()) { 
        
                   info.consumerTileLocks[consTileVal] = { 
        
                       lockIt->second.first.getDefiningOp<AIE::LockOp>(), 
        
                       lockIt->second.second.getDefiningOp<AIE::LockOp>()}; 
        
                 } 
        
               } else if (!s2mmGrp.empty()) { 
        
                 // First in group: record locks for future group members. 
        
                 auto &locks = info.consumerTileLocks[consTileVal]; 
        
                 if (locks.first && locks.second) { 
        
                   state.pktTileS2MMLock[consTileVal] = {locks.first.getResult(), 
        
                                                          locks.second.getResult()}; 
        
                 } 
        
               } 
        
               builder.create<AIE::PacketDestOp>(state.deviceOp.getLoc(), consTileVal, 
        
                                                 AIE::WireBundle::DMA, 
        
                                                 static_cast<int32_t>(s2mmChannel)); 
        
               // Record the emitted port pair so that fuse group partners sharing 
        
               // the same MM2S+S2MM ports do not emit a duplicate circuit flow. 
        
               FlowKey fk{prodTileVal.getAsOpaquePointer(), mm2sChannel, 
        
                          consTileVal.getAsOpaquePointer(), s2mmChannel}; 
        
               emittedFlowPorts.insert(fk); 
        
             } 
        
             builder.create<AIE::EndOp>(state.deviceOp.getLoc()); 
        
             builder.setInsertionPointAfter(pktFlow); 
        
             continue; // skip per-consumer circuit/fallback flow loop 
        
           } 
        
           // ---- Emit flows per consumer. ---- 
        
           for (unsigned consIdx = 0; consIdx < info.consumerTileCoords.size(); 
        
                ++consIdx) { 
        
             auto [consCol, consRow] = info.consumerTileCoords[consIdx]; 
        
             if (consRow == 0) 
        
               continue; 
        
             // For single-consumer conduits, adjacent tiles use shared memory 
        
             // (Phase 3c) — no DMA flow needed.  For broadcast (multi-consumer), 
        
             // Phase 3c is skipped; all consumers use DMA, so flows are needed 
        
             // for every consumer regardless of adjacency. 
        
             // Exception: forceDMA forces DMA even for adjacent tiles. 
        
             // Also: when shim consumers exist, the producer needs DMA MM2S 
        
             // regardless (to reach the shim tile via the switchbox network), 
        
             // so the compute consumer flow must also be emitted. 
        
             if (!info.forceDMA && info.consumerTileCoords.size() == 1 && 
        
                 info.shimConsumerTileCoords.empty()) { 
        
               bool explicitSharedMem = (info.routingMode == "shared_memory"); 
        
               bool rightAdj = state.targetModel->isLegalMemAffinity(prodCol, prodRow, 
        
                                                                     consCol, consRow); 
        
               bool leftAdj = state.targetModel->isLegalMemAffinity(consCol, consRow, 
        
                                                                    prodCol, prodRow); 
        
               if (explicitSharedMem || rightAdj || leftAdj)

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1061 to 1087 in 27b72c1

    
           } 
        
           AIE::TileOp consTile = state.lookupTileByCoord(consCol, consRow); 
        
           if (!consTile) 
        
             continue; 
        
           mlir::Value consTileVal = consTile.getResult(); 
        
           if (usedPacketFallback) { 
        
             // Step 3.5: attempt packet DMA fallback. 
        
             bool ok = 
        
                 tryPacketFallback(state, name, info, prodTileVal, prodCol, prodRow, 
        
                                   consTileVal, consCol, consRow, consIdx); 
        
             if (!ok) { 
        
               // Step 4: all modes exhausted — emit a hard error. 
        
               state.deviceOp.emitError( 
        
                   llvm::Twine("conduit-to-dma: no DMA resources available for " 
        
                               "conduit '") + 
        
                   name + "': circuit DMA MM2S channels exhausted on tile (" + 
        
                   llvm::Twine(prodCol) + "," + llvm::Twine(prodRow) + 
        
                   ") and packet DMA fallback is also ineligible " 
        
                   "(check BD budget, lock budget, and packet flow ID budget)"); 
        
               // B-3 fix: return immediately so the outer conduit loop does not 
        
               // continue processing subsequent conduits with broken state after 
        
               // both circuit DMA and packet fallback have been exhausted. 
        
               state.passFailed = true; 
        
               return;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1089 to 1092 in 27b72c1

    
             // tryPacketFallback records conduitMM2SChannel and 
        
             // conduitConsS2MMChannel internally; skip the circuit path below. 
        
             continue; 
        
           }

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1094 to 1102 in 27b72c1

    
           // S2MM fuse group: reuse existing S2MM channel if another conduit 
        
           // in the same fuse group already allocated one on this tile. 
        
           int32_t s2mmChannel; 
        
           if (!info.fuseGroupS2MM.empty()) { 
        
             std::string qS2MM = state.qualifyFuseGroup(info.fuseGroupS2MM, 
        
                                                         info.deviceIndex); 
        
             auto it = state.fuseGroupS2MMChannel.find(qS2MM); 
        
             if (it != state.fuseGroupS2MMChannel.end()) { 
        
               s2mmChannel = it->second;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 1104 in 27b72c1

uint32_t maxS2MM_4c = 2;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1116 to 1117 in 27b72c1

    
           llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
           "): all " + llvm::Twine(maxS2MM_4c) + " channels in use");

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1122 to 1143 in 27b72c1

    
               state.fuseGroupS2MMChannel[qS2MM] = s2mmChannel; 
        
             } 
        
             std::string bdKey = name + "__s2mm_" + std::to_string(consIdx); 
        
             state.fuseGroupMembers[qS2MM].push_back(bdKey); 
        
           } else { 
        
             // Bounds-check S2MM channels on the consumer tile. 
        
             uint32_t maxS2MM_4c = 2; 
        
             if (state.targetModel) 
        
               maxS2MM_4c = state.targetModel->getNumDestSwitchboxConnections( 
        
                   static_cast<int>(consCol), static_cast<int>(consRow), 
        
                   AIE::WireBundle::DMA); 
        
             int32_t nextS2MM_4c = state.tileNextS2MMChannel.count(consTileVal) 
        
                                       ? state.tileNextS2MMChannel[consTileVal] 
        
                                       : 0; 
        
             if (static_cast<uint32_t>(nextS2MM_4c) >= maxS2MM_4c) { 
        
               state.deviceOp.emitError( 
        
                   llvm::Twine("conduit-to-dma: S2MM DMA channel exhausted on " 
        
                               "tile (") + 
        
                   llvm::Twine(consCol) + "," + llvm::Twine(consRow) + 
        
                   "): all " + llvm::Twine(maxS2MM_4c) + " channels in use"); 
        
               state.passFailed = true; 
        
               return;

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1145 to 1157 in 27b72c1

    
             s2mmChannel = state.tileNextS2MMChannel[consTileVal]++; 
        
           } 
        
           state.conduitConsS2MMChannel[{name, consIdx}] = s2mmChannel; 
        
           // Deduplicate: skip if the same source→dest port pair was already 
        
           // emitted by the packet broadcast path or a fuse group partner. 
        
           // This prevents duplicate packet_flow + aie.flow for the same ports 
        
           // when MM2S fuse group members share the same consumer S2MM channel. 
        
           FlowKey fk{prodTileVal.getAsOpaquePointer(), mm2sChannel, 
        
                      consTileVal.getAsOpaquePointer(), s2mmChannel}; 
        
           if (emittedFlowPorts.count(fk)) 
        
             continue; 
        
           emittedFlowPorts.insert(fk);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Lines 1159 to 1161 in 27b72c1

    
           state.emitFlow(info.routingMode, prodTileVal, AIE::WireBundle::DMA, 
        
                          mm2sChannel, consTileVal, AIE::WireBundle::DMA, 
        
                          s2mmChannel);

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ConduitToDMARoute.cpp

Line 1163 in 27b72c1

}

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ObjectFifoToConduit.cpp

Lines 1053 to 1054 in 27b72c1

    
           loc, elemTy, 
        
           mlir::FlatSymbolRefAttr::get(ctx, name));

[clang-format] _{reported by reviewdog 🐶}

mlir-aie/lib/Dialect/Conduit/Transforms/ObjectFifoToConduit.cpp

Lines 1271 to 1272 in 27b72c1

    
           loc, storedVal, 
        
           mlir::FlatSymbolRefAttr::get(ctx, name));

- Remove stale XFAIL markers from passC_delta tests (now passing)
- ConduitToDMALink: clean up iter_count repeat_count computation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Lit suite: 163 PASS (was 158). Hardware validated: all three bottleneck variants match the oracle at 0.33-0.34 ms/iter on npu1. Fix 1 (Pass A): released window reuse — ObjectFifoToConduit.cpp findWindowInDominatingBlock incorrectly reused a preamble acquire window that had been released before a loop body, suppressing AcquireGreaterEqual inside the loop and causing deadlock. Fix: track releasedWindows; skip them. Regression test: objectfifo_produce_acquire_in_loop_body.mlir Fix 2 (Pass C delta): Produce acquire delta=0 in nested scopes — ConduitToDMALower.cpp Cross-block state reset propagated lastAcquireCount as heldCount for all ports. For Produce channels (no DMA pre-fill), heldCount must not be reset. Fix: only reset heldCount=lastAcquireCount for Consume-port channels. Regression test: passC_produce_acquire_loop_delta.mlir Fix 3 (Pass C dispatch): PEANO scf.index_switch bug — ConduitToDMALower.cpp PEANO (llvm-aie v20.0.0, commit 0e7cfc0e) generates incorrect .data lookup tables for scf.index_switch with modular indices. For depth=4 at counter=3: (3+1)%4=0 loaded buff_3 instead of buff_0, causing lock-free concurrent DMA+core access and hardware fault for N_middle>=6 sliding windows. Fix: replace scf::IndexSwitchOp with nested scf::IfOp chain (cf.cond_br), which PEANO compiles correctly without lookup tables. Regression tests: passC_no_index_switch.mlir, passC_no_index_switch_regression.mlir Supporting: rotation counter uses memref.alloca (not aie.buffer) — aie.buffer caused init stores and accesses to disagree on the storage address due to linker-script symbol vs .data section offset mismatch. Updated 7 lit tests for scf.if output format; passC_rotation_counter_aie_buffer.mlir updated to check for memref.alloca. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

- Conduit Python bindings: ConduitBinding.td, conduit.py, CMake wiring, CAPI registration (lib/CAPI/Dialects.h + Dialects.cpp). All 18 ops importable via aie.dialects.conduit. API: create_(), Acquire, Release, Link, SubviewAccess with real MLIR type system. - benchmarks/yolov8n/: ObjectFifo + Conduit stubs (both compile clean), yolov8n_full_conduit.mlir (layers 0-4, 4-column npu2 layout), scalar kernels with correct Quark AdaRound requant shifts, CPU references, Makefile + README + hw_constraints analysis. - weights/conv{00..63}_*.npy: all 64 Conv QDQ int8 weights + scales from yolov8n_VINT8_adaround_npu.onnx (per-tensor symmetric, zp=0, pow2 scales). - yolov8n_full_plan.md: 8-column npu2 tiling plan, OC-parallel strategy. - CLAUDE_PHOENIX.md: session state + next-session todo list. - In progress at session end: layers 5-9 MLIR, neck MLIR, full-resolution 512x512 OC-merge verification (test_oc_merge.mlir). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

## Pass C fixes (ConduitToDMALink.cpp, ConduitToDMAPass.cpp, ConduitToDMARoute.cpp)

1. MemTile relay join sources: Phase 3j buffer allocation + Phase 5 flow emission for MemTile→MemTile relay channels used as join sources (previously caused silent miscompilation)
2. BD parity pool check: verifyMemTileBDParity() after linkPhase() — AIE2 MemTiles partition 48 BDs into even-channel (0-23) and odd-channel (24-47) pools of 24 each; this check catches violations at compile time instead of crashing aiecc
3. S2MM channel overflow check: bounds check at all 4 tileNextS2MMChannel++ sites using getNumDestSwitchboxConnections() — prevents silent generation of invalid DMA:6/7 channel indices that crash aiecc pathfinder

## New lit tests (9 total)

conduit_oc_merge_join, conduit_memsame_column_relay, conduit_acquire_async_sliding_window, conduit_full_resolution_element_types, conduit_memtile_relay_join_source, conduit_bd_parity_{error,ok}, conduit_s2mm_channel_overflow, conduit_memtile_channel_overflow

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Phase 5.5 (join-source MM2S path) created aie.mem blocks without registering them in tileToDMARegion. When the same compute tile was also a broadcast consumer (processed later by Phase 5.5 consumer S2MM path), Phase 5.5 could not find the existing region and created a second aie.mem for the same tile. aiecc silently discards the second block, causing a hardware deadlock. Fix: before creating a new aie.mem, check tileToDMARegion. If an existing region is found, append the MM2S chain into it rather than creating a duplicate. Also removed ~80 lines of dead code: the linkSrcNamesEarly distribute-source path in Phase 5.5 was proven unreachable. Regression test: conduit_broadcast_join_single_mem.mlir Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

…ates arith.remui is a software divide; AIE2 has no hardware divide instruction. Since the rotation counter is always in [0, depth-1] and the release delta is at most depth, the sum counter+delta < 2*depth always holds. One conditional subtract (or a single AND for power-of-2 depths) is sufficient. Add emitFastModulo lambda in ConduitToDMALower.cpp: - Power-of-2 depth d: arith.andi(sum, d-1) — single instruction - General depth d: arith.cmpi uge + arith.subi + arith.select — branchless Apply at all 5 sites: 4 Release counter updates (sync+async, Consume+Produce) and the subview_access offset computation. Update 8 lit tests: all arith.remui CHECK patterns updated to andi (power-of-2) or cmpi+subi+select (general). Add conduit_to_dma_rotation_modulo_general.mlir to explicitly test and assert the general-depth branchless path (depth=3). 174/174 Conduit lit PASS. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Three locations in linkPhase() append new DMA channels into an existing aie.mem region by finding its aie.end block and erasing it. Previously these used `if (endBlock)` guards that silently dropped the entire channel emission if the end block was absent (which indicates a malformed IR). Replace each silent skip with an `assert` that fires immediately with a diagnostic message: - Join-source MM2S append (Phase 5.5, line ~973) - Case B compute→shim MM2S append (Phase 5.5, line ~1333) - Case C fused/unfused MM2S append (Phase 5.5, line ~1140) The Phase 5.5 S2MM path (existingEndBlock) correctly uses if/else and is not affected by this change. 174/174 Conduit lit PASS. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

1. conduit_to_dma_multi_consumer_shim.mlir — Phase 4b multiConsumer path: conduit with BOTH a compute consumer AND a shim consumer. Exercises the indexed lock suffix (_cons_0 / _cons_1), conduitMM2SChannel allocation by Phase 4b, and MM2S channel reuse by Phase 4.5a for both flows.
2. objectfifo_disable_sync_distribute_src.mlir — disable_synchronization on the distribute link SOURCE (line 154 guard in ConduitToDMALink.cpp: `if (isDistribute && !srcInfo.disableSynchronization)`). The existing disable_sync_link test only covers the join DESTINATION side. This test confirms no MemTile slice locks are emitted for the source but BD chains and destination fifo locks are still present.
3. conduit_to_dma_shim_consumer_npu2.mlir — Phase 4b (compute→shim) on npu2 (AIE2p, Strix). Confirms shim-side lock init=0, correct naming, and flow emission on the AIE2p target model. Existing shim_lock_init test uses npu1_1col only.

177/177 Conduit lit PASS.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

P0 fixes (block any public release): - ConduitToDMALink.cpp: remove 9 llvm::errs() DEBUG prints from Phase 5 distribute/join S2MM channel assignment logic. These fired unconditionally on every --conduit-to-dma invocation. - ConduitToDMALink.cpp: replace 3 assert(endBlock) IR-validity checks with emitError + passFailed + return in the Case B, Case C, and join-source aie.mem append paths. Crashes become graceful diagnostics. - ConduitToDMACommon.h: replace assert(buffer) in emitBDBlock with mlir::emitError + early return. Null buffer is now a diagnostic. P1 correctness fixes: - ObjectFifoToConduit.cpp: fix cascade Produce release handler pairing. The forward scan for the matching acquire now stops at the release op (break on &scan == op) instead of scanning the entire block and picking the last acquire. Before this fix, two acquire/release pairs for the same cascade fifo in one block caused a use-after-free crash: the first release claimed the second acquire (wrong), erased it, and the second release then walked over the freed op. - ConduitDepthPromotion.cpp: fix memory budget pre-population using actual conduit depth instead of hardcoded 2. The old code charged perSlotBytes*2 for every existing conduit regardless of depth, causing the budget check to undercount memory for depth>2 conduits and allowing invalid promotions that exceed tile memory. 
Regression tests (11 new files): - objectfifo_cascade_two_pairs_same_block.mlir: two acquire/release pairs for the same cascade fifo in one block — would crash before the fix - depth_promote_memory_budget_actual_depth.mlir: depth-4 existing conduit fills tile budget; candidate must be rejected (not promoted) - invalid_cascade_verifiers.mlir: 5 verifier error cases (non-cascade conduit ref, float type, wrong width, cascade link mode error) - invalid_m4_dynamic_dim.mlir: M4 dynamic dimension warning on create - invalid_subview_type_mismatch.mlir: M2 result type mismatch - invalid_release_async_escape.mlir: M10 escape via return + call - invalid_get_memref_async_escape.mlir: M10 escape via return - invalid_wait_all_async_m8c.mlir: M8c non-token operand + M10 escape - invalid_link_forward_offsets.mlir: forward mode with 0 srcs / 0 dsts - invalid_link_csdf_join.mlir: join unannotated skip + balanced PASS - invalid_link_csdf_distribute.mlir: Denolf Eq. 48 skip + PASS paths Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

…-approved) All 10 Section A fixes from the synthesized audit plan, reviewed and approved by code-reviewer before merge. Each fix has a corresponding regression test. A-1: Case B MM2S channel hardcoded to 0 (ConduitToDMALink.cpp) Both Case B branches now dynamically allocate MM2S channels via tileNextMM2SChannel[prodTileVal]++ with bounds check. Previously, when two conduits shared a compute→shim path, both got channel 0 and one was silently discarded by aiecc, causing hardware deadlock. A-2: Phase 4.5 shim allocation: only first consumer processed (ObjectFifoToConduit.cpp) Removed early `break` from shim consumer loop. Now emits one aie.shim_dma_allocation per shim tile; multi-shim names get _0, _1, ... suffixes. Previously only the first shim consumer in a broadcast got an allocation. A-4: Pass B dynamic-stride error leaves air op in IR (AirChannelToConduit.cpp) Error path now adds the op to putGetToErase before signalPassFailure() so it is cleaned up. Previously the op survived into mixed-state IR. A-5: Pass B channel decl erased with remaining uses (AirChannelToConduit.cpp) Phase 5 now errors+no-erase when symbolKnownUseEmpty is false. Previously it warned and erased anyway, producing dangling symbol references. A-6: AirChannelIndexFlattener erases decls on failure (AirChannelIndexFlattener.cpp) Added passFailed bool tracking. Phase 4 decl erase guarded by !passFailed. Previously, a dynamic-index error in Phase 3 still allowed Phase 4 to erase the original decls, leaving unrewritten put/get ops with dangling refs. A-7: SubviewAccess from WaitWindow always uses Port::Consume (ConduitToDMALower.cpp) WaitWindow-driven SubviewAccess now derives port by comparing the enclosing CoreOp tile against conduit's producerTileCoord. Previously produce-side async programs selected the consumer rotation counter, causing wrong buffer selection. 
A-8: emitFastModulo no guard for release count > depth (ConduitToDMALower.cpp) All 4 emitFastModulo call sites now have a count>depth guard emitting a hard error. Previously a release with count>depth silently produced a wrong rotation counter value (single-subtract insufficient for counter+delta>=2*depth). A-9: wait_window name not verified against token channel (ConduitOps.cpp) WaitWindow::verify() now traces the defining acquire_async op and confirms channel names match. Previously wait_window %tok for "wrong_chan" compiled silently and lowered with incorrect lock operations. A-10: cascade channels not rejected in distribute/join links (ConduitOps.cpp) Link::verify() now rejects cascade-routed channels as srcs/dsts in distribute or join links. Previously this produced wrong-architecture DMA configurations silently. Regression tests (8 new files, all reviewer-approved): conduit_case_b_mm2s_channel_distinct.mlir (A-1, full objectfifo pipeline) objectfifo_two_shim_consumers.mlir (A-2) air_channel_dynamic_stride_error.mlir (A-4+A-5) air_channel_flatten_no_erase_on_failure.mlir (A-6) conduit_subview_waitwindow_produce_port.mlir (A-7) conduit_release_count_exceeds_depth_error.mlir (A-8) invalid_wait_window_name_mismatch.mlir (A-9) invalid_cascade_link_distribute.mlir (A-10) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Fix 1 (4 test failures): ConduitToDMALink.cpp Case B MM2S double-allocation. Both Case B branches (append-existing and create-new) now check conduitMM2SChannel[name] before allocating a new MM2S channel, reusing the channel already set by routePhase for shim-consumer conduits. The A-1 fix had incremented tileNextMM2SChannel unconditionally, causing channel 0 to be over-counted and subsequent conduits to overflow the 2-channel limit. Fix 2 (1 test failure): AirChannelToConduit.cpp removed putGetToErase.push_back(op) from error paths. The op's SSA token result was still used by downstream air.wait_all ops; erasing it caused LLVM assertion "operation destroyed but still has uses". Pass failure is already signaled; leaving the op in place is safe since the pipeline aborts. Fix 3 (1 test failure): conduit_release_count_exceeds_depth_error.mlir CHECK pattern updated to match actual M8 verifier message format. Fix 4 (1 test failure): conduit_subview_waitwindow_produce_port.mlir CHECK sym_name updated from fifo_prod_buff_0 to fifo_buff_0 to match Pass C's actual buffer naming for compute-to-compute producer paths. All 203 lit tests now pass: 203/203 PASS / 0 FAIL / 0 XFAIL. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

… tests 10 reviewer-approved fixes from synthesized audit plan Section B. B-4 and B-6 deferred (cascade design pending). B-1 (ConduitToDMALower): Produce-port heldCount reset to 0 at loop entry. B-2 (ConduitToDMALink): Record join relay S2MM in conduitConsS2MMChannel. B-3 (ConduitToDMARoute): 6 passFailed+continue→return sites; documentation. B-5 (ConduitOps, Conduit.td): AnyAttr blocklist verifier + documentation. B-7 (AirChannelToConduit): Hard error for rank≥3 memref operands. B-8 (ObjectFifoToConduit): Reject via_cascade+aie_stream combination. B-9 (ConduitToDMAPass): passFailed protocol comment. B-10 (ConduitToDMAPass): verifyMemTileBDParity() AIE2-only guard. B-11 (ConduitToDMACommon.h): resolveForTile() DeviceOp sentinel. New pass: --aie-check-cascade-pairing (AIECheckCascadePairing.cpp) Validates aie.cascade_flow ↔ aie.put_cascade / aie.get_cascade pairing. Enables future removal of cascade ops from the Conduit dialect. Registered in AIEPasses.h/.td and CMakeLists.txt. Regression tests (9 new, all reviewer-approved). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

…AIE2 updates B-3 additional regression test (reviewer-approved): 4 objectfifos on a 2-channel shim tile; of3 triggers S2MM overflow (expected-error); of4 proves the loop exits (return, not continue) by producing no second error. Updated conduit_bd_parity_{error,ok}.mlir to note they require AIE2 target (per B-10: verifyMemTileBDParity() now skips for non-AIE2 architectures). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

…ease channels When acquire_count > release_count (sliding-window pattern), Pass C was allocating only `depth` physical buffers and initializing prod_lock to `depth`, causing hardware deadlock at iteration 2: consumer holds maxAcquire slots, releases minRelease, DMA fills minRelease, but prod_lock never reaches maxAcquire again. Fix: Phase 2.6 in collectPhase scans Consume-port acquire/release pairs via the window SSA def-use chain and computes the maximum pairwise sliding overhead (acquireCount - releaseCount). ConduitInfo.slidingWindowOverhead stores this value. nConsumerBuffers() = depth + slidingWindowOverhead. Updated sites: - ConduitToDMAAlloc.cpp: normal consumer loop + Phase 3c shared-mem path use nConsumerBuffers() for buffer count and prod_lock init. - ConduitToDMALink.cpp: Case A S2MM BD ring uses nBufs BD blocks (not depth). - ConduitToDMALower.cpp: release counter modulo uses nConsumerBuffers() for both sync Release and async ReleaseAsync paths. CSDF patterns (acquire N, release N — fully balanced) are unaffected: the sliding overhead is 0 for those, so nConsumerBuffers() == depth. Regression test: conduit_partial_release_buffers.mlir depth=2, acquire=3, release=1 → 4 buffers, prod_lock init=4, 4-block BD ring. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Rejects programs that mix Tier 2 (acquire/release) and Tier 3 (get_memref/put_memref) ops for the same channel name within the same aie.core region. Without this check, the rotation counter invariant breaks silently, causing incorrect physical buffer addressing at runtime. Cross-endpoint mixing (T3 on shim, T2 on compute) remains valid and is the canonical mixed-tier pattern. Implementation details: - Walks each aie.core region, maintaining per-core T2 and T3 channel maps - Fires one error per channel per core (alreadyErrored set suppresses duplicate reports from Release/SubviewAccess tracing the same window) - Release (blocking) has no name attr — traces back through window SSA value to the defining Acquire or WaitWindow to recover the channel name - 7 test cases: 3 failing (acquire+get, reversed order, async variants) and 4 passing (cross-endpoint, cross-core, T2-only, T3-only) Lit suite: 205 PASS / 1 pre-existing FAIL (static_strides, unrelated) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Pass C's linkPhase previously hard-rejected conduit.link ops where the relay tile is a compute tile (not a MemTile), causing 4 positive corpus tests to fail. This adds a CoreTile relay path that: 1. Creates S2MM + MM2S DMA chains on the relay tile's aie.mem block using the src conduit's pre-allocated relay buffers and locks. 2. Allocates relay locks when missing (shim-to-relay case where Phase 3 does not create consumer-side locks on the relay tile). 3. Emits aie.flow from relay tile MM2S → consumer tile S2MM. Phase 3c in allocPhase is updated to skip link-dst conduits from shared-memory detection. Link dst conduits receive data via DMA from the relay, not directly from the producer tile, so adjacent-tile shared-memory detection is incorrect for them. Also fixes a missing #include for llvm/ADT/StringSet.h in ConduitCheckTiers.cpp that caused a build error in the M-12 verifier. Corpus: 108/130 → 113/130 PASS Tests fixed: link_via_shared_mem.mlir, link_via_shared_mem2.mlir, link_via_shared_mem3.mlir, link_via_shared_mem_diff_memref.mlir, duplicate_link_test.mlir (compute-tile relay now handled) Remaining: 17 failures = 16 negative/error tests + 1 AIE1 hard-error Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

…e+1) Previous formula (depth + acq - rel) over-allocated: depth=4, acquire=3 yielded 6 buffers instead of 4, exceeding 32KB tile memory budget. Correct formula: max(depth, maxConsumerAcquire + 1) — minimum buffers needed is max_acquire + 1 (window + 1 DMA slot), capped at depth if depth is already sufficient. Rename slidingWindowOverhead → maxConsumerAcquire in ConduitInfo to reflect the new semantics: we track the maximum acquire count directly (not the overhead), since the formula uses it as max(depth, count+1). Also add hard error for Produce-port partial-release when maxProdAcquire > depth (silent under-allocation; not yet supported). Cases where maxProdAcquire <= depth are safe (effectiveDepth handles them). New lit tests: - conduit_partial_release_buffers.mlir: updated comments to document both the old (wrong) and new (correct) formulas; depth=2, acquire=3 case still allocates 4 buffers (max(2,4)=4, same as old formula coincidentally) - conduit_partial_release_depth4.mlir: new test for depth=4, acquire=3 → max(4,4)=4 buffers (NOT 6 from the wrong formula) Bottleneck investigation: conduit_direct_bottleneck.mlir compiles correctly. inRows (depth=4, maxAcquire=3): max(4,4)=4 buffers, prod_lock init=4. Lock deltas: preamble AcquireGreaterEqual(2), middle loop AGE(1), tail AGE(1). S2MM BD ring covers all 4 buffers. IR looks correct; correct=False on hardware is likely a runtime issue (PEANO codegen or test harness), not a Pass C bug. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Broadcast channels use capacity = product(broadcast_shape) for fan-out, not buffer slots. M7 would misinterpret this as buffer capacity. Guard: skip rate annotation when channel is detected as broadcast. SPSC channels with infer-rates=true correctly get producer_rates/consumer_rates; broadcast channels are left unannotated. New lit test: air_channel_infer_rates_broadcast_guard.mlir (PASS). Lit suite: 207 PASS / 2 FAIL (2 pre-existing failures unchanged). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Phase 1.5 was merging acquire counts from all consumer cores per fifo name via module.walk, producing incorrect multi-rate sequences for multi-consumer fifos (e.g., core A: always 1, core B: always 2 → spurious [1,2] CSDF pattern). Fix: track per-(fifoName, CoreOp) using a pair key. Each core's acquire sequence is collected independently. For single-consumer fifos, the one core's sequence is used as accessPattern; for multi-consumer fifos, no merged pattern is emitted (the old merge was invalid). Add --objectfifo-to-conduit=infer-rates=true option (default false): - Single-consumer fifos: attaches producer_rates from Produce-port release counts and consumer_rates from the single consumer's Consume-port acquire counts, enabling M6/M7 CSDF verification. - Multi-consumer fifos: skipped with an op.emitRemark; rates are ambiguous because each consumer has its own per-core rate sequence. New regression test: objectfifo_csdf_infer_rates.mlir verifies: (a) single-consumer gets rates annotated correctly (b) multi-consumer emits a remark without attaching rates (c) default (infer-rates=false) produces no rate annotations Lit suite: 208 PASS / 1 pre-existing FAIL (air_channel_to_conduit_static_strides.mlir — unrelated pre-existing) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

plio was added as an untyped ad-hoc attribute bypassing ODS. Now declared as OptionalAttr<BoolAttr>:$plio on conduit.create with verifier check (plio only valid on shim tiles). Removes two stale comment lines claiming "P1-F rejects repeat_count > 1" which was never implemented. MUST-before-upstream: ad-hoc attributes violate MLIR dialect conventions. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

A dropped !conduit.window.token (no wait_window or wait_all use) leaves the hardware lock permanently acquired, causing deadlock. Add a 3-line use_empty() check in AcquireAsync::verify(). Lit test: 1 new test PASS. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

…ifier The plio verifier incorrectly required producer_tile to be the shim endpoint. plio objectfifos can flow in either direction (shim→compute or compute→shim), so the verifier now checks that at least one endpoint is at row 0: either producer_tile row==0 or shim_consumer_tiles non-empty. Adds plio_attribute.mlir with three cases: (a) shim producer (producer_tile row=0) — PASS (b) compute→shim (shim_consumer_tiles set, row=0) — PASS (c) no shim endpoint at all — ERROR (expected-error verified) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Cross-tier temporal fusion works for Tier 3 depth=1 (universal in Pass A/B output). Tier 3 with depth>1 has a multi-block BD ring that would create ordering conflicts when chained into a fuse group. Add guard with remark. Hand-authored IR is the only path to this case. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

… ring count Three bug fixes (commits 6c3fdb5, 2455feb) lacked triggering lit tests: - conduit_produce_sliding_window_error.mlir: Produce-port acq > depth errors - conduit_core_tile_relay.mlir: CoreTile relay lowers to two flows + BD chains - conduit_bd_ring_count.mlir: S2MM BD ring covers all nConsumerBuffers() slots Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Replaces OptionalAttr<StrAttr> with typed ODS enums for both attributes: - RoutingMode enum: circuit | packet | cascade | stream (absent = unresolved; "any" sentinel removed per C-4 decision) - LinkMode enum: distribute | join | forward Both enums follow the Conduit_Port pattern with EnumAttr assemblyFormat "`<` $value `>`" producing syntax like #conduit.routing_mode<circuit>. C++ changes: - ConduitOps.cpp: Link::verify() and Create::verify() use enum comparisons; cascade verifier error message updated to enum syntax; M5 string check removed (ODS parser enforces valid values at parse time) - ObjectFifoToConduit.cpp: emits RoutingModeAttr / LinkModeAttr directly - AirChannelToConduit.cpp: same; cascade check uses RoutingMode::Cascade - ConduitToDMACollect.cpp: reads routing_mode enum via stringifyRoutingMode; absent routing_mode → "any" internally so Pass C Step 3.5 still fires - ConduitToDMALink.cpp: getMode() returns LinkMode enum - ConduitInferModes.cpp: absent routing_mode = unresolved; uses typed setRoutingModeAttr() setter for all resolution outputs - ConduitDepthPromotion.cpp: reads routing_mode via typed accessor Test updates: 49 .mlir files updated to use enum syntax for all routing_mode and mode attributes. routing_mode="any" inputs removed (absent = unresolved). MUST-before-upstream: type enforcement is required for all cross-op attrs in MLIR dialect upstreaming reviews. Lit suite: 214/215 PASS (1 pre-existing failure: air_channel_to_conduit_static_strides.mlir) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

…ndtrip Dedicated test for C-4 enum migration (routing_mode + link_mode attrs): - Case 1: all four routing_mode values (circuit/packet/cascade/stream) parse and roundtrip via the new #conduit.routing_mode<X> enum syntax - Case 2: all three link_mode values (distribute/join/forward) parse and roundtrip via the new #conduit.link_mode<X> enum syntax - Case 3: absent routing_mode (unresolved) is valid and produces no error Negative cases (invalid string → ODS parser error) are in invalid.mlir: @bad_routing_mode (routing_mode = "broadcast") and @bad_unknown_mode (mode = "relay"). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

…ease_async

conduit.acquire_async now requires an explicit port attribute (Conduit_PortAttr:$port), removing the implicit "always Consume" assumption. Pass C lowering (Step 8a) reads op.getPort() instead of hardcoding Port::Consume, enabling both Consume and Produce async acquisitions through the deferred-lock path.

conduit.release_async gains an optional Conduit_WindowType:$window SSA operand. When present, the verifier confirms:
(1) operand type is !conduit.window<T>
(2) if the defining op is conduit.acquire or conduit.wait_window, its channel name matches the $name attribute

The name-only path (no $window) remains valid for standalone producer-side async releases where no prior acquire SSA value is in scope.

Assembly formats:
- With window: release_async(%win : type) {attrs} : !conduit.window.token
- Without window: release_async {attrs} : !conduit.window.token

ConduitLivenessCheck updated to also accept release_async with $window as a valid M11 release path (direct use-list check complements name lookup).

Updated 12 existing test files to add required port attr to acquire_async. Updated roundtrip.mlir to use SSA window form on release_async. New test: conduit_async_ops_improved.mlir (5 cases, all pass).

Lit suite: 202 PASS / 18 FAIL (all failures pre-existing from C-4/C-2/C-7 work in progress). Zero regressions from C-6 changes.

SHOULD-before-upstream: making port mandatory is an API break for acquire_async callers that omitted it, so landing it before upstreaming avoids a post-upstream breaking change; the $window operand makes release_async safety-checkable by type.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

…Buffers The existing nConsumerBuffers override (putCount > 1 && dmaRepeat == 0 => allocate putCount slots) is correct only when the consumer's gets sit OUTSIDE a loop. Inside a loop, the per-iteration re-acquire pattern means standard depth-promoted slot count is the right answer, not putCount — over-allocation in the in-loop case wastes BD chain budget and triggers spurious BD-chain-overflow diagnostics on otherwise-fine shapes. Adds a consumerGetsInLoop bool on ConduitInfo, populated in collectPhase by walking GetMemrefAsync / Acquire(Consume) / AcquireAsync and checking getParentOfType<LoopLikeOpInterface>(). nConsumerBuffers now requires !consumerGetsInLoop alongside the existing predicates. This is Tier 0 of the broader BD-chain-overflow audit (Sprint N+2 Task #14): consumer-side S2MM fixed. Case B (compute MM2S) and Join MM2S still need the targetModel.getNumBDs() cap helper from Task #15 — their BUG fixtures get comment updates documenting why this fix does NOT flip them yet, so the next person reading them isn't misled. New canonical case-A fixture pins the success shape post-fix. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…eded ArithProgression canon was producing BD chains with arbitrary outer-dim depth, then Pass C would refuse to emit the chain when length exceeded the target tile's dim cap (3 for compute, 4 for MemTile/Shim per AIEDialect.cpp:2233-2236 + AIEDMATasksToNPU.cpp:347-350). Late-pipeline diagnostic, confusing to debug. Add tileBDDimCap() helper in CanonicalizeChannelPutsUtils. Gate both tryCollapseArithPuts and tryCollapseArithGets on kExisting + 1 <= worstDimCap across all participating tiles. Refuses cleanly with a diagnostic naming the channel + worst-cap. Pinned by canon_arith_progression_dim_cap_refuse.mlir (4-dim shim->compute scenario; expected-warning + CHECK-NOT). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

Implements Track 3 Phase 2 per CLAUDE.md USER-LOCKED 2026-04-26. Extends --conduit-fuse-operators (does NOT add a sibling pass) to merge K producers writing to the same fusion_group. Consumer-side IR shape is K separate fused_intermediate_N channels, preserving Pass C's single-producer assumption. Mechanism: - ObjectFifoToConduit propagates fusion_index : i32 from aie.objectfifo onto conduit.create. Used to pair K producers with K consumer-side inputs in fan-in shapes. Discardable; absent on non-convergent IR. - ConduitFuseOperators gains: getFusionIndex helper, fgInfo pre-scan, Q2 mixed-fg rejection diagnostic, K-stable convergentNameMap reservation by (fg, fusion_index) pair, classifyConvergent tri-state for non-adjacent producer/consumer, devB-selection swap, fg+fg-index pair matching with element-type fallback gated OFF for convergent participants (cross-element-type SCOPED OUT for initial landing per the locked decision; revisit after renameChannelRefs LOW bug fix). - --i / devBIdx bookkeeping fix for the non-adjacent erase case. Lit fixtures: K=2 basic (PASS), K=3 three-producers (XFAIL pinning the Phase 3 shape pre-impl), LCM rate-align (XFAIL), memory-overflow placeholder (XFAIL), mixed-fg rejection (PASS via --verify-diagnostics). Note: this commit is the IMPLEMENTATION. Earlier commit 4c0611e landed the lit-NPU smoke for an already-shipped K=2 capability that was incorrectly described as such — PLAN.md is being updated in the conduit-notes top-level repo to reflect the actual landing sequence. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

include/aie/Dialect/Conduit/Transforms/ConduitToDMACommon.h was added in 308a048 but no source includes it — the private lib/Dialect/Conduit/Transforms/ConduitToDMACommon.h is the actually-used header. Verified by repo-wide grep for the include path: zero matches across all .cpp / .h / .td / CMakeLists.txt. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

Pre-commit hook (.pre-commit-config.yaml clang-format-17.0.6) flagged violations in the unpushed commits between origin/conduit-dialect and HEAD. Apply clang-format-17 to the union of .cpp/.h files touched by those commits. Pure whitespace reflow, semantic no-op. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…cer-tile groups ConduitFuseChannels was annotating cross-producer S2MM groups with dma_channel_group_s2mm + fuse_mode_s2mm, but Pass C routePhase Sub-case 4a then emits per-conduit aie.flow ops without honoring the grouping. Two flows targeting the same destination port (consumer-tile DMA:N) make aie-routing reject the duplicate-dst circuit connect. This was blocking fuse_channels_npu + fuse_hybrid_swiglu_npu lit-NPU smokes from passing on hardware. Path c (locked design ~25 LOC): pre-scan nameToGroup, mark any group whose member channels have differing producer tiles as cross-producer, and skip the annotation step for those groups. Cross-producer fusion is deferred to Sprint N+4's packet-routing path. Same-producer S2MM fusion (the routable case) keeps the annotation. Producer-tile lookup mirrors the MM2S step's inferredMap + extractCoord pattern (same file, lines 320-324). Predicate iterates nameToGroup so its domain matches exactly the annotation loop it guards. Generic by default — structural property of the group, not shape-specific. Lit fixture coverage: - fuse_channels_s2mm.mlir: was the buggy positive pin (cross-producer topology, CHECK-SAME on the now-suppressed annotation). Flipped to file-scope CHECK-NOT for the s2mm attrs. - fuse_channels_s2mm_same_producer.mlir: NEW companion. Both producers on tile(0,2), consumer on tile(0,4). Positive CHECK pin for the same-producer annotation path. Two RUN lines: FileCheck + --conduit-to-dma to verify Pass C accepts the annotated group cleanly (verify-downstream protocol per CLAUDE.md fix-discipline). - cross_fusion_chain_channels_then_operators.mlir + infer_iter_count_then_fuse_channels.mlir: also pinned the buggy cross-producer annotation as a side observable. Removed those CHECK-SAME lines (kept dma_repeat lines — each fixture's primary purpose), added file-scope CHECK-NOT for the s2mm attrs, and a citation comment cross-referencing the new same-producer companion. 
Conduit lit subset: 350/3/0 -> 351/3/0 (the new companion adds +1 PASS; the two cross-producer fixtures stay PASS). Closes Xilinx#99 for the routable case; cross-producer remains deferred for the upcoming packet-routing path. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

@Inter

… error After commit 431f853 restricted dma_channel_group_s2mm annotation to same-producer-tile groups, NPU verify on fuse_channels_npu still failed with `aie-routing` rejecting duplicate-dst circuit connect. Root cause: same-producer-tile is necessary but not sufficient for circuit-routing fold. Two distinct shim conduits each have their own shim MM2S channel — circuit routing is point-to-point and cannot multiplex two distinct sources to one consumer dst port. Pass C was emitting one aie.flow per (consumer tile, conduit) pair, producing duplicate-dst circuit connects that aie-routing then rejected with an opaque routing-pipeline error. Fix at the actual bug location (ConduitToDMACommon.cpp `emitFlow`): centralized check covering ALL emitFlow call sites uniformly per the "generic by default" rule. Tracks circuit-flow destination port ownership in a new `circuitDstPortOwner` map keyed by (dstTileOp*, dstBundle, dstChan), value = (srcTileOp*, srcBundle, srcChan) of the first emitter. Three branches: - dst not present → insert + emit FlowOp (the normal path) - dst present + same src → silent dedup return (legal: fuse-group partner conduits emit identical flows; matches what aie-routing would dedupe anyway) - dst present + different src → emit clean diagnostic naming both source ports + the shared dst port; passFailed=true; do not emit Op* keys make the map naturally device-scoped (each device's tile ops have distinct identity), so no per-device reset needed (mirrors tileNextS2MMChannel / fuseGroupS2MMChannel state lifetimes). Packet flows take the upstream branch in emitFlow and are NOT tracked — packet routing legally multiplexes distinct sources via packet IDs. Lit fixtures (3 files touched + 1 new): passc_dup_dst_feasibility_error.mlir (NEW): error pin for the exact Xilinx#99 shape — two shim-produced conduits, fuse-channels groups them at consumer-side S2MM, Pass C emit fires the diagnostic. 
Includes documentation of the silent-dedup companion path (covered implicitly by the broader test suite). fuse_channels_npu/{aie.mlir, test.cpp, conduit.lit}: source IR rewrite per approach (a) — drop ext_in_d, Mul becomes inter * 2.0 inline constant. Removes the multi-source-S2MM trigger; preserves the original test purpose (@Inter self-loop + @ext_out producer-side fuse-channels grouping). HW dispatch should now PASS where it previously failed. path_c_async_fuse_channels_token_preserve.mlir + path_c_async_passA_fuse_passC_roundtrip.mlir: latent fixtures encoding wrong-behavior CHECKs on the same shape as Xilinx#99. Same pattern as the 2 fixtures flipped in commit 431f853. Converted from positive-CHECK pins to expected-error pins via --verify-diagnostics; preserves the structural-shape coverage while pinning the new infeasibility outcome. Conduit lit subset: 351/3/0 → 352/3/0 (the new pin adds +1; the 2 flipped fixtures stay PASS via expected-error). bd-chains: 59/2/0 unchanged. install/build mtime parity verified. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

@Inter

The fixture's core scf.for upper bound was %cmax = 0xFFFFFE (16M) since its introducing commit (4c0611e). Pass A's dma_repeat inference (ObjectFifoToConduit.cpp:1200-1203) correctly reads this authored bound and stamps dma_repeat = 16777214 on the @Inter self-loop conduit. test.cpp dispatches NUM_INVOCATIONS = 4 host BO pairs; core blocks on iteration 5's @ext_in_a acquire waiting for the 5th buffer that never arrives → XRT timeout → ERT_CMD_STATE_ABORT (status 8). Pre-Xilinx#99 (commit 431f853) the fixture failed at compile time with aie-routing duplicate-dst rejection on the multi-source-S2MM case ext_in_a + ext_in_d → tile(0,2) DMA:0. The fixture never reached HW to expose the 16M-loop runtime bug. Xilinx#99 closure landed yesterday (commit 4012ed5 Pass C feasibility error + source-IR rewrite dropping ext_in_d). HW dispatch now reaches the core, exposing this latent runtime bug. Trivial fixture fix: bound the core loop to NUM_INVOCATIONS = 4 to match host dispatch count. Comment block documents the history so future readers don't re-introduce it. Pass A diagnostic (warn on dma_repeat > 0xFFFFFE = BD field max) is deferred — needs Llama-trip-count audit per fuse-channels-runtime-forensics report (real workloads may stamp values in [0xFFFFFE, 2^30) without it being a bug). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

@Inter

…epth=1 self-loop Per fuse-channels-status8-deep H0/H4 forensics: same-tile depth=1 self-loop conduits (e.g. @Inter on tile(0,2) feeding itself with depth=1) are pure shared-memory access. The core reads/writes its own tile-local buffer directly via the buffer + locks allocated in ConduitToDMAAlloc.cpp's `sameTile` carve-out at line 128 — no DMA exists. Pre-fix Pass C emitted a SPURIOUS producer-side MM2S BD chain (Case C, ConduitToDMALink.cpp:1706+) AND a SPURIOUS consumer-side S2MM BD chain (Case A, line 2193+). The S2MM consumer-side allocation collided with @ext_in's S2MM channel 0 assignment; HW programs whichever DMAStartOp wins; the loser's BD chain is silently overwritten; shim-sent data lands in the wrong buffer; core blocks on a lock that never gets released → status 8 ABORT. Stateful (--aie-objectFifo-stateful-transform) on the same fixture emits 2 dma_starts (one per real shim source) and 0 BDs for @Inter (just buffer + locks). Conduit was emitting 4. Captured-stateful diff confirmed at /tmp/fc_h0/ (insts.bin byte-identical; CDO sizes diverge in init/elfs sections). Fix: insert a single early-skip predicate at the top of Phase 5.5's generic conduit loop in ConduitToDMALink.cpp, after the buffer/lock checks and before the link-source handling. Predicate is structural, not shape-specific: pCol == cCol && pRow == cRow && consumerTileCoords.size() == 1 && shimConsumerTileCoords.empty() && depth == 1 → continue depth==1 restriction preserves the depth>1 self-loop rotation path fixed by Xilinx#92 (rotation counters require real BD chains). Single insertion point covers BOTH Case C (producer MM2S) and Case A (consumer S2MM) downstream — they share the same loop iteration so one continue handles both. Lit pin: passc_self_loop_no_dma_bd_chain.mlir. 3 conduits matching fuse_channels_npu's shape (shim→tile @ext_in + same-tile depth=1 @Inter + tile→shim @ext_out). 
CHECK that @Inter buffer + 2 locks survive on tile(0,2); CHECK exactly 2 aie.flow ops; CHECK-COUNT-2 aie.dma_start in tile(0,2)'s aie.mem block + trailing CHECK-NOT to pin exhaustiveness; CHECK-NOT for any self-loop aie.flow on tile(0,2). Conduit lit: 352/3/0 → 353/3/0 (+1 for new pin). Zero regressions, zero latent fixture flips. Build + install/build mtime parity OK. NPU re-verify of test/npu-xrt/fuse_channels_npu/ queued separately by lead — should now PASS (closes Xilinx#99 fully on HW after the prior 4012ed5 Pass C feasibility error landing). Cross-bug NOT addressed (Task Xilinx#36 / fallback-desync at lines 2247-2252): the H4 fix removes the symptom by skipping the spurious Case A entirely, but the underlying S2MM channel allocator desync between routePhase (tileNextS2MMChannel) and DMALink fallback (preUsedS2MMChannels) remains latent. Separate audit task. Layer A cleanup NOT addressed (Task Xilinx#37): --conduit-depth-promote still over-promotes same-tile depth=1 self-loops to depth=2 with rotation counter + cf.switch. Layer B fix (this commit) is sufficient for HW correctness; Layer A is separate cleanup for IR-shape parity with stateful. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

@chan

CLAUDE.md "Active Open Bugs" Xilinx#98 row: HomogeneousRepeatPattern.cpp:142,229 stamped dma_repeat = N (1-indexed = "fire N times") while Pass A's IRON path stamps the same field 0-indexed (= "additional fires beyond 1" per aiex.py:289-291). Two conventions for the same field; v6 emits both verbatim → over-fire by 1 if HomogeneousRepeat-collapsed channel reaches the runtime-sequence shim emit. Initial brief was a 2-line tactical fix (canon stamps N-1). issue-98-fix's audit found the convention was inconsistent across READERS too: - Pass C shim emit (ConduitToDMALower.cpp:1273-1359): 0-indexed (firmware fires value+1 times) - ChannelPutsExpand, getEffective*Count, ConduitCheckLoopBalance: all 1-indexed The 2-liner would have created new latent inconsistencies (broken roundtrip fixture, false-positive loop-balance warnings on canon-collapsed channels). Option B convention sweep (this commit): make 0-indexed convention universal across producer + reader sites: - HomogeneousRepeatPattern.cpp: stamp N-1 (both put + get collapse) - CanonicalizeChannelPutsUtils.h+cpp: getDmaRepeatOr1 → getDmaRepeatOr0 (rename + default 0); getEffective*Count: raw * (1 + getDmaRepeatOr0()) - ChannelPutsExpand.cpp: read additional, N = additional + 1 for homogeneous (arith path unchanged — uses lead.getSize() directly) - ConduitCheckLoopBalance.cpp: store totalFires = (*ic) + 1; comparison + warning text updated; new warning emits both "fires N total DMA sends" AND "dma_repeat = N-1, 0-indexed" for clarity - ArithProgressionPattern.cpp:239,418: predicate rename only (no value stamp) Aligns in-IR field literally with what firmware reads (no mental conversions). v6's stability rests on 0-indexed-at-firmware-boundary; this extends the same convention up through the IR. 
Lit fixtures (3 enumerated, all handled per Option B's audit): - NEW canon_homogeneous_repeat_zero_indexed.mlir (93 LOC): named-by-fix pin; 4 IRON-pattern identical puts on @chan, depth=2, shim→compute; pins dma_repeat = 3 + exactly one surviving put. - FLIP homogeneous_repeat_collapse.mlir: dma_repeat = 4 → 3 + comment expansion citing Xilinx#98. - FLIP check_loop_balance.mlir per F1 (preserves boundary-case coverage): short_dma 4→3, long_dma 64→63 (= 4 fires, 64 fires under new convention), comments shifted to "(N+1 total fires)" notation, CHECK lines updated to new warning format. CASE 2 boundary case (trip=64 == fires=64 → no warn) intent preserved. - UNCHANGED homogeneous_repeat_roundtrip.mlir: canon writes N-1, expand reads additional+1, both paths agree → roundtrip is now a free correctness check on the new convention. Conduit lit: 353/3/0 → 354/3/0 (+1 for new pin). Build + install/build mtime parity verified (1777649406). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

@Inter

…epth=1 self-loop Per fuse-channels-status8-deep H0/H4 forensics + npu-op7-alone H4 IR discrimination: --conduit-depth-promote was over-promoting same-tile depth=1 self-loop conduits to depth=2, even though they're pure shared-memory access (the core reads+writes its own tile-local buffer directly via the buffer + locks allocated in ConduitToDMAAlloc.cpp:128's `sameTile` carve-out). The over-promotion broke the H4 fix at Pass C DMALink (Layer B, commit e3a8c0b): Pass C's `depth == 1` predicate didn't fire because depth-promote had already mutated @Inter from depth=1 to depth=2 by the time Pass C saw the conduit. fuse_channels_npu HW re-verify (3rd attempt) captured this exactly: routing-side H4 worked (no self-loop flow, 2 flows on tile(0,2) ✓) but BD-emit-side H4 didn't fire — IR still showed 4 dma_starts on tile(0,2), with the spurious @Inter S2MM channel 0 colliding with @ext_in's S2MM channel 0. Layer A (this commit): early-skip in ConduitDepthPromotion.cpp's Step 5 candidate loop, after Criterion 0 cascade check. Predicate mirrors Layer B exactly: producer_tile == single consumer_tile && no shim consumers → continue (depth==1 implicit via candidate-set construction at 191) Silent skip (no remark) matches Cascade Criterion 0 style — Llama emits many of these self-loops; remarks would be log noise. Layer A + Layer B are defense in depth: - With Layer A only: depth stays at 1, Pass C's H4 predicate fires correctly (no spurious BD chains) - With Layer B only: caught at Pass C even if upstream didn't filter (current behavior, but blocked by depth-promote mutation when depth-promote runs first) - With both: any future code path that reaches Pass C with depth>=1 self-loop is caught (Layer B), AND the IR stays clean upstream (Layer A). Together they match stateful's emit shape: 1 buffer per same-tile depth=1 self-loop, lock init=1, no rotation counter, no dma_start. Lit pin: passc_self_loop_no_depth_promote.mlir. 
Same 3-conduit fixture skeleton as passc_self_loop_no_dma_bd_chain.mlir (Layer B's pin) but RUN line is --objectfifo-to-conduit --conduit-depth-promote. Three CHECK-DAG lines (order-independent, FileCheck-friendly) + CHECK-NOT for the "promoted" remark. Cross-references the sibling Layer B fixture in docstring. Conduit lit: 354/3/0 → 355/3/0 (+1 for new pin). Zero regressions, zero latent fixture flips (8 depth-promote fixtures audited in design step). Build + install/build mtime parity verified. NPU re-verify of test/npu-xrt/fuse_channels_npu/ queued separately by lead — should now PASS at last (4th attempt; both layers in place; closes Xilinx#99 fully on HW). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

Per issue-97-investigator's H(c) verdict (Task Xilinx#38, /tmp/issue_97_forensics.md), NEW root cause superseding the H(a)/H(b) framing in CLAUDE.md: `aie-combine-device same-tile=true` splices devB's body into devA without deduping `aie.tile` ops → multiple `aie.tile(0,2)` SSAs per coord (plus shim duplicates). After fuse-core-bodies + fuse-operators, the surviving merged core is bound to the FIRST-defined tile SSA, but downstream Pass C state has last-writer-wins semantics in `tileCache` (ConduitToDMACollect.cpp:328) which lets the orphan SSA win. Buffer-creation walk's SSA-equality check (ConduitToDMAAlloc.cpp:325 — `core.getTile() == tileVal`) then misses the surviving core, no buffer gets created, rotation slots stay null, and ConduitToDMALower.cpp:111 fires "depth>1 buffer rotation requires a rotation counter" on the post-combine fused IR. Blocks fuse_core_bodies_npu HW. Locked Fix B (~25 LOC including comment): file-static `dedupeTileOps` helper invoked from `ConduitToDMAPass::runOnOperation()` immediately after state init and before `collectPhase`. Single-pass walk per device: first-defined wins (matches aie-combine-device's bodyA-first-then-bodyB splice order), `replaceAllUsesWith(canonical)`, erase orphans. AIE::TileOp serves both compute and shim — same predicate handles both, automatically fixing the cross-bug shim-SSA-equality issue flagged in the forensics report's "Cross-bug observation" section. No edits needed in ConduitToDMACollect.cpp / ConduitToDMAAlloc.cpp / ConduitToDMACommon.h — those existing predicates become correct automatically once orphan SSAs are gone. Lit pin: `passc_tile_dedupe.mlir`. Hand-written post-combine-device duplicate-tile shape (raw `conduit.create` + `aie.shim_dma_allocation` + `aie.core` form bypassing Pass A — Pass A's `--objectfifo-to-conduit` rejects asymmetric tile bindings before Pass C ever sees them, so the fixture had to be at the post-Pass-A IR level). 
Two `aie.tile(0,2)` SSAs + two `aie.tile(0,0)` SSAs + surviving core on first compute tile + depth=2 conduit. CHECK-COUNT-1 + trailing CHECK-NOT exhaustiveness pattern (per commit e3a8c0b) pins exact post-dedupe count of one tile SSA per coord. Sanity-checked: with the dedupe call temporarily disabled, the pin FAILs (CHECK-COUNT mismatch + diagnostic fires); re-enabled, the pin PASSes. Conduit lit: 355/3/0 → 356/3/0 (+1 for new pin). Zero regressions. Build + install/build mtime parity verified. Also incidentally absorbed: clang-format reflow on ConduitCheckLoopBalance.cpp (1 line) and ConduitToDMACommon.cpp (16 lines) — both whitespace-only, zero semantic change, auto-applied by the build's pre-commit hook during iteration. Cross-bug Task Xilinx#36 (S2MM channel-allocation fallback hardening) NOT addressed by this commit — separate audit task remains. NPU re-verify of test/npu-xrt/fuse_core_bodies_npu/ queued separately by lead (closes Xilinx#97 fully on HW). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…rcase step First composition fixture in the user-locked step-by-step staircase: test --conduit-fuse-core-bodies + --conduit-fuse-channels in the same pipeline, validating that fuse-core-bodies' OUTPUT satisfies fuse-channels' INPUT eligibility (the question single-pass smokes can't answer). Source IR: two-device source (devAdd + devMul both targeting tile(0,2)), intermediate pair @inter_add/@consume_add tagged fusion_group="addmul_corebody" so fuse-core-bodies' Step 0 unifies them. NUM_INVOCATIONS=4 explicit constant bound on outer scf.for (per Xilinx#25 lesson — avoids the 0xFFFFFE → status 8 trap that bit fuse_channels_npu earlier today). bf16 inline compute (no func.call). No conduit.wait_all anywhere — skirts the XFAIL'd MED bug at path_c_async_fuse_corebody_blocks_at_wait_all.mlir. RUN line: --use-conduit --conduit-fuse-core-bodies-flag --conduit-fuse-channels-flag --no-xchesscc --no-xbridge. Empirical outcome: outcome (a) from the design proposal materializes — fuse-core-bodies fully merges the cores and erases the intermediate; fuse-channels then sees 1 producer-side conduit on tile(0,2) and is structurally a no-op. The composition works (no crash, no diagnostic, byte-correct output). Documents this in the conduit.lit comment block per the lead's "outcome (a) gets weaker signal — document explicitly" note. Reference compute: out[j] = (in[j]+1.0)*2.0, in[j]=(j%16), NUM_INVOCATIONS=4. Per-invocation HW verification across 4 dispatches. HW PASS verified: lit -v ./test/npu-xrt/fuse_channels_after_core_bodies_npu/ → PASS (1 of 1, 1.91s, 100.00%). Next staircase steps queued (Tasks Xilinx#48 + Xilinx#47): - Xilinx#48 fuse_spatial_and_channels_npu (spatial + channels composition) - Xilinx#47 fuse_spatial_core_bodies_channels_npu (full hybrid 2-op) - Then Task #12 fuse_hybrid_swiglu_npu (capstone-precursor: K=2 convergent + 4-op SwiGLU shape) Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…note Per the lead's design-ack observation: outcome (a) (fuse-core-bodies fully merges + fuse-channels no-op) was the EMPIRICAL landing state on this fixture's source IR. That tests "fuse-channels doesn't choke on post-fuse-core-bodies IR" but does NOT test "fuse-channels finds eligible work in fuse-core-bodies' OUTPUT" — the stronger composition signal. Adds an OBSERVED OUTCOME block to conduit.lit documenting: 1. Empirical landing state (1 device, 1 core, 2 flows). 2. Explicit caveat that this is the WEAKER of the two designed outcomes. 3. Pointer to future fixtures for the stronger signal: fuse_spatial_and_channels_npu/ (Task Xilinx#48) + fuse_spatial_core_bodies_channels_npu/ (Task Xilinx#47), plus a note that the stronger "fuse-channels-with-work-post-merge" signal needs a topology fuse-core-bodies cannot fully erase (multi-consumer or rate-mismatched intermediate). Comment-only edit. NO NPU rerun. Test still PASS at the same shape. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…step Second composition fixture in the user-locked staircase. Tests --conduit-fuse-spatial + --conduit-fuse-channels in the same pipeline at minimum-viable shape (Add + Mul + auxiliary constant emit). Source IR: - 2 separate aie.device blocks (devAdd + devMul), both pre-fuse cores at tile(0,2) (matches fuse_operators_basic_npu's proven shape). --conduit-fuse-spatial column-offsets devMul to put consumer core at tile(1,2). - Intermediate channel pair @inter_add ↔ @consume_add tagged fusion_group="addmul_spatial_chan" for spatial pairing. - devMul's core has TWO sequential sections inside outer scf.for (cmax=4 to match NUM_INVOCATIONS, per Xilinx#25 lesson): Section A: @consume_add Consume → Mul → @ext_out_mul Produce Section B: @ext_out_aux Produce ← inline arith.constant 7.0:bf16 Two producer-side conduits on tile(1,2) post-spatial with disjoint windows — exactly the eligibility surface fuse-channels groups. - All memref<64xbf16>, depth=2, no wait_all (skirts XFAIL'd MED bug), no cross-element-type (Track 3 Phase 4 deferred). Reference compute: out_mul[j] = (in[j]+1.0)*2.0, out_aux[j] = 7.0, in[j] = j%16. NUM_INVOCATIONS=4. Per-invocation HW verification across 4 dispatches. OBSERVED OUTCOME — STRONGER signal than Task Xilinx#46: fuse-channels finds eligible work post-spatial. Confirmed via aie-opt: both producer-side conduits get stamped dma_channel_group="group0", fuse_mode="static". This complements fuse_channels_after_core_bodies_npu's observed weaker (no-op) outcome and exercises the actual composition's annotation work end-to-end. Pass C lowers cleanly through the annotated IR (5 distinct flows, no Xilinx#99-class duplicate-dst collisions). Pass C kept the two HW DMA channels separate (DMA:0 + DMA:1) despite the group annotation — no resource pressure forces folding on a tile with available channels; runtime correctness still verified. 
NPU dispatch queued separately by lead (per protocol fix from Task Xilinx#46's process disclosure — fixture-builder agents do compile-only validation; serializer queues HW dispatch). Next staircase step: Task Xilinx#47 fuse_spatial_core_bodies_channels_npu (full hybrid 2-op, all 3 fusion passes simultaneously). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…pass staircase Third composition fixture in the user-locked staircase. Tests the production composition order: --conduit-fuse-spatial + --conduit-fuse-core-bodies-flag + --conduit-fuse-channels-flag in the same pipeline at minimum-viable shape (Add + Mul + auxiliary constant). Source IR cloned from fuse_spatial_and_channels_npu (Task Xilinx#48) topology with fusion_group key renamed addmul_spatial_chan → addmul_hybrid for clarity. Same 2-device (devAdd + devMul) source on tile(0,2), intermediate pair @inter_add ↔ @consume_add tagged for fusion_group matching, devMul has Section A (mul → @ext_out_mul) + Section B (constant 7.0 → @ext_out_aux), all depth=2 memref<64xbf16>, no wait_all consumers, no cross-element-type. OBSERVED OUTCOME (recorded honestly in lit header per "verify your understanding" rule): - aie-combine-device same-tile=true: REAL WORK (devices merged) - --conduit-fuse-core-bodies: REAL WORK (cores merged with memref.alloca L1 intermediate replacing inter_add/consume_add) - --conduit-fuse-operators: NO-OP (devices.size()<2 guard hit because combine-device already merged — expected interaction per aiecc.cpp:1513-1518) - --conduit-fuse-channels: NO-OP (depth>1 producer-side conduits not Tier 3 eligible in merged-onto-same-tile shape; emits "Tier 3 channel with depth>1 not supported in fuse groups" for both @ext_out_mul and @ext_out_aux) Honest characterization: HW signal class is SAME as compose-A (Xilinx#46) "full hybrid pipeline composes without crash, only ONE pass does meaningful structural work" — NOT the stronger "fuse-channels-with-work" signal initially anticipated. Recorded explicitly in lit header so future readers don't misinterpret. Stronger fuse-channels-active hybrid signal still requires either depth=1 producer-side conduits (Tier 3 eligible) or the upcoming swiglu functional NPU smoke (Task #12 / fuse_hybrid_swiglu_npu). 
Final post-Pass-C IR: 1 device, 1 core on tile(0,2), 3 surviving aie.objectfifo channels lowered to aie.dma_bd / aie.lock / aie.buffer, 3 shim_dma_allocations. NPU dispatch queued separately by lead (per protocol fix from Xilinx#46 process disclosure). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…n erase Step 0 mergeAndUnifyDevices raw-erased put/get_memref_async ops via op->erase() during convergent-fusion device merge, leaving any conduit.wait_all{token=false} consumer with dangling SSA → LLVM ERROR: operation destroyed but still has uses. Split the op-collection loop: async ops route through the existing safeEraseAsyncOp helper (token-aware deletion + downstream wait_all rebuild); blocking ops keep raw erase. Added forward-declarations for safeEraseAsyncOp + removeTokenFromWaitOp at the anonymous-namespace top. Surfaces in fuse_hybrid_swiglu_npu HW smoke (4-device convergent-fusion path). Distinct from commit 53cd53e which fixed composeCoresBodies for the simpler single-device case; mergeAndUnifyDevices is a parallel cleanup path. Dialect-level lit pin added: 2 producer devices fused into 1 consumer with get_memref_async + wait_all{token=false} — minimal IR shape that pre-fix crashed. Lit subset 357 PASS / 3 XFAIL / 0 FAIL (was 356/3/0 baseline). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

Bundled fixes for Sprint N+3 fusion staircase + Llama decode-hang work: * DeviceMergeUtils.cpp: walk cloned ops post-IRMapping clone, reproject arg_index IntegerAttrs against merged-seq absolute space (~25 LOC). Fixes scaling bug where 4-device fusion left aliased arg_indexes. Pinned by combine_device_runtime_seq_arg_index_reprojection.mlir. * ConduitPruneRuntimeSeqArgs.cpp: NEW pass walking each post-fusion runtime_sequence; collects "live" arg indices from BOTH conduit put/get_memref{,_async} arg_index attrs AND any block arg with non-empty SSA uses; renumbers + erases dead block args + rebuilds FunctionType. Pipeline insertion after --conduit-fuse-channels. Pinned by conduit_fused_runtime_sequence_dead_arg_prune.mlir. Subsequent CHECK update to aiecc_bare_use_conduit_includes_dma_task reflects the new pass appearing in pipeline output. * ConduitFuseOperators.cpp: add deviceTilesSubsetOf predicate at Step 4; when devB's tiles are a subset of devA's, set colOffset=0 (preserve explicit co-location). Add 2nd --conduit-fuse-core-bodies pass to aiecc.cpp pipeline (gated on fuse_spatial && fuse_core_bodies) to merge the same-tile cores left behind. Pinned by fuse_operators_explicit_colocation_no_offset.mlir. * ConduitAppendCoreSpin.cpp: NEW pass appending an empty scf.for 0..i64-max-1 spin loop before aie.end in every aie.core. Cores never reach aie.end, eliminating the firmware-runtime hang at orchestrator-position-LAST (op18 LM-head in Llama decode). Pipeline insertion at end of conduit pipeline. Pinned by conduit_append_core_spin.mlir. * ConduitDepthPromotion.cpp: at Step 5 (depth=1 -> depth>1 promote), strip stale dma_channel_group + fuse_mode attrs (no longer Tier-3-eligible after promote; fall back to ungrouped Pass C path). Pinned by depth_promote_strips_stale_grouping.mlir. Verify: ninja aie-opt aiecc clean; lit subset 362 PASS / 3 XFAIL / 0 FAIL (was 357/3/0 baseline + 5 new pins). Llama single-token decode HW PASSes ("SCENE I. King Leont"). 
CI 40-token + swiglu HW have known follow-up bugs queued for next session. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…on behavior Companion to ConduitFuseOperators.cpp deviceTilesSubsetOf change (commit a0c69e3). Pre-existing fixtures deliberately co-located source devices and CHECK'd for the offset-always-fires behavior. After the new predicate, when source already places devB's tiles within devA's set, no offset fires (preserves explicit co-location). Each fixture updated via strategy (a): flip devB's shim col (or compute tile rows in convergent K=3 case) so the subset check fails and offset path still fires per pre-fix CHECKs. Tests still verify the same merged-device emission, just with distinct-source-tile inputs instead of identical-source-tile inputs. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

Per Task Xilinx#63 design call (mixed convergent + 1:1 fusion_group mode-mixing scope-out): the mulsink_cb 1:1 fusion_group annotations on mul_inter / consume_mul are redundant under the hybrid pipeline. aie-combine-device{same-tile=true} (gated by --conduit-fuse-core-bodies-flag) already merges devMul + devSink because both live on tile(0,2). Annotating with shared fusion_group triggered --conduit-fuse-operators's deliberate mixed-mode scope-out (rejection pinned by fuse_operators_convergent_mixed_fusion_groups_BUG). Removed the redundant annotations + updated header comments + lit narrative to attribute the 1:1 mul-sink merge to combine-device's same-tile=true path. Companion HW-correctness work for this fixture remains open: even with annotations dropped, swiglu HW still fails (sinks output zero). Investigations: Tasks Xilinx#102 + Xilinx#104 + Xilinx#106 (Sprint N+3 follow-ups). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

Pure whitespace/wrapping fixups requested by the pre-push clang-format hook for files modified in commits a0c69e3, c5b26b8, a71eb60 and earlier Sprint N+3 staircase commits. No logic changes. Verify with `git diff -w HEAD~1 HEAD` (empty). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

Bug: --conduit-fuse-operators K=2 convergent merge produced a merged consumer-device aie.runtime_sequence whose put/get_memref arg ORDER was determined by the iterative pairwise merge order inside the pass (which device gets absorbed first), NOT by source-IR device declaration order. Symmetric inputs (gate == up in fuse_hybrid_swiglu_npu) made the swap invisible — fuse_hybrid_swiglu_npu/aie.mlir:87-91 acknowledged the "interchangeable" hiding mechanism explicitly. With non-symmetric inputs the gate-side and up-side host buffers bind to each other's arg slots at runtime, silently corrupting the downstream consumer. Source-IR declaration order is the only stable contract host code (test.cpp BO indices, no-host-orchestrator harnesses) can rely on. Fix design: REORDER-AFTER-MERGE (option A) at end of runOnOperation in ConduitFuseOperators.cpp. Phase 1 (pre-tag, ~14 LOC): walk each device in source-IR order and stamp every conduit.{put,get}_memref{,_async} with a discardable attr `_source_device_index = N : i64`. The tag survives Step 8's clone-and-move because both clone and op->remove()/insert() preserve discardable attrs. 
Phase 2 (post-merge reorder, ~190 LOC): for each surviving runtime_sequence, - build per-arg src-device key from put/get_memref ops - alias-conflict guard for Task Xilinx#81 K=2 case (warn + strip + skip) - orphan args sort last via a sentinel - llvm::stable_sort by source-IR-device-index - identity-permutation no-op fast path (1:1 fusion stays untouched) - defensive SSA-dead check on block args - erase + re-add args in new order - renumber arg_index attrs via oldToNew[] mapping - stable_sort + moveBefore the put/get ops to the head of the body in new-arg-index order (textual order mirrors arg_index) - permute host-side aiex.run operands targeting this seq via clone-and-erase - strip _source_device_index tags on every exit path The 1:1 fusion identity-permutation fast path means existing fuse_operators_*.mlir fixtures (symmetric or no convergent shape) are unaffected. Pinned by test/Dialect/Conduit/convergent_merge_order_stability.mlir (K=2 convergent with NON-symmetric memref shapes — gate=memref<64xbf16>, up=memref<128xbf16>, out=memref<32xbf16>; CHECK encodes source-IR declaration order: gate→arg0, up→arg1, out→arg2). Pre-fix this fixture FAILed (merged seq emitted up first); post-fix PASSes. Verify: ninja aie-opt aiecc; lit subset 363 PASS / 3 XFAIL / 0 FAIL (was 362/3/1, the lone FAIL being this pin). Closes Task Xilinx#82 at the compiler level. fuse_hybrid_swiglu_npu HW byte-validation with non-symmetric inputs is the follow-up. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…erence Hand-fused swiglu baseline fixture authored from first principles, NOT captured from compiler output. Pairs with fuse_hybrid_swiglu_npu/ as the golden reference for byte-equivalence validation per the "validate-against-known-good-base" Sprint N+4 strategy. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

… asym inputs Closes the swiglu fixture pair's three validation gaps (per Sprint N+4 priority #10, swiglu-as-most-suspect-pair starting point): gap 1 (kernel-IR-matches-spec): kernel-IR re-verified against spec — devMul does arith.mulf, devSink does arith.addf with 1.0/2.0 (and the hand-fused baseline's single inline body computes the same). No discrepancy between IR and spec. gap 2 (independently-authored reference): reference math lifted out of test.cpp into a new gen_reference.py (numpy + stdlib, no torch / ml_dtypes deps). Computes expected_a/expected_b from spec, dumps 4 bf16 .bin files per regime. test.cpp accepts --gate-in / --up-in / --expected-a / --expected-b cxxopts and byte-compares against the on-disk .bin files. Bf16 representation matches test.cpp's existing float_to_bf16 truncate-toward-zero (numpy.uint16 raw bits, '<u2' explicit endianness on write). Removes the "same author wrote both the IR and the C++ reference" blind spot. gap 3 (gate/up arg-mis-pairing blind spot): gen_reference.py emits BOTH symmetric (legacy: gate=up=(j%8)) AND asymmetric (gate=(j%8), up=(j%8)+1) regimes. conduit.lit RUN block now invokes gen_reference twice (once per regime) with test.exe in between. Symmetric inputs previously hid any wire-swap bug downstream of mul; asymmetric surfaces it. Backward-compat: bare test.exe invocation (no .bin args) falls back to symmetric inline ramp + computed reference — preserves debuggability. Files in scope: fuse_hybrid_swiglu_npu/{test.cpp, conduit.lit, gen_reference.py} fuse_hybrid_swiglu_handfused_baseline/{test.cpp, conduit.lit, gen_reference.py} test.cpp byte-identical between the two dirs (intentional shared harness); gen_reference.py byte-identical (intentional copy, not symlink — fragile across cp -r / release-tarball workflows). HW verify queued (`fuse_hybrid_swiglu_npu` lit-header notes "currently HW-FAILS with all-zero outputs" predates Xilinx#92 landing — post-Xilinx#92 status open). 
Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…rrently PASSes) New lit fixture: test/Dialect/Conduit/conduit_to_dma_b_channel_consolidation_BUG.mlir Pins WRONG-CURRENT behavior of --conduit-canonicalize-channel-puts (HomogeneousRepeatPattern.cpp::tryCollapsePuts) over-collapsing 8 structurally-identical IRON puts on a shim-MM2S → memtile linked channel into a single configure_task with `repeat_count = 7` (= 8 firmware fires). Empirical proof of root cause this session: - Captured Llama prefill `attn_scores` GEMM (M=2048 K=2048 N=512 num_invocations=16) post-Pass-C IR has 8 such consolidated B-channel configures, each `{repeat_count = 7 : i32}`. - HW dispatches hang (XRT timeout) at the first invocation. - Hand-patched the conduit IR to use stateful's 64×repeat_count=1 paced shape (per-col `(C, A, B, A, B)` × 4 rounds); aiecc + dispatch completed in <1ms with numerically-correct output (max_abs_diff=0.003261 vs numpy bf16 matmul). Minimum geometry to trigger the canon collapse: 3 tiles (shim 0,0 → memtile 0,1 → compute 0,2), 2 objectfifos bridged by `aie.objectfifo.link [@B_L3L2] -> [@B_L2L1]([] [0])`, compute core infinite scf.for, runtime_sequence with 8 structurally-identical `aiex.dma_configure_task_for @B_L3L2` ops (offset 0, BD len 4096, no producer dims, no `repeat_count`) + 8 matching `aiex.dma_free_task` ops. Per fix-discipline (CLAUDE.md USER-LOCKED 2026-04-24 "Isolate bugs with a minimal lit test BEFORE fixing"): Phase 1 of multi-phase Llama hang fix work. After source fix lands the CHECK lines flip from `repeat_count = 7` (1 configure) to 8 separate configures (no repeat_count attr) and the file is renamed by dropping the `_BUG` suffix. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…Llama prefill GEMM Llama prefill `attn_scores` GEMM (M=2048 K=2048 N=512 num_invocations=16) hung at HW dispatch because canon over-collapsed the 8 IRON puts on the shim-MM2S → memtile linked B-channel into 1 configure with `dma_repeat=7`, which Pass C surfaced verbatim onto `configure_task.repeat_count`. The linked-MM2S→memtile path in Pass C wants N separate paced configures (matching stateful's per-col `(C,A,B,A,B)` × 4 rounds emit), not the collapsed `dma_repeat`-encoded form. Empirically grounded by 2026-05-03 hand-patch experiment: replacing the 8×repeat=7 with 64×repeat=1 in the post-Pass-C IR + dispatching → numerically correct output (max_abs_diff=0.003261 vs numpy bf16 matmul). Per CLAUDE.md USER-LOCKED 2026-04-30 "Smaller verification loop for Pass C emit changes", verified before any source change. Source change (4 files, ~110 lines): * `CanonicalizeChannelPutsUtils.{h,cpp}` — new `isLinkedChannel(scope, chanName)` helper. Walks ScatterOp/GatherOp/TransposeOp in `scope`, checks srcs/dsts ArrayAttr + src/dst FlatSymbolRefAttr for chanName. Mirrors `ConduitDepthPromotion::collectLinkedConduitNames` precedent exactly (its Criterion 2 already excludes linked conduits from depth-promotion for the same structural reason). * `HomogeneousRepeatPattern.cpp::tryCollapsePuts`/`tryCollapseGets` and `ArithProgressionPattern.cpp::tryCollapseArithPuts`/`tryCollapseArithGets` — refuse to collapse + emit remark when channel is linked. Inserted after the cheap structural matches (sync-chain shape) and before the more expensive cap/dim-arith checks. Generic by default (USER-LOCKED 2026-04-24): the structural mismatch between `dma_repeat`-encoded canon output and Pass C's link-path emit applies to any linked-channel collapse, not just the GEMM B-channel shape. All 4 collapse modes (homogeneous puts/gets + arith puts/gets) get the refusal so canon's contract stays uniform: canon never produces a `dma_repeat`-bearing form on a linked channel. 
Test changes (3 files): * `test/Dialect/Conduit/conduit_to_dma_b_channel_consolidation.mlir` (renamed from `..._BUG.mlir`) — Phase 1 lit pin's CHECKs flipped from pinning wrong-current 1×repeat_count=7 emit to pinning correct post-fix 8 separate configures emit. Currently PASSes against post-fix HEAD. * `test/npu-xrt/conduit_canon_no_collapse_on_link/{aie.mlir,test.cpp, conduit.lit}` — new lit-NPU smoke per CLAUDE.md USER-LOCKED per-pattern-NPU-smoke-as-hard-gate (2026-04-30). 4-IRON-puts on a linked channel (smallest geometry that triggers the canon-collapse decision); HW byte-compare against spec-derived inline reference. REQUIRES: ryzen_ai, peano (bf16 in-core compute). Verification: * Lit subset: 364 PASS / 3 XFAIL / 0 FAIL (was 363/3/0; +1 from the flipped pin landing as a real PASS post-fix). Zero new regressions. * Llama conduit-mode: prefill `attn_scores` GEMM hang is GONE. Full prefill completes; ~33 lines of correct King-Lear continuation decode tokens stream before a separate downstream issue surfaces (multi-invocation iter-count bug; latent pre-fix and now exposed). Headline blocker resolved. * New NPU smoke: canon refusal fires as designed; Pass C emits N separate paced configures; HW dispatch completes; invocation 1 of 4 produces correct bytes. Invocations 2-4 produce zeros — same multi-invocation iter-count bug surfacing in the smoke. Smoke is now the minimum reproducer for the next bug to investigate. Out of scope for this commit: multi-invocation iter-count bug (Llama decode timeout mid-stream + smoke invocations 2-4 zeros). Tracked separately; matches PLAN.md Carry-over B (RTP-aware iteration-count inference) gap. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…el as "loop forever" `--objectfifo-to-conduit`'s `kTripCountUnboundedSentinel = 1 << 30` (chosen historically for legacy `i64::MAX`-bounded outer loops) does NOT catch IRON's actual "loop forever" idiom: the AIE 24-bit BD-loop saturation value `0xFFFFFE = 16,777,214 = (1 << 24) - 2`. Mismatch caused Pass A's `getStaticTripCount` -> `inferDmaRepeatForChannel` path to read the literal 16,777,214 trip count and stamp it onto non-shim conduit channels as `dma_repeat = 16777214` — a bogus value that silently propagates through canon + depth-promote. Discovered 2026-05-03 by `itercount-investigator` while tracing the post-canon-fix swiglu/smoke "all-zero-on-multi-invocation" symptom. The symptom turned out to be unrelated (sweep falsified the iter-count hypothesis) but the Pass A defect is real and worth defensive hygiene. Llama unaffected (IRON ops use `while_true=False` -> bounded `%c16`), but any future fixture or IR with literal-0xFFFFFE outer bound on a non-shim channel WILL hit it. Source change (ObjectFifoToConduit.cpp, +18 LOC): * Add `kAie24bitBdLoopSentinel = (int64_t{1} << 24) - 2;` adjacent to the existing `kTripCountUnboundedSentinel`. Comment block documents the empirical 2026-05-03 finding. * Extend the silent-skip predicate from `*trip >= kTripCountUnboundedSentinel` to `*trip == kAie24bitBdLoopSentinel || *trip >= kTripCountUnboundedSentinel`. Explicit equivalence check makes the AIE-specific case obvious vs the legacy threshold. Both sentinel forms are now silently skipped (no remark — matches the existing skip-site behavior; only the `out.reason`-bearing skip paths emit remarks). * Update the `// Skip cases:` doc enumeration at file top to mention both sentinel forms. Test: * `test/Dialect/Conduit/infer_iter_count_skip_aie_24bit_sentinel.mlir` — minimal compute-to-compute (no shim BD) fixture with both producer and consumer cores' outer loops bounded by `arith.constant 16777214`. 
CHECK-asserts `conduit.create @chan` has NO `dma_repeat` attribute (the shim-skip at the existing `:761` does NOT pre-empt for this geometry, so Pass A reaches the trip-count check and silently skips via the new sentinel match). Lit subset: 365 PASS / 3 XFAIL / 0 FAIL (was 364 + 1 new pin). Audit (per investigator's flagged adjacent gap): 2 callers of `getStaticTripCount` in `lib/Dialect/Conduit/`. Caller #1 is `tripCountOfLoop` -> `inferDmaRepeatForChannel` (this commit). Caller #2 is `ConduitCheckLoopBalance.cpp:146` which compares trip count to total-fires for a "token deficit" warning — would silently emit a misleading warning on `0xFFFFFE` bounds. NOT fixed in this commit (scope-tight); tracked as separate follow-up. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

…on sentinel bounds Adjacent-gap follow-up to commit `aa20c968b1` (Pass A sentinel-detection fix). The companion audit identified that `ConduitCheckLoopBalance.cpp:146` is the second `getStaticTripCount` caller in `lib/Dialect/Conduit/`. Its `tripCount > totalFires` predicate would silently emit a misleading "token deficit" warning on `0xFFFFFE`-bounded outer loops since the sentinel value is virtually always > totalFires for any real channel. Fix: short-circuit the warning emit when `tripCount` matches the AIE 24-bit BD-loop saturation sentinel (`0xFFFFFE`) or the legacy `kTripCountUnboundedSentinel` (`1 << 30`). Mirrors the predicate in `ObjectFifoToConduit.cpp:715-716`. Implementation choice: duplicate the two `static constexpr int64_t` constants in `ConduitCheckLoopBalance.cpp`'s anonymous namespace (with a cross-reference comment to `ObjectFifoToConduit.cpp:204/214` and a "keep in sync" warning) rather than hoist to a shared header. Rationale: ConduitCheckLoopBalance is the SOLE second consumer today; introducing a new shared header for two int64_t constants is over-engineering. Comment flags shared header as the better long-term home if a third consumer appears. Severity: LOW — warning-only, no compile/runtime impact. Worth fixing for forward visibility (the misleading warning was previously masked because no in-tree fixture has a `0xFFFFFE`-bounded outer loop on a balance-checked channel; the pre-existing in-tree `infer_iter_count_*` fixtures all use small finite bounds). Lit: 365 PASS / 3 XFAIL / 0 FAIL (unchanged from `aa20c968b1`; this fix suppresses a warning emit that no fixture exercised). Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

@out

Symmetric to the link-refusal landed in 375b0e5. Adds chainHasAwait helper to CanonicalizeChannelPutsUtils + 4 refusal sites in HomogeneousRepeatPattern.{tryCollapsePuts,tryCollapseGets} and ArithProgressionPattern.{tryCollapseArithPuts,tryCollapseArithGets}. When the per-put/per-get sync chain contains any wait_all{token=true} (IR-level signal that IRON requested a per-issue aiex.dma_await_task), canon refuses to collapse N puts/gets to 1 configure x dma_repeat=N-1. The consolidated form produces ONE consolidated firmware ack at the end of the configure, starving the per-chunk consumer-side ack request. Empirical HW backing: conduit_canon_no_collapse_on_link/'s @out side (189 byte mismatches pre-fix; flipped RED -> GREEN this commit) plus the new gets-with-await NPU smoke (PASS). Lit pin flips (9 fixtures): the prior canon-collapse pins encoded wrong-current behavior per CLAUDE.md USER-LOCKED 2026-04-28 "wrong is right" anti-pattern. Their input geometry uses [true,false] chain shapes (await + free) that pre-fix collapsed; post-fix they refuse. CHECKs flipped to pin N separate configures, no dma_repeat. Cap-refuse pins additionally drop --verify-diagnostics since chain-await refusal fires before the cap-refuse remark. New lit pin: conduit_canon_no_collapse_on_token_true.mlir pins the predicate at the at-site level with two channels: @chan_no_await (legitimate collapse, dma_repeat=3) + @chan_with_await (refused, all 4 puts survive). New NPU smokes (per the per-pattern NPU smoke hard gate): - conduit_canon_no_collapse_on_gets_with_await/ - PASS, isolates gets-side refusal on shim-S2MM with per-issue ack chains. - conduit_canon_no_collapse_on_puts_with_await/ - XFAIL, pins a newly-exposed Pass C bug: when canon legitimately leaves N>depth separate paced configures on a non-linked shim-MM2S -> compute channel, Pass C emits a NON-CIRCULAR consumer-side BD chain (length=depth) that terminates after `depth` transfers. 
Capture artifacts at /tmp/puts-with-await-capture/ (failing) vs /tmp/gets-with-await-capture/ (passing) - smoking gun is consumer-side tile BD `next_bd` topology. Flips to PASS once Pass C non-circular-chain emit on N>depth paced configures is fixed. Lit subset 366 PASS / 3 XFAIL / 0 FAIL. Llama prefill attn_scores GEMM hang (resolved by 375b0e5's link-refusal) remains green. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

Pure whitespace reflows produced by clang-format on files modified in recent commits (225f0d6, aa20c96, 375b0e5, 64fe983, 3753a99). No logic changes. Verified via `clang-format --dry-run --Werror` on every C/C++ file in the unpushed-commit set: all clean post-reflow. Co-Authored-By: Claude Opus 4 (1M context) <noreply@anthropic.com>

github-actions Bot reviewed Apr 20, 2026

View reviewed changes

hunhoffe and others added 27 commits April 20, 2026 13:27

tests+link: cleanup from sprint 5 session

3dd6cc4

- Remove stale XFAIL markers from passC_delta tests (now passing) - ConduitToDMALink: clean up iter_count repeat_count computation Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

hunhoffe and others added 30 commits April 30, 2026 23:38

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

WIP#5

WIP#5
hunhoffe wants to merge 338 commits into
mainfrom
conduit-dialect

hunhoffe commented Apr 20, 2026

Uh oh!

github-actions Bot left a comment

Uh oh!

github-actions Bot left a comment

Uh oh!

github-actions Bot left a comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

1 participant

	static void
	dimsToOffsetsStrides(int32_t bdOffset, int64_t len,
	AIE::BDDimLayoutArrayAttr dimensions,
	llvm::SmallVectorImpl<int64_t> &offsets,
	llvm::SmallVectorImpl<int64_t> &sizes,
	llvm::SmallVectorImpl<int64_t> &strides) {

	llvm::StringRef conduitName =
	conduitChannelAttr ? conduitChannelAttr.getValue()
	: alloc.getSymName();

	void processRuntimeSequence(
	AIE::RuntimeSequenceOp rtSeq,
	const llvm::StringMap<ShimAllocInfo> &allocMap,
	mlir::OpBuilder &builder) {

	auto nameAttr =
	op->getAttrOfType<mlir::FlatSymbolRefAttr>("name");

	static llvm::SmallVector<std::string>
	getProducedChannels(AIE::CoreOp core) {

	static llvm::SmallVector<std::string>
	getConsumedChannels(AIE::CoreOp core) {

	device.walk([&](Create createOp) {
	createMap[createOp.getName()] = createOp;
	});

	std::string intermediateName =
	intermediateConduit.getName().str();

	static IntermediateRoute
	decideRoute(Create intermediateConduit, mlir::Value tile,
	AIE::DeviceOp device) {

	static std::string emitMemTileRelay(FusableCorePair &pair,
	AIE::DeviceOp device,
	mlir::OpBuilder &builder,
	mlir::MLIRContext *ctx) {

	builder.create<Create>(
	intermediateConduit.getLoc(), mlir::StringAttr::get(ctx, relayName),
	intermediateConduit.getElementTypeAttr(),
	intermediateConduit.getDepthAttr(),
	/*routing_mode=*/RoutingModeAttr{},
	/*sync_mode=*/SyncModeAttr{},
	/*producer_rates=*/nullptr,
	/*consumer_rates=*/nullptr,
	/*fusion_group=*/mlir::StringAttr{},
	/*bd_repeat=*/nullptr,
	/*dma_repeat=*/nullptr,
	/*producer_dimensions=*/nullptr,
	/*consumer_dimensions=*/nullptr);

	mlir::ArrayAttr dstsArr = mlir::ArrayAttr::get(
	ctx, {mlir::FlatSymbolRefAttr::get(ctx, relayName)});

	static mlir::LogicalResult composeCoresBodies(
	FusableCorePair &pair, IntermediateRoute route,
	mlir::OpBuilder &builder, mlir::MLIRContext *ctx,
	AIE::DeviceOp device) {

	mapping.map(consumerFor.getInductionVar(),
	producerFor.getInductionVar());

	defOp != consumerFor.getOperation() &&
	!mapping.contains(operand) && alreadyCloned.insert(defOp).second) {

	mlir::OpBuilder &builder,
	const std::string &consumerChannelName) {

	std::string channelName =
	pair.intermediateConduit.getName().str();

	static llvm::SmallVector<ArgGroup>
	buildArgGroupsFromSeq(mlir::Block &seqBody) {

	int64_t offset =
	(offsetsAttr && !offsetsAttr.empty()) ? offsetsAttr[0] : 0;

	Key k = {alloc.getTile(),
	static_cast<int>(alloc.getChannelDir())};

Conversation

hunhoffe commented Apr 20, 2026

Uh oh!

github-actions Bot left a comment

Choose a reason for hiding this comment

Uh oh!

github-actions Bot left a comment

Choose a reason for hiding this comment

Uh oh!

github-actions Bot left a comment

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

1 participant

	if (allocs[idx].getChannelIndex() !=
	static_cast<int64_t>(idx))

	auto computeDeadArgs = [](
	const llvm::SmallVector<ArgGroup> &groups,
	const llvm::StringSet<> &erasedChannels,
	unsigned numOrigArgs) -> llvm::DenseSet<unsigned> {

	llvm::DenseSet<unsigned> deadA = computeDeadArgs(
	argGroupsA, erasedChannelsA,
	static_cast<unsigned>(origTypesA.size()));
	llvm::DenseSet<unsigned> deadB = computeDeadArgs(
	argGroupsB, erasedChannelsB,
	static_cast<unsigned>(origTypesB.size()));

	op.setRoutingModeAttr(
	RoutingModeAttr::get(module.getContext(), RoutingMode::SharedMemory));

	unsigned numConsumers = consCoords.empty() ? 1
	: consCoords.size();

	if (!createOp->getAttrOfType<mlir::StringAttr>(
	"dma_channel_group"))

	if (targetModel.isMemTile(consCol, consRow) &&
	info.putCount > 1 && info.dmaRepeat == 0) {

	auto consLocks =
	state.allocateLockPair(consTileVal, consPrefix, consNBufs, prodInit);

	info.producerDimensions =
	mlir::cast<AIE::BDDimLayoutArrayAttr>(*dims);

	auto arrayOfArrays =
	mlir::cast<AIE::BDDimLayoutArrayArrayAttr>(*dims);