From 184db22b6b068af0fbdc00bfa19fbd4162fa72f7 Mon Sep 17 00:00:00 2001
From: Morten Borup Petersen
Date: Thu, 23 Sep 2021 18:14:01 +0100
Subject: [PATCH] [SCFToCalyx] Support memref operations [12/12] (#1863)

This commit adds support for memory-accessing operations. All index types
are converted to a fixed-width integer; this is necessary due to the lack
of a bitwidth inference pass. When an index-typed value is used as a memory
address input, the address value is truncated to the width of the memory
port.
---
 lib/Conversion/SCFToCalyx/SCFToCalyx.cpp      | 271 +++++++++++++++++-
 .../Conversion/SCFToCalyx/convert_memory.mlir | 259 +++++++++++++++++
 2 files changed, 529 insertions(+), 1 deletion(-)
 create mode 100644 test/Conversion/SCFToCalyx/convert_memory.mlir

diff --git a/lib/Conversion/SCFToCalyx/SCFToCalyx.cpp b/lib/Conversion/SCFToCalyx/SCFToCalyx.cpp
index 503bb26e73cc..83a39e522ffc 100644
--- a/lib/Conversion/SCFToCalyx/SCFToCalyx.cpp
+++ b/lib/Conversion/SCFToCalyx/SCFToCalyx.cpp
@@ -17,6 +17,7 @@
 #include "circt/Dialect/HW/HWOps.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/Matchers.h"
@@ -60,6 +61,14 @@ static bool matchConstantOp(Operation *op, APInt &value) {
   return mlir::detail::constant_int_op_binder(&value).match(op);
 }

+/// Returns true if there exists only a single memref::LoadOp which loads from
+/// the memory referenced by loadOp.
+static bool singleLoadFromMemory(memref::LoadOp loadOp) {
+  return llvm::count_if(loadOp.memref().getUses(), [](auto &user) {
+           return dyn_cast<memref::LoadOp>(user.getOwner());
+         }) <= 1;
+}
+
 /// Creates a DictionaryAttr containing a unit attribute 'name'. Used for
 /// defining mandatory port attributes for calyx::ComponentOp's.
 static DictionaryAttr getMandatoryPortAttr(MLIRContext *ctx, StringRef name) {
@@ -304,6 +313,23 @@ class ComponentLoweringState {
     return it->second;
   }

+  /// Registers a calyx::MemoryOp as being associated with a memory identified
+  /// by 'memref'.
+  void registerMemory(Value memref, calyx::MemoryOp memoryOp) {
+    assert(memref.getType().isa<MemRefType>());
+    assert(memories.find(memref) == memories.end() &&
+           "Memory already registered for memref");
+    memories[memref] = memoryOp;
+  }
+
+  /// Returns the calyx::MemoryOp registered for the given memref.
+  calyx::MemoryOp getMemory(Value memref) {
+    assert(memref.getType().isa<MemRefType>());
+    auto it = memories.find(memref);
+    assert(it != memories.end() && "No memory registered for memref");
+    return it->second;
+  }
+
 private:
   /// A reference to the parent program lowering state.
   ProgramLoweringState &programLoweringState;
@@ -348,6 +374,9 @@ class ComponentLoweringState {

   /// A mapping from while ops to iteration argument registers.
   DenseMap<scf::WhileOp, DenseMap<unsigned, calyx::RegisterOp>> whileIterRegs;
+
+  /// A mapping from memrefs to their corresponding Calyx memory op.
+  DenseMap<Value, calyx::MemoryOp> memories;
 };

 /// ProgramLoweringState handles the current state of lowering of a Calyx
@@ -565,10 +594,13 @@ class BuildOpGroups : public FuncOpPartialLoweringPattern {
         .template Case<ConstantOp, ReturnOp, BranchOpInterface, scf::YieldOp,
+                       memref::AllocOp, memref::LoadOp, memref::StoreOp,
                        AddIOp, SubIOp, CmpIOp, ShiftLeftOp,
                        UnsignedShiftRightOp, SignedShiftRightOp, AndOp,
-                       XOrOp, OrOp, ZeroExtendIOp, TruncateIOp>(
+                       XOrOp, OrOp, ZeroExtendIOp, TruncateIOp,
+                       IndexCastOp>(
             [&](auto op) { return buildOp(rewriter, op).succeeded(); })
         .template Case<scf::WhileOp, mlir::FuncOp, scf::ConditionOp>(
             [&](auto) {
@@ -606,6 +638,10 @@ class BuildOpGroups : public FuncOpPartialLoweringPattern {
   LogicalResult buildOp(PatternRewriter &rewriter, TruncateIOp op) const;
   LogicalResult buildOp(PatternRewriter &rewriter, ZeroExtendIOp op) const;
   LogicalResult buildOp(PatternRewriter &rewriter, ReturnOp op) const;
+  LogicalResult buildOp(PatternRewriter &rewriter, IndexCastOp op) const;
+  LogicalResult buildOp(PatternRewriter &rewriter, memref::AllocOp op) const;
+  LogicalResult buildOp(PatternRewriter &rewriter, memref::LoadOp op) const;
+  LogicalResult buildOp(PatternRewriter &rewriter, memref::StoreOp op) const;

   /// buildLibraryOp will build a TCalyxLibOp inside a TGroupOp based on the
   /// source operation TSrcOp.
@@ -666,8 +702,107 @@ class BuildOpGroups : public FuncOpPartialLoweringPattern {
     return createGroup<TGroupOp>(rewriter,
                                  getComponentState().getComponentOp(),
                                  block->front().getLoc(), groupName);
   }
+
+  /// Creates assignments within the provided group to the address ports of the
+  /// memoryOp, based on the provided addressValues.
+  void assignAddressPorts(PatternRewriter &rewriter, Location loc,
+                          calyx::GroupInterface group, calyx::MemoryOp memoryOp,
+                          Operation::operand_range addressValues) const {
+    IRRewriter::InsertionGuard guard(rewriter);
+    rewriter.setInsertionPointToEnd(group.getBody());
+    auto addrPorts = memoryOp.addrPorts();
+    assert(addrPorts.size() == addressValues.size() &&
+           "Mismatch between number of address ports of the provided memory "
+           "and address assignment values");
+    for (auto &idx : enumerate(addressValues))
+      rewriter.create<calyx::AssignOp>(loc, addrPorts[idx.index()],
+                                       idx.value(), Value());
+  }
 };

+LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
+                                     memref::LoadOp loadOp) const {
+  auto memoryOp = getComponentState().getMemory(loadOp.memref());
+  if (singleLoadFromMemory(loadOp)) {
+    /// Single load from memory; combinational case - we do not have to
+    /// consider adding registers in front of the memory.
+    auto combGroup = createGroupForOp<calyx::CombGroupOp>(rewriter, loadOp);
+    assignAddressPorts(rewriter, loadOp.getLoc(), combGroup, memoryOp,
+                       loadOp.getIndices());
+
+    /// We refrain from replacing the loadOp result with memoryOp.readData,
+    /// since multiple loadOps need to be converted to a single memory's
+    /// readData. If this replacement is done now, we lose the link between
+    /// which SSA memref::LoadOp values map to which groups for loading a value
+    /// from the Calyx memory. At this point of lowering, we keep the
+    /// memref::LoadOp SSA value, and do value replacement _after_ control has
+    /// been generated (see LateSSAReplacement). This is *vital* for things
+    /// such as InlineCombGroups to be able to properly track which memory
+    /// assignment groups belong to which accesses.
+    getComponentState().registerEvaluatingGroup(loadOp.getResult(), combGroup);
+  } else {
+    auto group = createGroupForOp<calyx::GroupOp>(rewriter, loadOp);
+    assignAddressPorts(rewriter, loadOp.getLoc(), group, memoryOp,
+                       loadOp.getIndices());
+
+    /// Multiple loads from the same memory; in this case, we _may_ have a
+    /// structural hazard in the design we generate.
+    /// To get around this, we conservatively place a register in front of
+    /// each load operation, and replace all uses of the loaded value with the
+    /// register output. Proper handling of this requires the combinational
+    /// group inliner/scheduler to be aware of when a combinational expression
+    /// references multiple loaded values from the same memory, and then
+    /// schedule assignments to temporary registers to get around the
+    /// structural hazard.
+    auto reg = createReg(getComponentState(), rewriter, loadOp.getLoc(),
+                         getComponentState().getUniqueName("load"),
+                         loadOp.getMemRefType().getElementTypeBitWidth());
+    buildAssignmentsForRegisterWrite(getComponentState(), rewriter, group, reg,
+                                     memoryOp.readData());
+    loadOp.getResult().replaceAllUsesWith(reg.out());
+    getComponentState().addBlockScheduleable(loadOp->getBlock(), group);
+  }
+  return success();
+}
+
+LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
+                                     memref::StoreOp storeOp) const {
+  auto memoryOp = getComponentState().getMemory(storeOp.memref());
+  auto group = createGroupForOp<calyx::GroupOp>(rewriter, storeOp);
+
+  /// This is a sequential group, so register it as being scheduleable for the
+  /// block.
+  getComponentState().addBlockScheduleable(storeOp->getBlock(),
+                                           cast<calyx::GroupOp>(group));
+  assignAddressPorts(rewriter, storeOp.getLoc(), group, memoryOp,
+                     storeOp.getIndices());
+  rewriter.setInsertionPointToEnd(group.getBody());
+  rewriter.create<calyx::AssignOp>(storeOp.getLoc(), memoryOp.writeData(),
+                                   storeOp.getValueToStore(), Value());
+  rewriter.create<calyx::AssignOp>(
+      storeOp.getLoc(), memoryOp.writeEn(),
+      getComponentState().getConstant(rewriter, storeOp.getLoc(), 1, 1),
+      Value());
+  rewriter.create<calyx::GroupDoneOp>(storeOp.getLoc(), memoryOp.done(),
+                                      Value());
+  return success();
+}
+
+LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
+                                     memref::AllocOp allocOp) const {
+  rewriter.setInsertionPointToStart(getComponent()->getBody());
+  MemRefType memtype = allocOp.getType();
+  SmallVector<int64_t> addrSizes;
+  SmallVector<int64_t> sizes;
+  for (int64_t dim : memtype.getShape()) {
+    sizes.push_back(dim);
+    addrSizes.push_back(llvm::Log2_64_Ceil(dim));
+  }
+  auto memoryOp = rewriter.create<calyx::MemoryOp>(
+      allocOp.getLoc(), getComponentState().getUniqueName("mem"),
+      memtype.getElementType().getIntOrFloatBitWidth(), sizes, addrSizes);
+  getComponentState().registerMemory(allocOp.getResult(), memoryOp);
+  return success();
+}
+
 LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
                                      scf::YieldOp yieldOp) const {
   if (yieldOp.getOperands().size() == 0)
@@ -816,6 +951,120 @@ LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
       rewriter, op, {op.getOperand().getType()}, {op.getType()});
 }

+LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
+                                     IndexCastOp op) const {
+  Type sourceType = op.getOperand().getType();
+  sourceType = sourceType.isIndex() ? rewriter.getI32Type() : sourceType;
+  Type targetType = op.getResult().getType();
+  targetType = targetType.isIndex() ? rewriter.getI32Type() : targetType;
+  unsigned targetBits = targetType.getIntOrFloatBitWidth();
+  unsigned sourceBits = sourceType.getIntOrFloatBitWidth();
+  LogicalResult res = success();
+
+  if (targetBits == sourceBits) {
+    /// Drop the index cast and replace uses of the target value with the
+    /// source value.
+    op.getResult().replaceAllUsesWith(op.getOperand());
+  } else {
+    /// Pad or slice the source operand.
+    if (sourceBits > targetBits)
+      res = buildLibraryOp<calyx::CombGroupOp, calyx::SliceLibOp>(
+          rewriter, op, {sourceType}, {targetType});
+    else
+      res = buildLibraryOp<calyx::CombGroupOp, calyx::PadLibOp>(
+          rewriter, op, {sourceType}, {targetType});
+  }
+  rewriter.eraseOp(op);
+  return res;
+}
+
+/// This pass rewrites memory accesses that have a width mismatch. Such
+/// mismatches arise because index types are assumed to be 32 bits wide,
+/// given the lack of a width inference pass.
+class RewriteMemoryAccesses : public PartialLoweringPattern<calyx::AssignOp> {
+public:
+  RewriteMemoryAccesses(MLIRContext *context, LogicalResult &resRef,
+                        ProgramLoweringState &pls)
+      : PartialLoweringPattern(context, resRef), pls(pls) {}
+
+  LogicalResult partiallyLower(calyx::AssignOp assignOp,
+                               PatternRewriter &rewriter) const override {
+    auto dest = assignOp.dest();
+    auto destDefOp = dest.getDefiningOp();
+    /// Is this an assignment to a memory op?
+    if (!destDefOp)
+      return success();
+    auto destDefMem = dyn_cast<calyx::MemoryOp>(destDefOp);
+    if (!destDefMem)
+      return success();
+
+    /// Is this an assignment to an address port of the memory op?
+    bool isAssignToAddrPort = llvm::any_of(
+        destDefMem.addrPorts(), [&](auto port) { return port == dest; });
+
+    auto src = assignOp.src();
+    auto &state =
+        pls.compLoweringState(assignOp->getParentOfType<calyx::ComponentOp>());
+
+    unsigned srcBits = src.getType().getIntOrFloatBitWidth();
+    unsigned dstBits = dest.getType().getIntOrFloatBitWidth();
+    if (srcBits == dstBits)
+      return success();
+
+    if (isAssignToAddrPort) {
+      SmallVector<Type> types = {rewriter.getIntegerType(srcBits),
+                                 rewriter.getIntegerType(dstBits)};
+      auto sliceOp = state.getNewLibraryOpInstance<calyx::SliceLibOp>(
+          rewriter, assignOp.getLoc(), types);
+      rewriter.setInsertionPoint(assignOp->getBlock(),
+                                 assignOp->getBlock()->begin());
+      rewriter.create<calyx::AssignOp>(assignOp->getLoc(),
+                                       sliceOp.getResult(0), src, Value());
+      assignOp.setOperand(1, sliceOp.getResult(1));
+    } else
+      return assignOp.emitError()
+             << "Will only infer slice operators for assign width mismatches "
+                "to memory address ports.";
+
+    return success();
+  }
+
+private:
+  ProgramLoweringState &pls;
+};
+
+/// Converts all index-typed operations and values to i32 values.
+class ConvertIndexTypes : public FuncOpPartialLoweringPattern {
+  using FuncOpPartialLoweringPattern::FuncOpPartialLoweringPattern;
+
+  LogicalResult
+  PartiallyLowerFuncToComp(mlir::FuncOp funcOp,
+                           PatternRewriter &rewriter) const override {
+    funcOp.walk([&](Block *block) {
+      for (auto arg : block->getArguments())
+        if (arg.getType().isIndex())
+          arg.setType(rewriter.getI32Type());
+    });
+
+    funcOp.walk([&](Operation *op) {
+      for (auto res : op->getResults()) {
+        if (!res.getType().isIndex())
+          continue;
+
+        res.setType(rewriter.getI32Type());
+        if (auto constOp = dyn_cast<ConstantOp>(op)) {
+          APInt value;
+          matchConstantOp(constOp, value);
+          rewriter.setInsertionPoint(constOp);
+          rewriter.replaceOpWithNewOp<ConstantOp>(
+              constOp, rewriter.getI32IntegerAttr(value.getSExtValue()));
+        }
+      }
+    });
+    return success();
+  }
+};
+
 /// Inlines Calyx ExecuteRegionOp operations within their parent blocks.
 /// An execution region op (ERO) is inlined by:
 ///  i  : add a sink basic block for all yield operations inside the
@@ -1330,6 +1579,18 @@ class LateSSAReplacement : public FuncOpPartialLoweringPattern {
       for (auto res : getComponentState().getWhileIterRegs(whileOp))
         whileOp.getResults()[res.first].replaceAllUsesWith(res.second.out());
     });
+
+    funcOp.walk([&](memref::LoadOp loadOp) {
+      if (singleLoadFromMemory(loadOp)) {
+        /// In buildOpGroups we did not replace loadOp's results, to ensure a
+        /// link between evaluating groups (which fix the input addresses of a
+        /// memory op) and a readData result. Now, we may replace these SSA
+        /// values with their memoryOp readData output.
+        loadOp.getResult().replaceAllUsesWith(
+            getComponentState().getMemory(loadOp.memref()).readData());
+      }
+    });
+
     return success();
   }
 };
@@ -1719,6 +1980,10 @@ void SCFToCalyxPass::runOnOperation() {
   /// This pass inlines scf.ExecuteRegionOp's by adding control-flow.
   addGreedyPattern<InlineExecuteRegionOpPattern>(loweringPatterns);

+  /// This pattern converts all index types to a predefined width (currently
+  /// i32).
+  addOncePattern<ConvertIndexTypes>(loweringPatterns, funcMap, *loweringState);
+
   /// This pattern creates registers for all basic-block arguments.
   addOncePattern<BuildBBRegs>(loweringPatterns, funcMap, *loweringState);

@@ -1752,6 +2017,10 @@ void SCFToCalyxPass::runOnOperation() {
   /// after control generation.
   addOncePattern<LateSSAReplacement>(loweringPatterns, funcMap, *loweringState);

+  /// This pattern rewrites memory accesses which are too wide due to index
+  /// types being converted to a fixed-width integer type.
+  addOncePattern<RewriteMemoryAccesses>(loweringPatterns, *loweringState);
+
   /// This pattern removes the source FuncOp, which has now been converted into
   /// a Calyx component.
   addOncePattern<CleanupFuncOps>(loweringPatterns, funcMap, *loweringState);
diff --git a/test/Conversion/SCFToCalyx/convert_memory.mlir b/test/Conversion/SCFToCalyx/convert_memory.mlir
new file mode 100644
index 000000000000..d4a140fb1239
--- /dev/null
+++ b/test/Conversion/SCFToCalyx/convert_memory.mlir
@@ -0,0 +1,259 @@
+// RUN: circt-opt %s --lower-scf-to-calyx -split-input-file | FileCheck %s
+
+// CHECK: module {
+// CHECK-NEXT: calyx.program "main" {
+// CHECK-NEXT: calyx.component @main(%clk: i1 {clk}, %reset: i1 {reset}, %go: i1 {go}) -> (%done: i1 {done}) {
+// CHECK-NEXT: %true = hw.constant true
+// CHECK-NEXT: %c0_i32 = hw.constant 0 : i32
+// CHECK-NEXT: %c1_i32 = hw.constant 1 : i32
+// CHECK-NEXT: %c64_i32 = hw.constant 64 : i32
+// CHECK-NEXT: %std_slice_2.in, %std_slice_2.out = calyx.std_slice "std_slice_2" : i32, i6
+// CHECK-NEXT: %std_slice_1.in, %std_slice_1.out = calyx.std_slice "std_slice_1" : i32, i6
+// CHECK-NEXT: %std_slice_0.in, %std_slice_0.out = calyx.std_slice "std_slice_0" : i32, i6
+// CHECK-NEXT: %std_add_0.left, %std_add_0.right, %std_add_0.out = calyx.std_add "std_add_0" : i32, i32, i32
+// CHECK-NEXT: %std_lt_0.left, %std_lt_0.right, %std_lt_0.out = calyx.std_lt "std_lt_0" : i32, i32, i1
+// CHECK-NEXT: %mem_1.addr0, %mem_1.write_data, %mem_1.write_en, %mem_1.clk, %mem_1.read_data, %mem_1.done = calyx.memory "mem_1"<[64] x 32> [6] : i6, i32, i1, i1, i32, i1
+// CHECK-NEXT: %mem_0.addr0, %mem_0.write_data, %mem_0.write_en, %mem_0.clk, %mem_0.read_data, %mem_0.done = calyx.memory "mem_0"<[64] x 32> [6] : i6, i32, i1, i1, i32, i1
+// CHECK-NEXT: %while_0_arg0_reg.in, %while_0_arg0_reg.write_en, %while_0_arg0_reg.clk, %while_0_arg0_reg.reset, %while_0_arg0_reg.out, %while_0_arg0_reg.done = calyx.register "while_0_arg0_reg" : i32, i1, i1, i1, i32, i1
+// CHECK-NEXT: calyx.wires {
+// CHECK-NEXT: calyx.group @assign_while_0_init {
+// CHECK-NEXT: calyx.assign %while_0_arg0_reg.in = %c0_i32 : i32
+// CHECK-NEXT: calyx.assign %while_0_arg0_reg.write_en = %true : i1
+// CHECK-NEXT: calyx.group_done %while_0_arg0_reg.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.comb_group @bb0_0 {
+// CHECK-NEXT: calyx.assign %std_lt_0.left = %while_0_arg0_reg.out : i32
+// CHECK-NEXT: calyx.assign %std_lt_0.right = %c64_i32 : i32
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.group @bb0_2 {
+// CHECK-NEXT: calyx.assign %std_slice_1.in = %while_0_arg0_reg.out : i32
+// CHECK-NEXT: calyx.assign %std_slice_0.in = %while_0_arg0_reg.out : i32
+// CHECK-NEXT: calyx.assign %mem_1.addr0 = %std_slice_1.out : i6
+// CHECK-NEXT: calyx.assign %mem_1.write_data = %mem_0.read_data : i32
+// CHECK-NEXT: calyx.assign %mem_1.write_en = %true : i1
+// CHECK-NEXT: calyx.assign %mem_0.addr0 = %std_slice_0.out : i6
+// CHECK-NEXT: calyx.group_done %mem_1.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.group @assign_while_0_latch {
+// CHECK-NEXT: calyx.assign %while_0_arg0_reg.in = %std_add_0.out : i32
+// CHECK-NEXT: calyx.assign %while_0_arg0_reg.write_en = %true : i1
+// CHECK-NEXT: calyx.assign %std_add_0.left = %while_0_arg0_reg.out : i32
+// CHECK-NEXT: calyx.assign %std_add_0.right = %c1_i32 : i32
+// CHECK-NEXT: calyx.group_done %while_0_arg0_reg.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.control {
+// CHECK-NEXT: calyx.seq {
+// CHECK-NEXT: calyx.enable @assign_while_0_init
+// CHECK-NEXT: calyx.while %std_lt_0.out with @bb0_0 {
+// CHECK-NEXT: calyx.seq {
+// CHECK-NEXT: calyx.enable @bb0_2
+// CHECK-NEXT: calyx.enable @assign_while_0_latch
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+module {
+  func @main() {
+    %c0 = constant 0 : index
+    %c1 = constant 1 : index
+    %c64 = constant 64 : index
+    %0 = memref.alloc() : memref<64xi32>
+    %1 = memref.alloc() : memref<64xi32>
+    scf.while(%arg0 = %c0) : (index) -> (index) {
+      %cond = cmpi ult, %arg0, %c64 : index
+      scf.condition(%cond) %arg0 : index
+    } do {
+    ^bb0(%arg1: index):
+      %v = memref.load %0[%arg1] : memref<64xi32>
+      memref.store %v, %1[%arg1] : memref<64xi32>
+      %inc = addi %arg1, %c1 : index
+      scf.yield %inc : index
+    }
+    return
+  }
+}
+
+// -----
+
+// Test combinational value used across sequential group boundary. This requires
+// that any referenced combinational assignments are re-applied in each
+// sequential group.
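+// For instance, in the input below the value %1 (addi %arg0, %c1, lowered to
+// std_add_0) is used both as the stored value in group @bb0_1 and in the
+// computation of the return value in group @ret_assign_0; the CHECK lines
+// verify that the std_add_0 operand assignments appear in both groups.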
+// CHECK: module {
+// CHECK-NEXT: calyx.program "main" {
+// CHECK-NEXT: calyx.component @main(%in0: i32, %clk: i1 {clk}, %reset: i1 {reset}, %go: i1 {go}) -> (%out0: i32, %done: i1 {done}) {
+// CHECK-NEXT: %c1_i32 = hw.constant 1 : i32
+// CHECK-NEXT: %c0_i32 = hw.constant 0 : i32
+// CHECK-NEXT: %true = hw.constant true
+// CHECK-NEXT: %std_slice_0.in, %std_slice_0.out = calyx.std_slice "std_slice_0" : i32, i6
+// CHECK-NEXT: %std_add_1.left, %std_add_1.right, %std_add_1.out = calyx.std_add "std_add_1" : i32, i32, i32
+// CHECK-NEXT: %std_add_0.left, %std_add_0.right, %std_add_0.out = calyx.std_add "std_add_0" : i32, i32, i32
+// CHECK-NEXT: %mem_0.addr0, %mem_0.write_data, %mem_0.write_en, %mem_0.clk, %mem_0.read_data, %mem_0.done = calyx.memory "mem_0"<[64] x 32> [6] : i6, i32, i1, i1, i32, i1
+// CHECK-NEXT: %ret_arg0_reg.in, %ret_arg0_reg.write_en, %ret_arg0_reg.clk, %ret_arg0_reg.reset, %ret_arg0_reg.out, %ret_arg0_reg.done = calyx.register "ret_arg0_reg" : i32, i1, i1, i1, i32, i1
+// CHECK-NEXT: calyx.wires {
+// CHECK-NEXT: calyx.assign %out0 = %ret_arg0_reg.out : i32
+// CHECK-NEXT: calyx.group @bb0_1 {
+// CHECK-NEXT: calyx.assign %std_slice_0.in = %c0_i32 : i32
+// CHECK-NEXT: calyx.assign %mem_0.addr0 = %std_slice_0.out : i6
+// CHECK-NEXT: calyx.assign %mem_0.write_data = %std_add_0.out : i32
+// CHECK-NEXT: calyx.assign %mem_0.write_en = %true : i1
+// CHECK-NEXT: calyx.assign %std_add_0.left = %in0 : i32
+// CHECK-NEXT: calyx.assign %std_add_0.right = %c1_i32 : i32
+// CHECK-NEXT: calyx.group_done %mem_0.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.group @ret_assign_0 {
+// CHECK-NEXT: calyx.assign %ret_arg0_reg.in = %std_add_1.out : i32
+// CHECK-NEXT: calyx.assign %ret_arg0_reg.write_en = %true : i1
+// CHECK-NEXT: calyx.assign %std_add_1.left = %std_add_0.out : i32
+// CHECK-NEXT: calyx.assign %std_add_0.left = %in0 : i32
+// CHECK-NEXT: calyx.assign %std_add_0.right = %c1_i32 : i32
+// CHECK-NEXT: calyx.assign %std_add_1.right = %c1_i32 : i32
+// CHECK-NEXT: calyx.group_done %ret_arg0_reg.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.control {
+// CHECK-NEXT: calyx.seq {
+// CHECK-NEXT: calyx.enable @bb0_1
+// CHECK-NEXT: calyx.enable @ret_assign_0
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+module {
+  func @main(%arg0 : i32) -> i32 {
+    %0 = memref.alloc() : memref<64xi32>
+    %c0 = constant 0 : index
+    %c1 = constant 1 : i32
+    %1 = addi %arg0, %c1 : i32
+    memref.store %1, %0[%c0] : memref<64xi32>
+    %3 = addi %1, %c1 : i32
+    return %3 : i32
+  }
+}
+
+// -----
+
+// CHECK: module {
+// CHECK-NEXT: calyx.program "main" {
+// CHECK-NEXT: calyx.component @main(%in0: i32, %clk: i1 {clk}, %reset: i1 {reset}, %go: i1 {go}) -> (%out0: i32, %done: i1 {done}) {
+// CHECK-NEXT: %c1_i32 = hw.constant 1 : i32
+// CHECK-NEXT: %c0_i32 = hw.constant 0 : i32
+// CHECK-NEXT: %true = hw.constant true
+// CHECK-NEXT: %std_slice_0.in, %std_slice_0.out = calyx.std_slice "std_slice_0" : i32, i6
+// CHECK-NEXT: %std_add_2.left, %std_add_2.right, %std_add_2.out = calyx.std_add "std_add_2" : i32, i32, i32
+// CHECK-NEXT: %std_add_1.left, %std_add_1.right, %std_add_1.out = calyx.std_add "std_add_1" : i32, i32, i32
+// CHECK-NEXT: %std_add_0.left, %std_add_0.right, %std_add_0.out = calyx.std_add "std_add_0" : i32, i32, i32
+// CHECK-NEXT: %mem_0.addr0, %mem_0.write_data, %mem_0.write_en, %mem_0.clk, %mem_0.read_data, %mem_0.done = calyx.memory "mem_0"<[64] x 32> [6] : i6, i32, i1, i1, i32, i1
+// CHECK-NEXT: %ret_arg0_reg.in, %ret_arg0_reg.write_en, %ret_arg0_reg.clk, %ret_arg0_reg.reset, %ret_arg0_reg.out, %ret_arg0_reg.done = calyx.register "ret_arg0_reg" : i32, i1, i1, i1, i32, i1
+// CHECK-NEXT: calyx.wires {
+// CHECK-NEXT: calyx.assign %out0 = %ret_arg0_reg.out : i32
+// CHECK-NEXT: calyx.group @bb0_2 {
+// CHECK-NEXT: calyx.assign %std_slice_0.in = %c0_i32 : i32
+// CHECK-NEXT: calyx.assign %mem_0.addr0 = %std_slice_0.out : i6
+// CHECK-NEXT: calyx.assign %mem_0.write_data = %std_add_0.out : i32
+// CHECK-NEXT: calyx.assign %mem_0.write_en = %true : i1
+// CHECK-NEXT: calyx.assign %std_add_0.left = %in0 : i32
+// CHECK-NEXT: calyx.assign %std_add_0.right = %c1_i32 : i32
+// CHECK-NEXT: calyx.group_done %mem_0.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.group @ret_assign_0 {
+// CHECK-NEXT: calyx.assign %ret_arg0_reg.in = %std_add_2.out : i32
+// CHECK-NEXT: calyx.assign %ret_arg0_reg.write_en = %true : i1
+// CHECK-NEXT: calyx.assign %std_add_2.left = %std_add_1.out : i32
+// CHECK-NEXT: calyx.assign %std_add_1.left = %std_add_0.out : i32
+// CHECK-NEXT: calyx.assign %std_add_0.left = %in0 : i32
+// CHECK-NEXT: calyx.assign %std_add_0.right = %c1_i32 : i32
+// CHECK-NEXT: calyx.assign %std_add_1.right = %c1_i32 : i32
+// CHECK-NEXT: calyx.assign %std_add_2.right = %c1_i32 : i32
+// CHECK-NEXT: calyx.group_done %ret_arg0_reg.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.control {
+// CHECK-NEXT: calyx.seq {
+// CHECK-NEXT: calyx.enable @bb0_2
+// CHECK-NEXT: calyx.enable @ret_assign_0
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+module {
+  func @main(%arg0 : i32) -> i32 {
+    %0 = memref.alloc() : memref<64xi32>
+    %c0 = constant 0 : index
+    %c1 = constant 1 : i32
+    %1 = addi %arg0, %c1 : i32
+    %2 = addi %1, %c1 : i32
+    memref.store %1, %0[%c0] : memref<64xi32>
+    %3 = addi %2, %c1 : i32
+    return %3 : i32
+  }
+}
+
+// -----
+
+// Test multiple reads from the same memory (structural hazard).
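+// Both loads below read from %0 (mem_0), which exposes a single address/read
+// port. Each loaded value is therefore written to a register (load_0_reg,
+// load_1_reg), and the two loads are scheduled in separate sequential groups
+// (@bb0_1, @bb0_2) before the addition in @ret_assign_0 consumes both.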
+// CHECK: module {
+// CHECK-NEXT: calyx.program "main" {
+// CHECK-NEXT: calyx.component @main(%in0: i6, %clk: i1 {clk}, %reset: i1 {reset}, %go: i1 {go}) -> (%out0: i32, %done: i1 {done}) {
+// CHECK-NEXT: %c1_i32 = hw.constant 1 : i32
+// CHECK-NEXT: %true = hw.constant true
+// CHECK-NEXT: %std_slice_1.in, %std_slice_1.out = calyx.std_slice "std_slice_1" : i32, i6
+// CHECK-NEXT: %std_slice_0.in, %std_slice_0.out = calyx.std_slice "std_slice_0" : i32, i6
+// CHECK-NEXT: %std_add_0.left, %std_add_0.right, %std_add_0.out = calyx.std_add "std_add_0" : i32, i32, i32
+// CHECK-NEXT: %load_1_reg.in, %load_1_reg.write_en, %load_1_reg.clk, %load_1_reg.reset, %load_1_reg.out, %load_1_reg.done = calyx.register "load_1_reg" : i32, i1, i1, i1, i32, i1
+// CHECK-NEXT: %load_0_reg.in, %load_0_reg.write_en, %load_0_reg.clk, %load_0_reg.reset, %load_0_reg.out, %load_0_reg.done = calyx.register "load_0_reg" : i32, i1, i1, i1, i32, i1
+// CHECK-NEXT: %std_pad_0.in, %std_pad_0.out = calyx.std_pad "std_pad_0" : i6, i32
+// CHECK-NEXT: %mem_0.addr0, %mem_0.write_data, %mem_0.write_en, %mem_0.clk, %mem_0.read_data, %mem_0.done = calyx.memory "mem_0"<[64] x 32> [6] : i6, i32, i1, i1, i32, i1
+// CHECK-NEXT: %ret_arg0_reg.in, %ret_arg0_reg.write_en, %ret_arg0_reg.clk, %ret_arg0_reg.reset, %ret_arg0_reg.out, %ret_arg0_reg.done = calyx.register "ret_arg0_reg" : i32, i1, i1, i1, i32, i1
+// CHECK-NEXT: calyx.wires {
+// CHECK-NEXT: calyx.assign %out0 = %ret_arg0_reg.out : i32
+// CHECK-NEXT: calyx.group @bb0_1 {
+// CHECK-NEXT: calyx.assign %std_slice_1.in = %std_pad_0.out : i32
+// CHECK-NEXT: calyx.assign %mem_0.addr0 = %std_slice_1.out : i6
+// CHECK-NEXT: calyx.assign %load_0_reg.in = %mem_0.read_data : i32
+// CHECK-NEXT: calyx.assign %load_0_reg.write_en = %true : i1
+// CHECK-NEXT: calyx.assign %std_pad_0.in = %in0 : i6
+// CHECK-NEXT: calyx.group_done %load_0_reg.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.group @bb0_2 {
+// CHECK-NEXT: calyx.assign %std_slice_0.in = %c1_i32 : i32
+// CHECK-NEXT: calyx.assign %mem_0.addr0 = %std_slice_0.out : i6
+// CHECK-NEXT: calyx.assign %load_1_reg.in = %mem_0.read_data : i32
+// CHECK-NEXT: calyx.assign %load_1_reg.write_en = %true : i1
+// CHECK-NEXT: calyx.group_done %load_1_reg.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.group @ret_assign_0 {
+// CHECK-NEXT: calyx.assign %ret_arg0_reg.in = %std_add_0.out : i32
+// CHECK-NEXT: calyx.assign %ret_arg0_reg.write_en = %true : i1
+// CHECK-NEXT: calyx.assign %std_add_0.left = %load_0_reg.out : i32
+// CHECK-NEXT: calyx.assign %std_add_0.right = %load_1_reg.out : i32
+// CHECK-NEXT: calyx.group_done %ret_arg0_reg.done : i1
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: calyx.control {
+// CHECK-NEXT: calyx.seq {
+// CHECK-NEXT: calyx.enable @bb0_1
+// CHECK-NEXT: calyx.enable @bb0_2
+// CHECK-NEXT: calyx.enable @ret_assign_0
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+module {
+  func @main(%arg0 : i6) -> i32 {
+    %0 = memref.alloc() : memref<64xi32>
+    %c1 = constant 1 : index
+    %arg0_idx = index_cast %arg0 : i6 to index
+    %1 = memref.load %0[%arg0_idx] : memref<64xi32>
+    %2 = memref.load %0[%c1] : memref<64xi32>
+    %3 = addi %1, %2 : i32
+    return %3 : i32
+  }
+}
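For reference, the index-width handling above can be traced end to end on a
minimal single-load function (a hypothetical input, not part of the patch's
test suite):

  func @get(%i : index) -> i32 {
    %m = memref.alloc() : memref<64xi32>
    %v = memref.load %m[%i] : memref<64xi32>
    return %v : i32
  }

ConvertIndexTypes first rewrites %i to i32 (no bitwidth inference pass
exists). memref.alloc lowers to a calyx.memory whose address port is
llvm::Log2_64_Ceil(64) = 6 bits wide, so RewriteMemoryAccesses resolves the
resulting i32-to-i6 assignment mismatch by inserting a std_slice in front of
the memory's addr0 port, as checked in the tests above. Since this is the
only load from %m, the combinational path applies: no load register is
created, and LateSSAReplacement replaces %v with the memory's read_data
output after control generation.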