From 4f17459f96282fe67f623adf2f0b1746e42dfa80 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Tue, 25 Jun 2024 14:58:43 -0600 Subject: [PATCH 01/16] Transition AIETarget to use internal aie2xclbin function rather than external utility --- .../print_ir_aie2xclbin/buffers_xclbin.mlir | 2 +- .../plugins/target/AMD-AIE/aie/CMakeLists.txt | 2 + .../AMD-AIE/iree-amd-aie/Target/AIETarget.cpp | 147 ++++++++++-------- .../iree-amd-aie/Target/AIETargetDirect.cpp | 8 +- .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 28 +++- .../iree-amd-aie/Transforms/CMakeLists.txt | 1 + .../iree-amd-aie/Transforms/Passes.cpp | 14 ++ 7 files changed, 126 insertions(+), 76 deletions(-) diff --git a/build_tools/ci/print_ir_aie2xclbin/buffers_xclbin.mlir b/build_tools/ci/print_ir_aie2xclbin/buffers_xclbin.mlir index 5d6af9756..2fe31af06 100644 --- a/build_tools/ci/print_ir_aie2xclbin/buffers_xclbin.mlir +++ b/build_tools/ci/print_ir_aie2xclbin/buffers_xclbin.mlir @@ -72,7 +72,7 @@ // CHECK: }, // CHECK: "instances": [ // CHECK: { -// CHECK: "name": "dummy2_0" +// CHECK: "name": "IREE" // CHECK: } // CHECK: ], // CHECK: "name": "dummy2_0", diff --git a/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt index f081678a9..54bd61232 100644 --- a/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt @@ -304,6 +304,8 @@ iree_cc_library( "AIEObjectFifoStatefulTransform.cpp" "AIEDmaToNpu.cpp" "AIEXToStandard.cpp" + # Passes needed by AIR-AIE lowering + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.cpp" DEPS ::defs ::AIEDialectIR diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index 5a71a2521..847ac43e4 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -8,12 +8,19 @@ #include +#include "XCLBinGen.h" #include "aie/Dialect/AIE/IR/AIEDialect.h" +#include "aie/Dialect/AIE/Transforms/AIEPasses.h" +#include "aie/Dialect/AIEVec/IR/AIEVecDialect.h" #include "aie/Dialect/AIEX/IR/AIEXDialect.h" +#include "aie/Dialect/XLLVM/XLLVMDialect.h" +#include "aie/Passes.h" +#include "aie/Target/LLVMIR/Dialect/XLLVM/XLLVMToLLVMIRTranslation.h" #include "air/Dialect/AIR/AIRDialect.h" #include "air/Dialect/AIRRt/AIRRtDialect.h" #include "iree-amd-aie/IR/AMDAIEDialect.h" #include "iree-amd-aie/Transforms/Passes.h" +#include "iree-dialects/Dialect/LinalgTransform/Passes.h" #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h" @@ -21,10 +28,22 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Program.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/ToolOutputFile.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/Passes.h" +#include "mlir/Dialect/DLTI/DLTI.h" +#include "mlir/Dialect/EmitC/IR/EmitC.h" +#include "mlir/Dialect/Func/Extensions/AllExtensions.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/Transform/IR/TransformDialect.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Parser/Parser.h" +#include "mlir/Pass/PassManager.h" #include "mlir/Support/FileUtilities.h" +#include "mlir/Target/LLVMIR/Dialect/All.h" +#include "mlir/Target/LLVMIR/Export.h" #include "runtime/plugins/AMD-AIE/iree-amd-aie/schemas/xrt_executable_def_builder.h" #define DEBUG_TYPE "aie-target" @@ -136,12 +155,27 @@ class AIETargetBackend final : public IREE::HAL::TargetBackend { } void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert< + mlir::iree_compiler::AMDAIE::AMDAIEDialect, + mlir::iree_compiler::IREE::Codegen::IREECodegenDialect, + IREE::LinalgExt::IREELinalgExtDialect, transform::TransformDialect, + xilinx::AIE::AIEDialect, xilinx::AIEX::AIEXDialect, + xilinx::air::airDialect, xilinx::airrt::AIRRtDialect, + xilinx::xllvm::XLLVMDialect, xilinx::aievec::AIEVecDialect, + emitc::EmitCDialect, LLVM::LLVMDialect, func::FuncDialect, + cf::ControlFlowDialect, DLTIDialect, arith::ArithDialect, + memref::MemRefDialect, math::MathDialect, vector::VectorDialect>(); + + registerBuiltinDialectTranslation(registry); + registerLLVMDialectTranslation(registry); + xilinx::xllvm::registerXLLVMDialectTranslation(registry); + arith::registerConvertArithToLLVMInterface(registry); + cf::registerConvertControlFlowToLLVMInterface(registry); + func::registerAllExtensions(registry); + registerConvertFuncToLLVMInterface(registry); + index::registerConvertIndexToLLVMInterface(registry); + registerConvertMathToLLVMInterface(registry); + registerConvertMemRefToLLVMInterface(registry); } void buildTranslationPassPipeline(IREE::HAL::ExecutableTargetAttr, @@ -205,7 +239,6 @@ LogicalResult AIETargetBackend::serializeExecutable( if (failed(maybeWorkDir)) return failure(); auto workDir = maybeWorkDir.value(); - // collect names of kernels as they need to be in kernels.json // generated by `aie2xclbin` SmallVector entryPointNames; @@ -251,8 +284,6 @@ LogicalResult AIETargetBackend::serializeExecutable( return moduleOp.emitOpError("should contain some entry points"); } - SmallString<128> aie2xclbin(options.mlirAieInstallDir); - llvm::sys::path::append(aie2xclbin, "bin", "aie2xclbin"); std::unique_ptr xclbinIn; FlatbufferBuilder builder; @@ -293,68 +324,52 @@ LogicalResult AIETargetBackend::serializeExecutable( SmallString<128> entryPointWorkDir(workDir); if (ordinalCount > 1) llvm::sys::path::append(entryPointWorkDir, entryPointNamesFb[ordinal]); + auto err = llvm::sys::fs::create_directories(entryPointWorkDir); + if (err) + return moduleOp.emitOpError() + << "failed to create working directory for xclbin generation: " + << err.message(); + llvm::outs().flush(); SmallString<128> xclbinPath(entryPointWorkDir); llvm::sys::path::append(xclbinPath, entryPointNamesFb[ordinal] + ".xclbin"); SmallString<128> npuInstPath(entryPointWorkDir); llvm::sys::path::append(npuInstPath, entryPointNamesFb[ordinal] + ".npu.txt"); - SmallVector cmdArgs{aie2xclbin, - inputMlirPath, - "--peano", - options.peanoInstallDir, - "--xclbin-name", - xclbinPath, - "--npu-insts-name", - npuInstPath, - "--xclbin-kernel-name", - entryPointNamesFb[ordinal], - "--tmpdir", - entryPointWorkDir}; - - auto addOpt = [&](StringRef arg, bool value) { - if (value) cmdArgs.push_back(arg); - }; - addOpt("--use-chess", options.useChess); - addOpt("-v", options.showInvokedCommands); - addOpt("--print-ir-after-all", options.aie2xclbinPrintIrAfterAll); - addOpt("--print-ir-before-all", options.aie2xclbinPrintIrBeforeAll); - addOpt("--disable-threading", options.aie2xclbinDisableTheading); - addOpt("--print-ir-module-scope", options.aie2xclbinPrintIrModuleScope); - addOpt("--timing", options.aie2xclbinTiming); - - SmallVector cmdEnv{}; - if (options.useChess) { - std::string newVitis = "VITIS=" + options.vitisInstallDir; - cmdEnv.push_back(newVitis); - } - - if (const char *originalPath = ::getenv("PATH")) { - std::string newPath = "PATH=" + std::string(originalPath); - cmdEnv.push_back(newPath); - } - - // Chess (if used) will look here for the AIEbuild license. - if (const char *originalHome = ::getenv("HOME")) { - std::string newHome = std::string("HOME=") + std::string(originalHome); - cmdEnv.push_back(newHome); - } - - if (options.showInvokedCommands) { - for (auto s : cmdEnv) llvm::dbgs() << s << " "; - for (auto s : cmdArgs) llvm::dbgs() << s << " "; - llvm::dbgs() << "\n"; - } - - { - SmallVector cmdEnvRefs{cmdEnv.begin(), cmdEnv.end()}; - int result = llvm::sys::ExecuteAndWait(cmdArgs[0], cmdArgs, cmdEnvRefs, - {}, 0, 0, &errorMessage); - if (result != 0) - return moduleOp.emitOpError() - << "Failed to produce an XCLBin with external tool: " - << errorMessage; - } + xilinx::XCLBinGenConfig TK; + TK.DisableThreading = options.aie2xclbinDisableTheading; + TK.PrintIRAfterAll = options.aie2xclbinPrintIrAfterAll; + TK.PrintIRBeforeAll = options.aie2xclbinPrintIrBeforeAll; + TK.PrintIRModuleScope = options.aie2xclbinPrintIrModuleScope; + TK.TargetArch = "AIE2"; + TK.TempDir = entryPointWorkDir.str(); + TK.UseChess = options.useChess; + TK.Verbose = options.showInvokedCommands; + // The instance name is appended to the kernel name so we dont want it to be + // something too long. + TK.XCLBinInstanceName = "IREE"; + + // Convert ordinal to hexadecimal string for xclbin kernel id. + std::stringstream ordinalHex; + ordinalHex << "0x" << std::hex << ordinal; + TK.XCLBinKernelID = ordinalHex.str(); + TK.XCLBinKernelName = entryPointNamesFb[ordinal]; + + SmallString<64> aieToolsDir(options.vitisInstallDir); + llvm::sys::path::append(aieToolsDir, "aietools"); + TK.AIEToolsDir = aieToolsDir.str(); + TK.InstallDir = options.mlirAieInstallDir; + TK.PeanoDir = options.peanoInstallDir; + + ParserConfig pcfg(variantOp->getContext()); + llvm::SourceMgr srcMgr; + + OwningOpRef owningModuleOp = + parseSourceFile(inputMlirPath, srcMgr, pcfg); + + if (failed(aie2xclbin(variantOp->getContext(), *owningModuleOp, TK, npuInstPath, + xclbinPath))) + return failure(); std::ifstream instrFile(static_cast(npuInstPath)); std::string line; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp index 91b36e635..e94a28a28 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp @@ -287,6 +287,10 @@ LogicalResult AIETargetDirectBackend::serializeExecutable( } uint64_t ordinalCount = entryPointOrdinals.size(); + if (entryPointNames.empty()) { + return moduleOp.emitOpError("should contain some entry points"); + } + std::unique_ptr xclbinIn; FlatbufferBuilder builder; @@ -342,7 +346,9 @@ LogicalResult AIETargetDirectBackend::serializeExecutable( TK.TempDir = entryPointWorkDir.str(); TK.UseChess = options.useChess; TK.Verbose = options.showInvokedCommands; - TK.XCLBinInstanceName = entryPointNamesFb[ordinal]; + // The instance name is appended to the kernel name so we dont want it to be + // something too long. + TK.XCLBinInstanceName = "IREE"; // Convert ordinal to hexadecimal string for xclbin kernel id. std::stringstream ordinalHex; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index f40987618..a24b1ec5e 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -12,6 +12,7 @@ #include "AMDAIETargets.h" #include "aie/Conversion/AIEVecToLLVM/AIEVecToLLVM.h" +#include "aie/Dialect/AIE/Transforms/AIEPasses.h" #include "aie/Dialect/AIEVec/Pipelines/Passes.h" #include "aie/Passes.h" #include "aie/Targets/AIETargets.h" @@ -686,6 +687,8 @@ static LogicalResult generateUnifiedObject(MLIRContext *context, pm.addNestedPass( mlir::iree_compiler::AMDAIE::createAIELocalizeLocksPass()); + pm.addNestedPass( + xilinx::AIE::createAIENormalizeAddressSpacesPass()); pm.addPass(mlir::iree_compiler::AMDAIE::createAIECoreToStandardPass()); pm.addPass(mlir::iree_compiler::AMDAIE::createAIEXToStandardPass()); @@ -825,8 +828,10 @@ LogicalResult xilinx::aie2xclbin(MLIRContext *ctx, ModuleOp moduleOp, XCLBinGenConfig &TK, StringRef OutputNPU, StringRef OutputXCLBin, StringRef InputXCLBin) { - PassManager pm(ctx, moduleOp.getOperationName()); - applyConfigToPassManager(TK, pm); + std::regex target_regex("AIE.?"); + if (!std::regex_search(TK.TargetArch, target_regex)) + return moduleOp.emitOpError() + << "Unexpected target architecture: " << TK.TargetArch; // generateNPUInstructions pm.addNestedPass( @@ -843,13 +848,20 @@ LogicalResult xilinx::aie2xclbin(MLIRContext *ctx, ModuleOp moduleOp, std::vector unsignedNpuInstructions( signedNpuInstructionsAttr.begin(), signedNpuInstructionsAttr.end()); - std::string errorMessage; - auto output = openOutputFile(OutputNPU, &errorMessage); - if (!output) return moduleOp.emitOpError(errorMessage); - for (auto w : unsignedNpuInstructions) - output->os() << llvm::format("%08X\n", w); - output->keep(); + std::string errorMessage; + auto output = openOutputFile(OutputNPU, &errorMessage); + if (!output) { + llvm::errs() << errorMessage << "\n"; + return moduleOp.emitOpError(""); + } + if (failed( + mlir::iree_compiler::AMDAIE::AIETranslateToNPU(copy, output->os()))) + return moduleOp.emitOpError("NPU Instruction translation failed"); + + output->keep(); + copy->erase(); + } SmallString<64> unifiedObj(TK.TempDir); sys::path::append(unifiedObj, "input.o"); if (failed(generateUnifiedObject(ctx, moduleOp, TK, std::string(unifiedObj)))) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt index d231215f9..f29a8f52f 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt @@ -95,6 +95,7 @@ iree_cc_library( iree::compiler::Dialect::LinalgExt::IR iree::compiler::Dialect::LinalgExt::Transforms iree::compiler::Utils + iree::target::amd-aie::aie::AIEPasses iree::target::amd-aie::air::AIRConversionPasses iree::target::amd-aie::air::AIRTransformPasses IREELinalgTransformDialectPasses diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index b85be3399..abf850e28 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -6,6 +6,7 @@ #include "iree-amd-aie/Transforms/Passes.h" +#include "aie/Passes.h" #include "air/Conversion/Passes.h" #include "air/Transform/Passes.h" #include "iree-amd-aie/IR/AMDAIEAttrs.h" @@ -14,6 +15,8 @@ #include "iree/compiler/Codegen/Common/Passes.h" #include "iree/compiler/Dialect/LinalgExt/Transforms/Passes.h" #include "iree/compiler/Utils/PassUtils.h" +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" #include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Linalg/Passes.h" @@ -581,6 +584,17 @@ void addMLIRAIRAIELoweringPasses(OpPassManager &passManager, bool packPeel) { passManager.addPass(xilinx::airrt::createAIRRtToNpuPass()); passManager.addPass(createCanonicalizerPass()); + + // Now lower using the AIE passes used by the mlir-air/mlir-aie flow. + passManager.addPass(createLowerAffinePass()); + OpPassManager &devicePM = passManager.nest(); + devicePM.addPass(createAIEAssignLockIDsPass()); + devicePM.addPass(createAIEObjectFifoStatefulTransformPass()); + devicePM.addPass(createAIEAssignBufferDescriptorIDsPass()); + devicePM.addPass(createAIEAssignBufferAddressesBasicPass()); + devicePM.addPass(createAIEPathfinderPass()); + devicePM.addPass(createAIELocalizeLocksPass()); + passManager.addPass(createConvertSCFToCFPass()); } // NOTE: this runs on the top-level program module containing all hal.executable From 3eb133177a8178d4cb41a4ed8ddc05d566b72ad5 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Wed, 26 Jun 2024 10:38:47 -0600 Subject: [PATCH 02/16] used mlir-aie based library for AIE passes for the old path --- .../plugins/target/AMD-AIE/aie/CMakeLists.txt | 22 +++++++++++++++++++ .../iree-amd-aie/Target/CMakeLists.txt | 1 + .../iree-amd-aie/Transforms/CMakeLists.txt | 1 + .../iree-amd-aie/Transforms/Passes.cpp | 16 +++++++++----- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt index 54bd61232..3d1ba7cac 100644 --- a/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt @@ -304,8 +304,30 @@ iree_cc_library( "AIEObjectFifoStatefulTransform.cpp" "AIEDmaToNpu.cpp" "AIEXToStandard.cpp" + DEPS + ::defs + ::AIEDialectIR + ::AIENormalizeAddressSpacesGen + ::AIEXDialectIR + ::AIEXTransformPassesIncGen + ::AIETransformPassesIncGen +) + +iree_cc_library( + NAME + AIEPassesFromMLIRAIE + SRCS # Passes needed by AIR-AIE lowering + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp" + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIEAssignLockIDs.cpp" + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIEObjectFifoRegisterProcess.cpp" + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIEAssignBufferDescriptorIDs.cpp" + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIEX/Transforms/AIECreateBroadcastPacket.cpp" + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIECreatePacketFlows.cpp" "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.cpp" + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIECanonicalizeDevice.cpp" + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIEX/Transforms/AIELowerMulticast.cpp" + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIEAssignBuffers.cpp" DEPS ::defs ::AIEDialectIR diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt index 3f40acc25..0f941d6e7 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt @@ -24,6 +24,7 @@ iree_cc_library( iree::target::amd-aie::aie::AIEDialectIR iree::target::amd-aie::aie::AIEXDialectIR iree::target::amd-aie::aie::AIEPasses + iree::target::amd-aie::aie::AIEPassesFromMLIRAIE iree::target::amd-aie::aie::AIEVecDialectIR iree::target::amd-aie::aie::AIEVecConvertToLLVM MLIRToLLVMIRTranslationRegistration diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt index f29a8f52f..bb2f66ed4 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt @@ -96,6 +96,7 @@ iree_cc_library( iree::compiler::Dialect::LinalgExt::Transforms iree::compiler::Utils iree::target::amd-aie::aie::AIEPasses + iree::target::amd-aie::aie::AIEPassesFromMLIRAIE iree::target::amd-aie::air::AIRConversionPasses iree::target::amd-aie::air::AIRTransformPasses IREELinalgTransformDialectPasses diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index abf850e28..68e9c95c6 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -6,6 +6,8 @@ #include "iree-amd-aie/Transforms/Passes.h" +#include "aie/Dialect/AIE/Transforms/AIEPasses.h" +#include "aie/Dialect/AIEX/Transforms/AIEXPasses.h" #include "aie/Passes.h" #include "air/Conversion/Passes.h" #include "air/Transform/Passes.h" @@ -588,12 +590,14 @@ void addMLIRAIRAIELoweringPasses(OpPassManager &passManager, bool packPeel) { // Now lower using the AIE passes used by the mlir-air/mlir-aie flow. passManager.addPass(createLowerAffinePass()); OpPassManager &devicePM = passManager.nest(); - devicePM.addPass(createAIEAssignLockIDsPass()); - devicePM.addPass(createAIEObjectFifoStatefulTransformPass()); - devicePM.addPass(createAIEAssignBufferDescriptorIDsPass()); - devicePM.addPass(createAIEAssignBufferAddressesBasicPass()); - devicePM.addPass(createAIEPathfinderPass()); - devicePM.addPass(createAIELocalizeLocksPass()); + devicePM.addPass(xilinx::AIE::createAIEAssignLockIDsPass()); + devicePM.addPass(xilinx::AIE::createAIEObjectFifoRegisterProcessPass()); + devicePM.addPass(xilinx::AIE::createAIEObjectFifoStatefulTransformPass()); + devicePM.addPass(xilinx::AIE::createAIEAssignBufferDescriptorIDsPass()); + devicePM.addPass(xilinx::AIEX::createAIEBroadcastPacketPass()); + devicePM.addPass(xilinx::AIE::createAIERoutePacketFlowsPass()); + devicePM.addPass(xilinx::AIEX::createAIELowerMulticastPass()); + devicePM.addPass(xilinx::AIE::createAIEAssignBufferAddressesPass()); passManager.addPass(createConvertSCFToCFPass()); } From 96c32fd81d96471fd54e67debafac09079b2fb48 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Wed, 26 Jun 2024 11:05:05 -0600 Subject: [PATCH 03/16] temporarily remove pack-peel test to figure out other issues --- tests/samples/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/samples/CMakeLists.txt b/tests/samples/CMakeLists.txt index a12bac7b5..c34461c97 100644 --- a/tests/samples/CMakeLists.txt +++ b/tests/samples/CMakeLists.txt @@ -9,7 +9,7 @@ iree_lit_test_suite( lit SRCS "matmul_peeled_objectfifo.mlir" - "pack_peel_pipeline_matmul.mlir" + #"pack_peel_pipeline_matmul.mlir" "pack_peel_pipeline_matmul_elementwise.mlir" "pad_pack_pipeline_e2e.mlir" "xdna_oplib_plugin.mlir" From 80e67560d0c7aab0ea29783a69a3b58b62dbe82c Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Wed, 26 Jun 2024 13:28:32 -0600 Subject: [PATCH 04/16] retire aie2xclbin-disable-threading as with the internal utility mlir-disable threading propagates --- build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh | 1 - .../target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp | 1 - .../plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h | 7 ------- .../target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp | 1 - .../target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 1 - 5 files changed, 11 deletions(-) diff --git a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh index 6b6d62f24..07b615d15 100755 --- a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh +++ b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh @@ -131,7 +131,6 @@ ${SOURCE_MLIR_FILE} \ --aie2xclbin-print-ir-after-all \ --aie2xclbin-print-ir-before-all \ --aie2xclbin-print-ir-module-scope \ ---aie2xclbin-disable-threading \ --aie2xclbin-timing \ --mlir-print-ir-after-all \ --mlir-print-ir-module-scope \ diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index 847ac43e4..12ea56b15 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -337,7 +337,6 @@ LogicalResult AIETargetBackend::serializeExecutable( entryPointNamesFb[ordinal] + ".npu.txt"); xilinx::XCLBinGenConfig TK; - TK.DisableThreading = options.aie2xclbinDisableTheading; TK.PrintIRAfterAll = options.aie2xclbinPrintIrAfterAll; TK.PrintIRBeforeAll = options.aie2xclbinPrintIrBeforeAll; TK.PrintIRModuleScope = options.aie2xclbinPrintIrModuleScope; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h index dc92c415f..9fb625a0a 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h @@ -38,9 +38,6 @@ struct AMDAIEOptions { // Print IR before all MLIR passes run in aie2xclbin (to stderr). bool aie2xclbinPrintIrBeforeAll{false}; - // Disable theading in MLIR passes in aie2xclbin. - bool aie2xclbinDisableTheading{false}; - // Print IR at module scope in MLIR passes in aie2xclbin. bool aie2xclbinPrintIrModuleScope{false}; @@ -73,10 +70,6 @@ struct AMDAIEOptions { llvm::cl::desc( "If true, print the IR before all MLIR passes run in aie2xclbin")); - binder.opt( - "aie2xclbin-disable-threading", aie2xclbinDisableTheading, - llvm::cl::cat(category), - llvm::cl::desc("Disable theading in MLIR passes in aie2xclbin")); binder.opt( "aie2xclbin-print-ir-module-scope", aie2xclbinPrintIrModuleScope, diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp index e94a28a28..744bf5006 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp @@ -338,7 +338,6 @@ LogicalResult AIETargetDirectBackend::serializeExecutable( entryPointNamesFb[ordinal] + ".npu.txt"); xilinx::XCLBinGenConfig TK; - TK.DisableThreading = options.aie2xclbinDisableTheading; TK.PrintIRAfterAll = options.aie2xclbinPrintIrAfterAll; TK.PrintIRBeforeAll = options.aie2xclbinPrintIrBeforeAll; TK.PrintIRModuleScope = options.aie2xclbinPrintIrModuleScope; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index a24b1ec5e..522212d07 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -57,7 +57,6 @@ namespace { // manager. These control when (if ever) and what IR gets printed between // passes, and whether the pass manager uses multi-theading. void applyConfigToPassManager(XCLBinGenConfig &TK, PassManager &pm) { - pm.getContext()->disableMultithreading(TK.DisableThreading); bool printBefore = TK.PrintIRBeforeAll; auto shouldPrintBeforePass = [printBefore](Pass *, Operation *) { From 399958d4b1d5cbbb44de98e0adaa31e2175b204b Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Wed, 26 Jun 2024 14:15:19 -0600 Subject: [PATCH 05/16] add missing timing flag plumbing --- .../plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp | 1 + .../target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index 12ea56b15..fa90e5a01 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -340,6 +340,7 @@ LogicalResult AIETargetBackend::serializeExecutable( TK.PrintIRAfterAll = options.aie2xclbinPrintIrAfterAll; TK.PrintIRBeforeAll = options.aie2xclbinPrintIrBeforeAll; TK.PrintIRModuleScope = options.aie2xclbinPrintIrModuleScope; + TK.Timing = options.aie2xclbinTiming; TK.TargetArch = "AIE2"; TK.TempDir = entryPointWorkDir.str(); TK.UseChess = options.useChess; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp index 744bf5006..d1a73fb09 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp @@ -341,6 +341,7 @@ LogicalResult AIETargetDirectBackend::serializeExecutable( TK.PrintIRAfterAll = options.aie2xclbinPrintIrAfterAll; TK.PrintIRBeforeAll = options.aie2xclbinPrintIrBeforeAll; TK.PrintIRModuleScope = options.aie2xclbinPrintIrModuleScope; + TK.Timing = options.aie2xclbinTiming; TK.TargetArch = "AIE2"; TK.TempDir = entryPointWorkDir.str(); TK.UseChess = options.useChess; From 82da486ca821c3a33aad8487cbbff544e1fc3d42 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Wed, 26 Jun 2024 14:31:30 -0600 Subject: [PATCH 06/16] test if normalizeaddressspaces pass is necessary --- .../plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 522212d07..fb1ae93d4 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -686,8 +686,8 @@ static LogicalResult generateUnifiedObject(MLIRContext *context, pm.addNestedPass( mlir::iree_compiler::AMDAIE::createAIELocalizeLocksPass()); - pm.addNestedPass( - xilinx::AIE::createAIENormalizeAddressSpacesPass()); + /*pm.addNestedPass( + xilinx::AIE::createAIENormalizeAddressSpacesPass());*/ pm.addPass(mlir::iree_compiler::AMDAIE::createAIECoreToStandardPass()); pm.addPass(mlir::iree_compiler::AMDAIE::createAIEXToStandardPass()); From af7910bcfcab622c1a2974ee45151e9ba0186d2e Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Wed, 26 Jun 2024 14:50:43 -0600 Subject: [PATCH 07/16] add back createAIENormalizeAddressSpacesPass as its necessary for peano --- .../plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index fb1ae93d4..522212d07 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -686,8 +686,8 @@ static LogicalResult generateUnifiedObject(MLIRContext *context, pm.addNestedPass( mlir::iree_compiler::AMDAIE::createAIELocalizeLocksPass()); - /*pm.addNestedPass( - xilinx::AIE::createAIENormalizeAddressSpacesPass());*/ + pm.addNestedPass( + xilinx::AIE::createAIENormalizeAddressSpacesPass()); pm.addPass(mlir::iree_compiler::AMDAIE::createAIECoreToStandardPass()); pm.addPass(mlir::iree_compiler::AMDAIE::createAIEXToStandardPass()); From b74c14f03831ec605ca24674d16e89ebc7fddedf Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Wed, 26 Jun 2024 14:52:09 -0600 Subject: [PATCH 08/16] add currently working sizes for pack-peel --- tests/samples/CMakeLists.txt | 2 +- tests/samples/pack_peel_pipeline_matmul.mlir | 54 ++++++-------------- 2 files changed, 17 insertions(+), 39 deletions(-) diff --git a/tests/samples/CMakeLists.txt b/tests/samples/CMakeLists.txt index c34461c97..a12bac7b5 100644 --- a/tests/samples/CMakeLists.txt +++ b/tests/samples/CMakeLists.txt @@ -9,7 +9,7 @@ iree_lit_test_suite( lit SRCS "matmul_peeled_objectfifo.mlir" - #"pack_peel_pipeline_matmul.mlir" + "pack_peel_pipeline_matmul.mlir" "pack_peel_pipeline_matmul_elementwise.mlir" "pad_pack_pipeline_e2e.mlir" "xdna_oplib_plugin.mlir" diff --git a/tests/samples/pack_peel_pipeline_matmul.mlir b/tests/samples/pack_peel_pipeline_matmul.mlir index 6b7ca0c1e..b0bbe4f23 100644 --- a/tests/samples/pack_peel_pipeline_matmul.mlir +++ b/tests/samples/pack_peel_pipeline_matmul.mlir @@ -1,21 +1,21 @@ // RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-use-pipeline=pack-peel --split-input-file | FileCheck %s -func.func @matmul_i8_i32(%lhs: tensor<1024x512xi8>, %rhs: tensor<512x1024xi8>) -> tensor<1024x1024xi32> +func.func @matmul_i8_i32(%lhs: tensor<32x16xi8>, %rhs: tensor<16x32xi8>) -> tensor<32x32xi32> { %cst = arith.constant 0 : i32 - %0 = tensor.empty() : tensor<1024x1024xi32> - %1 = linalg.fill ins(%cst : i32) outs(%0 : tensor<1024x1024xi32>) -> tensor<1024x1024xi32> - %res = linalg.matmul ins(%lhs, %rhs: tensor<1024x512xi8>, tensor<512x1024xi8>) - outs(%1: tensor<1024x1024xi32>) -> tensor<1024x1024xi32> - return %res : tensor<1024x1024xi32> + %0 = tensor.empty() : tensor<32x32xi32> + %1 = linalg.fill ins(%cst : i32) outs(%0 : tensor<32x32xi32>) -> tensor<32x32xi32> + %res = linalg.matmul ins(%lhs, %rhs: tensor<32x16xi8>, tensor<16x32xi8>) + outs(%1: tensor<32x32xi32>) -> tensor<32x32xi32> + return %res : tensor<32x32xi32> } -// CHECK-LABEL: hal.executable.export public @matmul_i8_i32_dispatch_0_matmul_1024x1024x512_i8xi8xi32 +// CHECK-LABEL: hal.executable.export public @matmul_i8_i32_dispatch_0_matmul_32x32x16_i8xi8xi32 // CHECK: aie.device(npu1_4col) // CHECK: aie.shim_dma_allocation // CHECK: aie.shim_dma_allocation // CHECK: aie.shim_dma_allocation -// CHECK: func.func @matmul_i8_i32_dispatch_0_matmul_1024x1024x512_i8xi8xi32(%arg0: memref<131072xi32>, %arg1: memref<131072xi32>, %arg2: memref<1024x1024xi32>) +// CHECK: func.func @matmul_i8_i32_dispatch_0_matmul_32x32x16_i8xi8xi32(%arg0: memref<128xi32>, %arg1: memref<128xi32>, %arg2: memref<32x32xi32>) // CHECK: aiex.npu.dma_memcpy_nd // CHECK: aiex.npu.dma_memcpy_nd // CHECK: aiex.npu.dma_memcpy_nd @@ -23,44 +23,22 @@ func.func @matmul_i8_i32(%lhs: tensor<1024x512xi8>, %rhs: tensor<512x1024xi8>) - // ----- -func.func @matmul_bf16(%lhs: tensor<512x1024xbf16>, %rhs: tensor<1024x512xbf16>) -> tensor<512x512xbf16> +func.func @matmul_bf16(%lhs: tensor<16x32xbf16>, %rhs: tensor<32x16xbf16>) -> tensor<16x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<512x512xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<512x512xbf16>) -> tensor<512x512xbf16> - %res = linalg.matmul ins(%lhs, %rhs: tensor<512x1024xbf16>, tensor<1024x512xbf16>) - outs(%1: tensor<512x512xbf16>) -> tensor<512x512xbf16> - return %res : tensor<512x512xbf16> + %0 = tensor.empty() : tensor<16x16xbf16> + %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16x16xbf16>) -> tensor<16x16xbf16> + %res = linalg.matmul ins(%lhs, %rhs: tensor<16x32xbf16>, tensor<32x16xbf16>) + outs(%1: tensor<16x16xbf16>) -> tensor<16x16xbf16> + return %res : tensor<16x16xbf16> } -// CHECK-LABEL: hal.executable.export public @matmul_bf16_dispatch_0_matmul_512x512x1024_bf16 +// CHECK-LABEL: hal.executable.export public @matmul_bf16_dispatch_0_matmul_16x16x32_bf16 // CHECK: aie.device(npu1_4col) // CHECK: aie.shim_dma_allocation // CHECK: aie.shim_dma_allocation // CHECK: aie.shim_dma_allocation -// CHECK: func.func @matmul_bf16_dispatch_0_matmul_512x512x1024_bf16(%arg0: memref<262144xi32>, %arg1: memref<262144xi32>, %arg2: memref<131072xi32>) -// CHECK: aiex.npu.dma_memcpy_nd -// CHECK: aiex.npu.dma_memcpy_nd -// CHECK: aiex.npu.dma_memcpy_nd -// CHECK: aiex.npu.sync - -// ----- - -func.func @matmul_bf16_large(%arg0: tensor<308x9728xbf16>, %arg1: tensor<9728x2432xbf16>) -> tensor<308x2432xbf16> { - %0 = tensor.empty() : tensor<308x2432xbf16> - %cst = arith.constant 0.000000e+00 : bf16 - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<308x2432xbf16>) -> tensor<308x2432xbf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<308x9728xbf16>, tensor<9728x2432xbf16>) outs(%1 : tensor<308x2432xbf16>) -> tensor<308x2432xbf16> - return %2 : tensor<308x2432xbf16> -} - - -// CHECK-LABEL: hal.executable.export public @matmul_bf16_large_dispatch_0_matmul_308x2432x9728_bf16 -// CHECK: aie.device(npu1_4col) -// CHECK: aie.shim_dma_allocation -// CHECK: aie.shim_dma_allocation -// CHECK: aie.shim_dma_allocation -// CHECK: func.func @matmul_bf16_large_dispatch_0_matmul_308x2432x9728_bf16(%arg0: memref<1498112xi32>, %arg1: memref<11829248xi32>, %arg2: memref<374528xi32>) +// CHECK: func.func @matmul_bf16_dispatch_0_matmul_16x16x32_bf16(%arg0: memref<256xi32>, %arg1: memref<256xi32>, %arg2: memref<128xi32>) // CHECK: aiex.npu.dma_memcpy_nd // CHECK: aiex.npu.dma_memcpy_nd // CHECK: aiex.npu.dma_memcpy_nd From d9fd624059ace80866d3676dc972fe92544a8012 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Wed, 26 Jun 2024 14:58:50 -0600 Subject: [PATCH 09/16] nits --- .../plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 1 + .../target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 522212d07..323e0592b 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -861,6 +861,7 @@ LogicalResult xilinx::aie2xclbin(MLIRContext *ctx, ModuleOp moduleOp, output->keep(); copy->erase(); } + SmallString<64> unifiedObj(TK.TempDir); sys::path::append(unifiedObj, "input.o"); if (failed(generateUnifiedObject(ctx, moduleOp, TK, std::string(unifiedObj)))) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt index bb2f66ed4..5099d4f61 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt @@ -95,7 +95,6 @@ iree_cc_library( iree::compiler::Dialect::LinalgExt::IR iree::compiler::Dialect::LinalgExt::Transforms iree::compiler::Utils - iree::target::amd-aie::aie::AIEPasses iree::target::amd-aie::aie::AIEPassesFromMLIRAIE iree::target::amd-aie::air::AIRConversionPasses iree::target::amd-aie::air::AIRTransformPasses From 0c0a4b602c2f80c13e41e8e5fb26e903377bc3f0 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Thu, 27 Jun 2024 09:38:35 -0600 Subject: [PATCH 10/16] fix bad conflict resolution --- .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 323e0592b..2f902f0d4 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -827,10 +827,8 @@ LogicalResult xilinx::aie2xclbin(MLIRContext *ctx, ModuleOp moduleOp, XCLBinGenConfig &TK, StringRef OutputNPU, StringRef OutputXCLBin, StringRef InputXCLBin) { - std::regex target_regex("AIE.?"); - if (!std::regex_search(TK.TargetArch, target_regex)) - return moduleOp.emitOpError() - << "Unexpected target architecture: " << TK.TargetArch; + PassManager pm(ctx, moduleOp.getOperationName()); + applyConfigToPassManager(TK, pm); // generateNPUInstructions pm.addNestedPass( @@ -847,12 +845,12 @@ LogicalResult xilinx::aie2xclbin(MLIRContext *ctx, ModuleOp moduleOp, std::vector unsignedNpuInstructions( signedNpuInstructionsAttr.begin(), signedNpuInstructionsAttr.end()); - std::string errorMessage; - auto output = openOutputFile(OutputNPU, &errorMessage); - if (!output) { - llvm::errs() << errorMessage << "\n"; - return moduleOp.emitOpError(""); - } + std::string errorMessage; + auto output = openOutputFile(OutputNPU, &errorMessage); + if (!output) return moduleOp.emitOpError(errorMessage); + for (auto w : unsignedNpuInstructions) + output->os() << llvm::format("%08X\n", w); + output->keep(); if (failed( mlir::iree_compiler::AMDAIE::AIETranslateToNPU(copy, output->os()))) From b512aff974a9c3bcc7281ab28b9a807d19f0f648 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Thu, 27 Jun 2024 09:39:10 -0600 Subject: [PATCH 11/16] clang-format --- .../target/AMD-AIE/iree-amd-aie/Target/AIETarget.h | 1 - .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 13 ++++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h index 9fb625a0a..9a29249e2 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h @@ -70,7 +70,6 @@ struct AMDAIEOptions { llvm::cl::desc( "If true, print the IR before all MLIR passes run in aie2xclbin")); - binder.opt( "aie2xclbin-print-ir-module-scope", aie2xclbinPrintIrModuleScope, llvm::cl::cat(category), diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 2f902f0d4..f5fff8ec6 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -57,7 +57,6 @@ namespace { // manager. These control when (if ever) and what IR gets printed between // passes, and whether the pass manager uses multi-theading. void applyConfigToPassManager(XCLBinGenConfig &TK, PassManager &pm) { - bool printBefore = TK.PrintIRBeforeAll; auto shouldPrintBeforePass = [printBefore](Pass *, Operation *) { return printBefore; @@ -827,7 +826,7 @@ LogicalResult xilinx::aie2xclbin(MLIRContext *ctx, ModuleOp moduleOp, XCLBinGenConfig &TK, StringRef OutputNPU, StringRef OutputXCLBin, StringRef InputXCLBin) { - PassManager pm(ctx, moduleOp.getOperationName()); + PassManager pm(ctx, moduleOp.getOperationName()); applyConfigToPassManager(TK, pm); // generateNPUInstructions @@ -852,12 +851,12 @@ LogicalResult xilinx::aie2xclbin(MLIRContext *ctx, ModuleOp moduleOp, output->os() << llvm::format("%08X\n", w); output->keep(); - if (failed( - mlir::iree_compiler::AMDAIE::AIETranslateToNPU(copy, output->os()))) - return moduleOp.emitOpError("NPU Instruction translation failed"); + if (failed( + mlir::iree_compiler::AMDAIE::AIETranslateToNPU(copy, output->os()))) + return moduleOp.emitOpError("NPU Instruction translation failed"); - output->keep(); - copy->erase(); + output->keep(); + copy->erase(); } SmallString<64> unifiedObj(TK.TempDir); From 4075e4a5a8bf69792f0fee42e93d4626bc109c2c Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Thu, 27 Jun 2024 09:40:12 -0600 Subject: [PATCH 12/16] more conflict resolution --- .../target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index f5fff8ec6..21a6a0de7 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -851,14 +851,6 @@ LogicalResult xilinx::aie2xclbin(MLIRContext *ctx, ModuleOp moduleOp, output->os() << llvm::format("%08X\n", w); output->keep(); - if (failed( - mlir::iree_compiler::AMDAIE::AIETranslateToNPU(copy, output->os()))) - return moduleOp.emitOpError("NPU Instruction translation failed"); - - output->keep(); - copy->erase(); - } - SmallString<64> unifiedObj(TK.TempDir); sys::path::append(unifiedObj, "input.o"); if (failed(generateUnifiedObject(ctx, moduleOp, TK, std::string(unifiedObj)))) From e85ae54e4b18f3c334838c2ee5ef56e029a57875 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Thu, 27 Jun 2024 16:02:31 -0600 Subject: [PATCH 13/16] address reviwers comments and put passesin a seperate functions so other pipelines can use it --- compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt | 1 + .../AMD-AIE/iree-amd-aie/Target/AIETarget.cpp | 1 + .../AMD-AIE/iree-amd-aie/Target/CMakeLists.txt | 1 - .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 4 ++-- .../AMD-AIE/iree-amd-aie/Transforms/Passes.cpp | 14 ++++++++++---- .../AMD-AIE/iree-amd-aie/Transforms/Passes.h | 8 ++++++-- 6 files changed, 20 insertions(+), 9 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt index 3d1ba7cac..dbcfaad05 100644 --- a/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt @@ -328,6 +328,7 @@ iree_cc_library( "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIECanonicalizeDevice.cpp" "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIEX/Transforms/AIELowerMulticast.cpp" "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIEAssignBuffers.cpp" + "${IREE_MLIR_AIE_SOURCE_DIR}/lib/Dialect/AIE/Transforms/AIELocalizeLocks.cpp" DEPS ::defs ::AIEDialectIR diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index fa90e5a01..6c39b9ac5 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -13,6 +13,7 @@ #include "aie/Dialect/AIE/Transforms/AIEPasses.h" #include "aie/Dialect/AIEVec/IR/AIEVecDialect.h" #include "aie/Dialect/AIEX/IR/AIEXDialect.h" +#include "aie/Dialect/AIEX/Transforms/AIEXPasses.h" #include "aie/Dialect/XLLVM/XLLVMDialect.h" #include "aie/Passes.h" #include "aie/Target/LLVMIR/Dialect/XLLVM/XLLVMToLLVMIRTranslation.h" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt index 0f941d6e7..3f40acc25 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/CMakeLists.txt @@ -24,7 +24,6 @@ iree_cc_library( iree::target::amd-aie::aie::AIEDialectIR iree::target::amd-aie::aie::AIEXDialectIR iree::target::amd-aie::aie::AIEPasses - iree::target::amd-aie::aie::AIEPassesFromMLIRAIE iree::target::amd-aie::aie::AIEVecDialectIR iree::target::amd-aie::aie::AIEVecConvertToLLVM MLIRToLLVMIRTranslationRegistration diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 21a6a0de7..f740b6f4a 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -683,10 +683,10 @@ static LogicalResult generateUnifiedObject(MLIRContext *context, PassManager pm(context, moduleOp.getOperationName()); applyConfigToPassManager(TK, pm); - pm.addNestedPass( + /*pm.addNestedPass( mlir::iree_compiler::AMDAIE::createAIELocalizeLocksPass()); pm.addNestedPass( - xilinx::AIE::createAIENormalizeAddressSpacesPass()); + xilinx::AIE::createAIENormalizeAddressSpacesPass());*/ pm.addPass(mlir::iree_compiler::AMDAIE::createAIECoreToStandardPass()); pm.addPass(mlir::iree_compiler::AMDAIE::createAIEXToStandardPass()); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index 68e9c95c6..a3bd6ddf0 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -428,9 +428,9 @@ void buildAMDAIETransformPassPipeline(OpPassManager &variantPassManager) { } modulePassManager.addPass(createLowerUKernelOpsToCallsPass()); if (clUsePipeline == AIEPassPipeline::PadPackPipeline) { - addMLIRAIRAIELoweringPasses(modulePassManager, false); + addMLIRAIRLoweringPasses(modulePassManager, false); } else if (clUsePipeline == AIEPassPipeline::PackPeelPipeline) { - addMLIRAIRAIELoweringPasses(modulePassManager, true); + addMLIRAIRLoweringPasses(modulePassManager, true); } variantPassManager.addPass(createReconcileTranslationInfoPass()); variantPassManager.addPass(createAMDAIELowerWorkgroupCountPass()); @@ -445,7 +445,7 @@ void buildAMDAIETransformPassPipeline(OpPassManager &variantPassManager) { // TODO (Erwei): The "packPeel" temporary argument should be removed once // pack-peel and pack-pad share the same pass pipeline. See TODOs inlined below // for details. -void addMLIRAIRAIELoweringPasses(OpPassManager &passManager, bool packPeel) { +void addMLIRAIRLoweringPasses(OpPassManager &passManager, bool packPeel) { // Add passes for preparing for lowering to MLIR-AIR passManager.addPass(createEraseHALDescriptorTypeFromMemRefPass()); passManager.addPass(memref::createFoldMemRefAliasOpsPass()); @@ -587,7 +587,11 @@ void addMLIRAIRAIELoweringPasses(OpPassManager &passManager, bool packPeel) { passManager.addPass(xilinx::airrt::createAIRRtToNpuPass()); passManager.addPass(createCanonicalizerPass()); - // Now lower using the AIE passes used by the mlir-air/mlir-aie flow. + // Now lower using the AIE passes from MLIR-AIE. + addMLIRAIELoweringPasses(passManager); +} + +void addMLIRAIELoweringPasses(OpPassManager &passManager) { passManager.addPass(createLowerAffinePass()); OpPassManager &devicePM = passManager.nest(); devicePM.addPass(xilinx::AIE::createAIEAssignLockIDsPass()); @@ -598,6 +602,8 @@ void addMLIRAIRAIELoweringPasses(OpPassManager &passManager, bool packPeel) { devicePM.addPass(xilinx::AIE::createAIERoutePacketFlowsPass()); devicePM.addPass(xilinx::AIEX::createAIELowerMulticastPass()); devicePM.addPass(xilinx::AIE::createAIEAssignBufferAddressesPass()); + devicePM.addPass(xilinx::AIE::createAIELocalizeLocksPass()); + devicePM.addPass(xilinx::AIE::createAIENormalizeAddressSpacesPass()); passManager.addPass(createConvertSCFToCFPass()); } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h index 57faa4b89..6567cfbc3 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h @@ -13,9 +13,13 @@ namespace mlir::iree_compiler::AMDAIE { -/// Add passes to lower from MLIR-AIR through AIE. This is +/// Add lowering passes from MLIR-AIR. This is /// currently the default passes used for lowering after IREEs tiling. -void addMLIRAIRAIELoweringPasses(OpPassManager &passManager, bool packPeel); +void addMLIRAIRLoweringPasses(OpPassManager &passManager, bool packPeel); + +/// Add lowering passes from MLIR-AIE. This is +/// currently the default passes used for lowering from AIE dialect. +void addMLIRAIELoweringPasses(OpPassManager &passManager); /// Populates passes needed to lower linalg/arith/math ops to LLVM dialect via /// the structured ops path. The pass manager `pm` here operate on the module From 2d4de691f493e846576223b8bd13eab2d68cd89b Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Thu, 27 Jun 2024 16:05:38 -0600 Subject: [PATCH 14/16] minor fixes --- .../plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp | 1 - .../plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 5 ----- 2 files changed, 6 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index 6c39b9ac5..fa90e5a01 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -13,7 +13,6 @@ #include "aie/Dialect/AIE/Transforms/AIEPasses.h" #include "aie/Dialect/AIEVec/IR/AIEVecDialect.h" #include "aie/Dialect/AIEX/IR/AIEXDialect.h" -#include "aie/Dialect/AIEX/Transforms/AIEXPasses.h" #include "aie/Dialect/XLLVM/XLLVMDialect.h" #include "aie/Passes.h" #include "aie/Target/LLVMIR/Dialect/XLLVM/XLLVMToLLVMIRTranslation.h" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index f740b6f4a..a713e42e5 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -682,11 +682,6 @@ static LogicalResult generateUnifiedObject(MLIRContext *context, const std::string &outputFile) { PassManager pm(context, moduleOp.getOperationName()); applyConfigToPassManager(TK, pm); - - /*pm.addNestedPass( - mlir::iree_compiler::AMDAIE::createAIELocalizeLocksPass()); - pm.addNestedPass( - xilinx::AIE::createAIENormalizeAddressSpacesPass());*/ pm.addPass(mlir::iree_compiler::AMDAIE::createAIECoreToStandardPass()); pm.addPass(mlir::iree_compiler::AMDAIE::createAIEXToStandardPass()); From 4c5ea9aac1ea13dadd3c6cc5e0b5d392c135a687 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Thu, 27 Jun 2024 17:03:07 -0600 Subject: [PATCH 15/16] fix pass ordering issue --- .../target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 1 + .../target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index a713e42e5..8d4efe0cc 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -682,6 +682,7 @@ static LogicalResult generateUnifiedObject(MLIRContext *context, const std::string &outputFile) { PassManager pm(context, moduleOp.getOperationName()); applyConfigToPassManager(TK, pm); + pm.addPass(mlir::iree_compiler::AMDAIE::createAIECoreToStandardPass()); pm.addPass(mlir::iree_compiler::AMDAIE::createAIEXToStandardPass()); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index a3bd6ddf0..6c45b1cf8 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -602,9 +602,11 @@ void addMLIRAIELoweringPasses(OpPassManager &passManager) { devicePM.addPass(xilinx::AIE::createAIERoutePacketFlowsPass()); devicePM.addPass(xilinx::AIEX::createAIELowerMulticastPass()); devicePM.addPass(xilinx::AIE::createAIEAssignBufferAddressesPass()); - devicePM.addPass(xilinx::AIE::createAIELocalizeLocksPass()); - devicePM.addPass(xilinx::AIE::createAIENormalizeAddressSpacesPass()); passManager.addPass(createConvertSCFToCFPass()); + passManager.addNestedPass( + xilinx::AIE::createAIELocalizeLocksPass()); + passManager.addNestedPass( + xilinx::AIE::createAIENormalizeAddressSpacesPass()); } // NOTE: this runs on the top-level program module containing all hal.executable From e76e09b1be25633c28e755f2684a6fc59630d960 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram Date: Thu, 27 Jun 2024 17:53:23 -0600 Subject: [PATCH 16/16] address reviwer comments --- .../plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 8d4efe0cc..9ebe0608d 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -12,7 +12,6 @@ #include "AMDAIETargets.h" #include "aie/Conversion/AIEVecToLLVM/AIEVecToLLVM.h" -#include "aie/Dialect/AIE/Transforms/AIEPasses.h" #include "aie/Dialect/AIEVec/Pipelines/Passes.h" #include "aie/Passes.h" #include "aie/Targets/AIETargets.h"