Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions runtime/common/BaseRemoteRESTQPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "common/ExecutionContext.h"
#include "common/Executor.h"
#include "common/ExtraPayloadProvider.h"
#include "common/JIT.h"
#include "common/Resources.h"
#include "cudaq.h"
#include "cudaq/Optimizer/Builder/Runtime.h"
Expand Down
1 change: 1 addition & 0 deletions runtime/common/BaseRemoteSimulatorQPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "common/ArgumentConversion.h"
#include "common/ExecutionContext.h"
#include "common/JIT.h"
#include "common/RemoteKernelExecutor.h"
#include "common/Resources.h"
#include "common/RuntimeMLIR.h"
Expand Down
203 changes: 190 additions & 13 deletions runtime/common/JIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,40 @@
******************************************************************************/

#include "JIT.h"
#include "ExecutionContext.h"
#include "common/Environment.h"
#include "common/Timing.h"
#include "cudaq/Frontend/nvqpp/AttributeNames.h"
#include "cudaq/Optimizer/Builder/Runtime.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "cudaq/Optimizer/CodeGen/Passes.h"
#include "cudaq/Optimizer/CodeGen/QIRAttributeNames.h"
#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h"
#include "cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h"
#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
#include "cudaq/runtime/logger/logger.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/ExecutionEngine/ExecutionEngine.h"
#include <cudaq/platform.h>
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Diagnostics.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Target/LLVMIR/Export.h"
#include <cassert>
#include <cxxabi.h>
#include <llvm/Support/Error.h>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <tuple>

#define DEBUG_TYPE "cudaq-qpud"
Expand Down Expand Up @@ -148,6 +157,174 @@ cudaq::createWrappedKernel(std::string_view irString,
return std::make_tuple(std::move(jit), callable);
}

namespace {
/// Wrap the body of every entry-point kernel in \p module with QIR runtime
/// setup and cleanup calls.
///
/// Declarations for the runtime functions named by `QIRArrayQubitAllocateArray`,
/// `QIRArrayQubitReleaseArray`, `QIRisDynamicQubitManagement`,
/// `QIRsetDynamicQubitManagement` and `QIRClearResultMaps` are requested from
/// `createLLVMFunctionSymbol`. Then, for each `llvm.func` that carries the
/// entry-point attribute and a parsable, non-negative required-qubits
/// attribute (QIR 0.1 or 1.0 spelling):
///   - at the start of the first block: query the current dynamic qubit
///     management mode, allocate an array of the required number of qubits,
///     and set dynamic qubit management to `false`;
///   - before the terminator of the last block: release the qubit array,
///     restore the previously queried mode, and clear the result maps.
///
/// Functions without the entry-point attribute, without a body, or without a
/// usable required-qubits attribute are left untouched.
///
/// \param module The operation to process. It is `dyn_cast` to
///               `mlir::ModuleOp` when declaring the runtime functions.
void insertSetupAndCleanupOperations(mlir::Operation *module) {
  mlir::OpBuilder modBuilder(module);
  auto *context = module->getContext();
  auto arrayQubitTy = cudaq::opt::getArrayType(context);
  auto voidTy = mlir::LLVM::LLVMVoidType::get(context);
  auto boolTy = modBuilder.getI1Type();
  // Resolve the module once instead of re-casting for every declaration.
  auto moduleOp = mlir::dyn_cast<mlir::ModuleOp>(module);

  mlir::FlatSymbolRefAttr allocateSymbol =
      cudaq::opt::factory::createLLVMFunctionSymbol(
          cudaq::opt::QIRArrayQubitAllocateArray, arrayQubitTy,
          {modBuilder.getI64Type()}, moduleOp);
  mlir::FlatSymbolRefAttr releaseSymbol =
      cudaq::opt::factory::createLLVMFunctionSymbol(
          cudaq::opt::QIRArrayQubitReleaseArray, {voidTy}, {arrayQubitTy},
          moduleOp);
  mlir::FlatSymbolRefAttr isDynamicSymbol =
      cudaq::opt::factory::createLLVMFunctionSymbol(
          cudaq::opt::QIRisDynamicQubitManagement, {boolTy}, {}, moduleOp);
  mlir::FlatSymbolRefAttr setDynamicSymbol =
      cudaq::opt::factory::createLLVMFunctionSymbol(
          cudaq::opt::QIRsetDynamicQubitManagement, {voidTy}, {boolTy},
          moduleOp);
  mlir::FlatSymbolRefAttr clearResultMapsSymbol =
      cudaq::opt::factory::createLLVMFunctionSymbol(
          cudaq::opt::QIRClearResultMaps, {voidTy}, {}, moduleOp);

  // Collect the functions first so that the IR is not modified while it is
  // being walked.
  mlir::SmallVector<mlir::LLVM::LLVMFuncOp> funcs;
  module->walk([&](mlir::LLVM::LLVMFuncOp func) { funcs.push_back(func); });

  for (auto &func : funcs) {
    if (!func->hasAttr(cudaq::entryPointAttrName))
      continue;

    // `getAsInteger` leaves `num_qubits` at -1 when the attribute is missing
    // or cannot be parsed, which makes the function get skipped below.
    std::int64_t num_qubits = -1;
    if (auto requiredQubits = func->getAttrOfType<mlir::StringAttr>(
            cudaq::opt::qir0_1::RequiredQubitsAttrName))
      requiredQubits.strref().getAsInteger(10, num_qubits);
    else if (auto requiredQubits = func->getAttrOfType<mlir::StringAttr>(
                 cudaq::opt::qir1_0::RequiredQubitsAttrName))
      requiredQubits.strref().getAsInteger(10, num_qubits);

    auto &blocks = func.getBlocks();
    if (blocks.empty() || num_qubits < 0)
      continue;

    // Setup code goes at the very beginning of the entry block.
    mlir::Block &entryBlock = blocks.front();
    mlir::OpBuilder builder(&entryBlock, entryBlock.begin());
    auto loc = builder.getUnknownLoc();

    auto origMode = builder.create<mlir::LLVM::CallOp>(
        loc, mlir::TypeRange{boolTy}, isDynamicSymbol, mlir::ValueRange{});

    auto numQubitsVal =
        cudaq::opt::factory::genLlvmI64Constant(loc, builder, num_qubits);
    // The attribute type must match the constant's result type (i1); an i16
    // attribute on an i1 constant is a type mismatch.
    auto falseVal = builder.create<mlir::LLVM::ConstantOp>(
        loc, boolTy, builder.getIntegerAttr(boolTy, 0));

    auto qubitAlloc = builder.create<mlir::LLVM::CallOp>(
        loc, mlir::TypeRange{arrayQubitTy}, allocateSymbol,
        mlir::ValueRange{numQubitsVal.getResult()});
    builder.create<mlir::LLVM::CallOp>(loc, mlir::TypeRange{voidTy},
                                       setDynamicSymbol,
                                       mlir::ValueRange{falseVal.getResult()});

    // At the end of the function, deallocate the qubits and restore the
    // simulator state.
    // NOTE(review): only the terminator of the last block is instrumented; a
    // function with several returning blocks would skip the cleanup on the
    // other exits. Presumably kernels reaching this point have a single exit
    // - confirm.
    builder.setInsertionPoint(blocks.back().getTerminator());
    builder.create<mlir::LLVM::CallOp>(
        loc, mlir::TypeRange{voidTy}, releaseSymbol,
        mlir::ValueRange{qubitAlloc.getResult()});
    builder.create<mlir::LLVM::CallOp>(loc, mlir::TypeRange{voidTy},
                                       setDynamicSymbol,
                                       mlir::ValueRange{origMode.getResult()});
    builder.create<mlir::LLVM::CallOp>(loc, mlir::TypeRange{voidTy},
                                       clearResultMapsSymbol,
                                       mlir::ValueRange{});
  }
}
} // namespace

/// Lower \p moduleOp to QIR / LLVM IR and build a JIT execution engine for it.
///
/// The lowering runs inside the `llvmModuleBuilder` callback handed to
/// `mlir::ExecutionEngine::create`:
///   - if the module contains a `quake::WireSetOp`, the wireset-to-profile-QIR
///     pipeline is run with \p convertTo and setup/cleanup calls are inserted
///     into the entry-point kernels afterwards; otherwise the AOT
///     convert-to-QIR pipeline is run and \p convertTo is not used;
///   - the resulting module is translated to LLVM IR.
///
/// Setting the environment variable `CUDAQ_MLIR_PRINT_EACH_PASS` disables MLIR
/// multithreading and enables IR printing in the pass manager.
///
/// \param moduleOp  The module to compile.
/// \param convertTo Target passed to the wireset-to-profile-QIR pipeline.
/// \returns A `JitEngine` wrapping the created `mlir::ExecutionEngine`.
/// \throws std::runtime_error if the pass pipeline fails, if translation to
///         LLVM IR fails, or if the execution engine cannot be created.
cudaq::JitEngine cudaq::createQIRJITEngine(mlir::ModuleOp &moduleOp,
                                           llvm::StringRef convertTo) {
  ScopedTraceWithContext(cudaq::TIMING_JIT, "createQIRJITEngine");

  // The "fast" instruction selection compilation algorithm is actually very
  // slow for large quantum circuits. Disable that here.
  const char *argv[] = {"", "-fast-isel=0", nullptr};
  llvm::cl::ParseCommandLineOptions(2, argv);

  mlir::ExecutionEngineOptions opts;
  // No LLVM IR transformation and no codegen optimization.
  opts.transformer = [](llvm::Module *m) { return llvm::ErrorSuccess(); };
  opts.jitCodeGenOptLevel = llvm::CodeGenOpt::None;
  // `convertTo` is copied into the closure as a std::string so the callback
  // does not depend on the lifetime of the caller's StringRef.
  opts.llvmModuleBuilder =
      [convertTo = convertTo.str()](
          mlir::Operation *module,
          llvm::LLVMContext &llvmContext) -> std::unique_ptr<llvm::Module> {
    ScopedTraceWithContext(cudaq::TIMING_JIT,
                           "createQIRJITEngine::llvmModuleBuilder");
    llvmContext.setOpaquePointers(false);

    auto *context = module->getContext();
    mlir::PassManager pm(context);

    // Stop at the first WireSetOp; only its presence matters.
    bool containsWireSet =
        module
            ->walk<mlir::WalkOrder::PreOrder>([](quake::WireSetOp wireSetOp) {
              return mlir::WalkResult::interrupt();
            })
            .wasInterrupted();

    // Even though we're not lowering all the way to a real QIR profile for
    // this emulated path, we need to pass in `convertTo` to mimic the
    // non-emulated path.
    if (containsWireSet)
      cudaq::opt::addWiresetToProfileQIRPipeline(pm, convertTo);
    else
      cudaq::opt::addAOTPipelineConvertToQIR(pm);

    auto enablePrintMLIREachPass =
        getEnvBool("CUDAQ_MLIR_PRINT_EACH_PASS", false);
    if (enablePrintMLIREachPass) {
      context->disableMultithreading();
      pm.enableIRPrinting();
    }

    // Capture error diagnostics so they can be attached to the exception.
    // Returning success marks the diagnostic as handled; returning failure
    // passes it on to the next registered handler.
    std::string error_msg;
    mlir::DiagnosticEngine &engine = context->getDiagEngine();
    auto handlerId = engine.registerHandler(
        [&error_msg](mlir::Diagnostic &diag) -> mlir::LogicalResult {
          if (diag.getSeverity() == mlir::DiagnosticSeverity::Error) {
            error_msg += diag.str();
            return mlir::success();
          }
          return mlir::failure();
        });

    mlir::DefaultTimingManager tm;
    tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES));
    auto timingScope = tm.getRootScope(); // starts the timer
    pm.enableTiming(timingScope);         // do this right before pm.run
    if (mlir::failed(pm.run(module))) {
      engine.eraseHandler(handlerId);
      throw std::runtime_error("[createQIRJITEngine] Lowering to QIR for "
                               "remote emulation failed.\n" +
                               error_msg);
    }
    timingScope.stop();
    engine.eraseHandler(handlerId);

    // Insert necessary calls to qubit allocations and qubit releases if the
    // original module contained WireSetOp's.
    if (containsWireSet)
      insertSetupAndCleanupOperations(module);

    auto llvmModule = translateModuleToLLVMIR(module, llvmContext);
    if (!llvmModule)
      throw std::runtime_error(
          "[createQIRJITEngine] Lowering to LLVM IR failed.");

    mlir::ExecutionEngine::setupTargetTriple(llvmModule.get());
    return llvmModule;
  };

  auto jitOrError = mlir::ExecutionEngine::create(moduleOp, opts);
  // Check at runtime rather than with `assert`: with NDEBUG the assert is
  // compiled out and `get()` on a failed llvm::Expected is undefined
  // behaviour. Taking the error also consumes it, as llvm::Expected requires.
  if (!jitOrError)
    throw std::runtime_error(
        "[createQIRJITEngine] ExecutionEngine creation failed: " +
        llvm::toString(jitOrError.takeError()));
  return JitEngine(std::move(jitOrError.get()));
}

namespace cudaq {
class JitEngine::Impl {
public:
Expand Down
13 changes: 11 additions & 2 deletions runtime/common/JIT.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@
#include <memory>
#include <string>

namespace llvm::orc {
namespace llvm {
class StringRef;
namespace orc {
class LLJIT;
}
} // namespace llvm

namespace mlir {
class ExecutionEngine;
}
class ModuleOp;
} // namespace mlir

namespace cudaq {

Expand Down Expand Up @@ -47,4 +51,9 @@ class JitEngine {
class Impl;
std::shared_ptr<Impl> impl;
};

/// Lower ModuleOp to QIR/LLVM IR and create a JIT execution engine.
///
/// \param moduleOp  The MLIR module to lower and JIT-compile.
/// \param convertTo Target handed to the wireset-to-profile-QIR pipeline; it
///                  is only consulted when the module contains a
///                  `quake::WireSetOp`.
/// \returns A JitEngine wrapping the created `mlir::ExecutionEngine`.
/// \throws std::runtime_error if lowering to QIR or translation to LLVM IR
///         fails.
JitEngine createQIRJITEngine(mlir::ModuleOp &moduleOp,
llvm::StringRef convertTo);

} // namespace cudaq
Loading
Loading