From ac01dd197f9fafb0301b0432da90698cd7e9517a Mon Sep 17 00:00:00 2001 From: Ben Howe Date: Fri, 11 Oct 2024 16:52:57 +0000 Subject: [PATCH 01/54] DCO Remediation Commit for Ben Howe I, Ben Howe , hereby add my Signed-off-by to this commit: 86681ef67d3b76c0e468f6595e2c2524cf9b4b6c Signed-off-by: Ben Howe Signed-off-by: Anna Gringauze From 21a87c1646f168a6465c3e51dc4fc510c1de9c43 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 17 Sep 2024 14:40:45 -0700 Subject: [PATCH 02/54] State pointer synthesis for quantum hardware Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Builder/Intrinsics.h | 4 + include/cudaq/Optimizer/Transforms/Passes.td | 38 ++++ lib/Optimizer/Builder/Intrinsics.cpp | 4 + lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp | 3 +- lib/Optimizer/Transforms/CMakeLists.txt | 1 + lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 11 +- .../Transforms/StateInitialization.cpp | 146 +++++++++++++++ python/runtime/cudaq/algorithms/py_state.cpp | 5 +- .../cudaq/platform/py_alt_launch_kernel.cpp | 2 +- runtime/common/ArgumentConversion.cpp | 167 ++++++++++++++++-- runtime/common/ArgumentConversion.h | 22 ++- runtime/common/BaseRemoteRESTQPU.h | 33 ++-- runtime/common/BaseRestRemoteClient.h | 4 +- runtime/common/CMakeLists.txt | 2 +- runtime/common/SimulationState.h | 11 ++ runtime/cudaq/CMakeLists.txt | 1 + runtime/cudaq/algorithms/get_state.h | 12 ++ .../rest/helpers/quantinuum/quantinuum.yml | 2 + runtime/cudaq/qis/quantum_state.cpp | 113 ++++++++++++ runtime/cudaq/qis/quantum_state.h | 151 ++++++++++++++++ runtime/cudaq/qis/remote_state.cpp | 2 +- runtime/cudaq/qis/remote_state.h | 3 +- .../Remote-Sim/qvector_init_from_state.cpp | 16 ++ .../execution/qvector_init_from_state.cpp | 147 +++++++++++++++ targettests/execution/state_init.cpp | 2 +- test/Quake/arg_subst-5.txt | 15 ++ test/Quake/arg_subst-6.txt | 11 ++ test/Quake/arg_subst_func.qke | 37 +++- test/Quake/state_init.qke | 37 ++++ test/Quake/state_prep.qke | 2 +- tpls/Stim | 2 +- 31 files 
changed, 955 insertions(+), 51 deletions(-) create mode 100644 lib/Optimizer/Transforms/StateInitialization.cpp create mode 100644 runtime/cudaq/qis/quantum_state.cpp create mode 100644 runtime/cudaq/qis/quantum_state.h create mode 100644 targettests/execution/qvector_init_from_state.cpp create mode 100644 test/Quake/arg_subst-5.txt create mode 100644 test/Quake/arg_subst-6.txt create mode 100644 test/Quake/state_init.qke diff --git a/include/cudaq/Optimizer/Builder/Intrinsics.h b/include/cudaq/Optimizer/Builder/Intrinsics.h index 30ab0e696a1..c05021b879f 100644 --- a/include/cudaq/Optimizer/Builder/Intrinsics.h +++ b/include/cudaq/Optimizer/Builder/Intrinsics.h @@ -55,6 +55,10 @@ static constexpr const char createCudaqStateFromDataFP32[] = // Delete a state created by the runtime functions above. static constexpr const char deleteCudaqState[] = "__nvqpp_cudaq_state_delete"; +// Get state of a kernel (placeholder function, calls are always replaced in +// opts) +static constexpr const char getCudaqState[] = "__nvqpp_cudaq_state_get"; + /// Builder for lowering the clang AST to an IR for CUDA-Q. Lowering includes /// the transformation of both quantum and classical computation. 
Different /// features of the CUDA-Q programming model are lowered into different dialects diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 9ca3810f395..66eb4cfcb0d 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -779,6 +779,44 @@ def DeleteStates : Pass<"delete-states", "mlir::ModuleOp"> { }]; } +def StateInitialization : Pass<"state-initialization", "mlir::ModuleOp"> { + let summary = + "Replace `quake.init_state` instructions with call to the kernel generating the state"; + let description = [{ + Argument synthesis for state pointers for quantum devices substitutes state + argument by a new state created from `__nvqpp_cudaq_state_get` intrinsic, which + in turn accepts the name for the synthesized kernel that generated the state. + + This optimization completes the replacement of `quake.init_state` instruction by: + + - Replace `quake.init_state` by a call that `get_state` call refers to. + - Remove all unneeded instructions. 
+ + For example: + + Before StateInitialization (state-initialization): + ``` + func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %0 = cc.string_literal "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr> + %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr + %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr + %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 + %4 = quake.alloca !quake.veq[%3 : i64] + %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq + return + } + ``` + + After StateInitialization (state-initialization): + ``` + func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %5 = call @__nvqpp__mlirgen__test_init_state.modified_0() : () -> !quake.veq + return + } + ``` + }]; +} + def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> { let summary = "Convert state vector data into gates"; diff --git a/lib/Optimizer/Builder/Intrinsics.cpp b/lib/Optimizer/Builder/Intrinsics.cpp index 12e430dc031..57c636e31dd 100644 --- a/lib/Optimizer/Builder/Intrinsics.cpp +++ b/lib/Optimizer/Builder/Intrinsics.cpp @@ -261,6 +261,10 @@ static constexpr IntrinsicCode intrinsicTable[] = { {cudaq::deleteCudaqState, {}, R"#( func.func private @__nvqpp_cudaq_state_delete(%p : !cc.ptr) -> () + )#"}, + + {cudaq::getCudaqState, {}, R"#( + func.func private @__nvqpp_cudaq_state_get(%p : !cc.ptr) -> !cc.ptr )#"}, {cudaq::getNumQubitsFromCudaqState, {}, R"#( diff --git a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp index 4de20fd7bef..04eac5b06f7 100644 --- a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp +++ b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp @@ -49,7 +49,8 @@ struct VerifyNVQIRCallOpsPass cudaq::getNumQubitsFromCudaqState, cudaq::createCudaqStateFromDataFP32, cudaq::createCudaqStateFromDataFP64, - cudaq::deleteCudaqState}; + cudaq::deleteCudaqState, + cudaq::getCudaqState}; // It must be either NVQIR extension functions or in the allowed 
list. return std::find(NVQIR_FUNCS.begin(), NVQIR_FUNCS.end(), functionName) != NVQIR_FUNCS.end() || diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index a6b94d9a596..f107d78bde6 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -50,6 +50,7 @@ add_cudaq_library(OptTransforms QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp + StateInitialization.cpp StatePreparation.cpp UnitarySynthesis.cpp WiresToWiresets.cpp diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 9328b78896d..8cf6a019f8b 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -170,9 +170,10 @@ class AllocaPattern : public OpRewritePattern { if (auto load = dyn_cast(useuser)) { rewriter.setInsertionPointAfter(useuser); LLVM_DEBUG(llvm::dbgs() << "replaced load\n"); - rewriter.replaceOpWithNewOp( - load, eleTy, conArr, - ArrayRef{offset}); + auto extract = rewriter.create( + loc, eleTy, conArr, ArrayRef{offset}); + rewriter.replaceAllUsesWith(load, extract); + toErase.push_back(load); continue; } if (isa(useuser)) @@ -199,8 +200,10 @@ class AllocaPattern : public OpRewritePattern { toErase.push_back(alloc); } - for (auto *op : toErase) + for (auto *op : toErase) { + op->dropAllUses(); rewriter.eraseOp(op); + } return success(); } diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/StateInitialization.cpp new file mode 100644 index 00000000000..3a122f02a7b --- /dev/null +++ b/lib/Optimizer/Transforms/StateInitialization.cpp @@ -0,0 +1,146 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. 
* + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" +#include + +namespace cudaq::opt { +#define GEN_PASS_DEF_STATEINITIALIZATION +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + +#define DEBUG_TYPE "state-initialization" + +using namespace mlir; + +namespace { + +static bool isCall(Operation *callOp, std::vector &&names) { + if (callOp) { + if (auto createStateCall = dyn_cast(callOp)) { + if (auto calleeAttr = createStateCall.getCalleeAttr()) { + auto funcName = calleeAttr.getValue().str(); + if (std::find(names.begin(), names.end(), funcName) != names.end()) + return true; + } + } + } + return false; +} + +static bool isGetStateCall(Operation *callOp) { + return isCall(callOp, {cudaq::getCudaqState}); +} + +static bool isNumberOfQubitsCall(Operation *callOp) { + return isCall(callOp, {cudaq::getNumQubitsFromCudaqState}); +} + +// clang-format off +/// Replace `quake.init_state` by a call to a (modified) kernel that produced the state. 
+/// ``` +/// %0 = cc.string_literal "callee.modified_0" : !cc.ptr> +/// %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr +/// %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr +/// %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 +/// %4 = quake.alloca !quake.veq[%3 : i64] +/// %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq +/// ─────────────────────────────────────────── +/// ... +/// %5 = call @callee.modified_0() : () -> !quake.veq +/// ``` +// clang-format on +class StateInitPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(quake::InitializeStateOp initState, + PatternRewriter &rewriter) const override { + auto loc = initState.getLoc(); + auto allocaOp = initState.getOperand(0).getDefiningOp(); + auto getStateOp = initState.getOperand(1).getDefiningOp(); + auto numOfQubits = allocaOp->getOperand(0).getDefiningOp(); + + if (isGetStateCall(getStateOp)) { + auto calleeNameOp = getStateOp->getOperand(0); + if (auto cast = + dyn_cast(calleeNameOp.getDefiningOp())) { + calleeNameOp = cast.getOperand(); + + if (auto literal = dyn_cast( + calleeNameOp.getDefiningOp())) { + auto calleeName = literal.getStringLiteral(); + + Value result = + rewriter + .create(loc, initState.getType(), calleeName, + mlir::ValueRange{}) + .getResult(0); + rewriter.replaceAllUsesWith(initState, result); + initState.erase(); + allocaOp->dropAllUses(); + rewriter.eraseOp(allocaOp); + if (isNumberOfQubitsCall(numOfQubits)) { + numOfQubits->dropAllUses(); + rewriter.eraseOp(numOfQubits); + } + getStateOp->dropAllUses(); + rewriter.eraseOp(getStateOp); + cast->dropAllUses(); + rewriter.eraseOp(cast); + literal->dropAllUses(); + rewriter.eraseOp(literal); + return success(); + } + } + } + return failure(); + } +}; + +class StateInitializationPass + : public cudaq::opt::impl::StateInitializationBase< + StateInitializationPass> { +public: + using 
StateInitializationBase::StateInitializationBase; + + void runOnOperation() override { + auto *ctx = &getContext(); + auto module = getOperation(); + for (Operation &op : *module.getBody()) { + auto func = dyn_cast(op); + if (!func) + continue; + + std::string funcName = func.getName().str(); + RewritePatternSet patterns(ctx); + patterns.insert(ctx); + + LLVM_DEBUG(llvm::dbgs() + << "Before state initialization: " << func << '\n'); + + if (failed(applyPatternsAndFoldGreedily(func.getOperation(), + std::move(patterns)))) + signalPassFailure(); + + LLVM_DEBUG(llvm::dbgs() + << "After state initialization: " << func << '\n'); + } + } +}; +} // namespace diff --git a/python/runtime/cudaq/algorithms/py_state.cpp b/python/runtime/cudaq/algorithms/py_state.cpp index 77a8e4a36d0..74e098ebbf9 100644 --- a/python/runtime/cudaq/algorithms/py_state.cpp +++ b/python/runtime/cudaq/algorithms/py_state.cpp @@ -96,8 +96,9 @@ class PyRemoteSimulationState : public RemoteSimulationState { } } - std::pair> getKernelInfo() const override { - return {kernelName, argsData->getArgs()}; + std::optional>> + getKernelInfo() const override { + return std::make_pair(kernelName, argsData->getArgs()); } std::complex overlap(const cudaq::SimulationState &other) override { diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index b91627de9fc..a7531f9caa1 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -517,7 +517,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, auto isLocalSimulator = platform.is_simulator() && !platform.is_emulated(); auto isSimulator = isLocalSimulator || isRemoteSimulator; - cudaq::opt::ArgumentConverter argCon(name, unwrap(module), isSimulator); + cudaq::opt::ArgumentConverter argCon(name, unwrap(module)); argCon.gen(runtimeArgs.getArgs()); std::string kernName = 
cudaq::runtime::cudaqGenPrefixName + name; SmallVector kernels = {kernName}; diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 424cbd8873d..83e4dd3725c 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -10,6 +10,8 @@ #include "cudaq.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" #include "cudaq/Todo.h" #include "cudaq/qis/pauli_word.h" #include "cudaq/utils/registry.h" @@ -97,11 +99,25 @@ static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp substMod, llvm::DataLayout &); static Value genConstant(OpBuilder &builder, const cudaq::state *v, - ModuleOp substMod, llvm::DataLayout &layout, - llvm::StringRef kernelName, bool isSimulator) { - if (isSimulator) { - // The program is executed remotely, materialize the simulation data - // into an array and create a new state from it. + llvm::DataLayout &layout, + cudaq::opt::ArgumentConverter &converter) { + auto simState = + cudaq::state_helper::getSimulationState(const_cast(v)); + + auto kernelName = converter.getKernelName(); + auto sourceMod = converter.getSourceModule(); + auto substMod = converter.getSubstitutionModule(); + + // If the state has amplitude data, we materialize the data as a state + // vector and create a new state from it. + // TODO: how to handle density matrices? Should we just inline calls? + if (simState->hasData()) { + // The call below might cause lazy execution of the state kernel. + // TODO: For lazy execution scenario on remote simulators, we have the + // kernel info available on the state as well, before we needed to run + // the state kernel and compute its data, which might cause significant + // data transfer). Investigate if it is more performant to use the other + // synthesis option in that case (see the next `if`). 
auto numQubits = v->get_num_qubits(); // We currently only synthesize small states. @@ -130,11 +146,11 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, std::string name = kernelName.str() + ".rodata_synth_" + std::to_string(counter++); irBuilder.genVectorOfConstants(loc, substMod, name, vec); - auto conGlobal = builder.create(loc, ptrTy, name); - return builder.create(loc, arrTy, conGlobal); + + return builder.create(loc, ptrTy, name); }; - auto conArr = is64Bit ? genConArray.template operator()() + auto buffer = is64Bit ? genConArray.template operator()() : genConArray.template operator()(); auto createState = is64Bit ? cudaq::createCudaqStateFromDataFP64 @@ -146,21 +162,111 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto stateTy = cudaq::cc::StateType::get(ctx); auto statePtrTy = cudaq::cc::PointerType::get(stateTy); auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); - auto buffer = builder.create(loc, arrTy); - builder.create(loc, conArr, buffer); auto cast = builder.create(loc, i8PtrTy, buffer); auto statePtr = builder .create(loc, statePtrTy, createState, ValueRange{cast, arrSize}) .getResult(0); + return builder.create(loc, statePtrTy, statePtr); + } + + // For quantum hardware, replace states with calls to kernels that generated + // them. + if (simState->getKernelInfo().has_value()) { + auto [calleeName, calleeArgs] = simState->getKernelInfo().value(); + + std::string calleeKernelName = + cudaq::runtime::cudaqGenPrefixName + calleeName; + + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); - // TODO: Delete the new state before function exit. 
+ auto code = cudaq::get_quake_by_name(calleeName, /*throwException=*/false); + assert(!code.empty() && "Quake code not found for callee"); + auto fromModule = parseSourceString(code, ctx); + + static unsigned counter = 0; + std::string modifiedCalleeName = + calleeName + ".modified_" + std::to_string(counter++); + std::string modifiedCalleeKernelName = + cudaq::runtime::cudaqGenPrefixName + modifiedCalleeName; + + // Create callee.modified that returns concat of veq allocations. + auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); + assert(calleeFunc && "callee is missing"); + auto argTypes = calleeFunc.getArgumentTypes(); + auto retType = quake::VeqType::getUnsized(ctx); + auto funcTy = FunctionType::get(ctx, argTypes, {retType}); + + { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(sourceMod.getBody()); + + auto modifiedCalleeFunc = cast(builder.clone(*calleeFunc)); + modifiedCalleeFunc.setName(modifiedCalleeKernelName); + modifiedCalleeFunc.setType(funcTy); + modifiedCalleeFunc.setPrivate(); + + OpBuilder modifiedBuilder(ctx); + SmallVector allocations; + SmallVector cleanUps; + for (auto &op : modifiedCalleeFunc.getOps()) { + if (auto alloc = dyn_cast(op)) { + allocations.push_back(alloc.getResult()); + // Replace by the result of quake.init_state if used by it + for (auto *user : op.getUsers()) { + if (auto init = dyn_cast(*user)) { + allocations.pop_back(); + allocations.push_back(init.getResult()); + } + } + } + if (auto retOp = dyn_cast(op)) { + if (retOp.getOperands().size() == 0) { + modifiedBuilder.setInsertionPointAfter(retOp); + assert(allocations.size() > 0 && "No veq allocations found"); + Value ret = modifiedBuilder.create( + loc, quake::VeqType::getUnsized(ctx), allocations); + modifiedBuilder.create(loc, ret); + cleanUps.push_back(retOp); + } + } + } + for (auto *op : cleanUps) { + op->dropAllUses(); + op->erase(); + } + } + + // Create substitutions for the `callee.modified.N`. 
+ converter.genCallee(modifiedCalleeName, calleeArgs); + + // Create a subst for state pointer. + auto strLitTy = cudaq::cc::PointerType::get( + cudaq::cc::ArrayType::get(builder.getContext(), builder.getI8Type(), + modifiedCalleeKernelName.size() + 1)); + auto callee = builder.create( + loc, strLitTy, builder.getStringAttr(modifiedCalleeKernelName)); + + auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); + auto calleeCast = builder.create(loc, i8PtrTy, callee); + + cudaq::IRBuilder irBuilder(ctx); + auto result = irBuilder.loadIntrinsic(substMod, cudaq::getCudaqState); + assert(succeeded(result) && "loading intrinsic should never fail"); + + auto statePtrTy = + cudaq::cc::PointerType::get(cudaq::cc::StateType::get(ctx)); + auto statePtr = + builder + .create(loc, statePtrTy, cudaq::getCudaqState, + ValueRange{calleeCast}) + .getResult(0); return builder.create(loc, statePtrTy, statePtr); } - // The program is executed on quantum hardware, state data is not - // available and needs to be regenerated. - TODO("cudaq::state* argument synthesis for quantum hardware"); + + TODO("cudaq::state* argument synthesis for quantum hardware for c functions"); return {}; } @@ -326,7 +432,7 @@ cudaq::opt::ArgumentConverter::ArgumentConverter(StringRef kernelName, ModuleOp sourceModule, bool isSimulator) : sourceModule(sourceModule), builder(sourceModule.getContext()), - kernelName(kernelName), isSimulator(isSimulator) { + kernelName(kernelName) { substModule = builder.create(builder.getUnknownLoc()); } @@ -335,7 +441,7 @@ void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { // We should look up the input type signature here. 
auto fun = sourceModule.lookupSymbol( - cudaq::runtime::cudaqGenPrefixName + kernelName.str()); + cudaq::runtime::cudaqGenPrefixName + kernelName); FunctionType fromFuncTy = fun.getFunctionType(); for (auto iter : llvm::enumerate(llvm::zip(fromFuncTy.getInputs(), arguments))) { @@ -403,8 +509,7 @@ void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { .Case([&](cc::PointerType ptrTy) -> cc::ArgumentSubstitutionOp { if (ptrTy.getElementType() == cc::StateType::get(ctx)) return buildSubst(static_cast(argPtr), - substModule, dataLayout, kernelName, - isSimulator); + dataLayout, *this); return {}; }) .Case([&](cc::StdvecType ty) { @@ -457,3 +562,29 @@ void cudaq::opt::ArgumentConverter::gen_drop_front( } gen(partialArgs); } + +std::pair, std::vector> +cudaq::opt::ArgumentConverter::collectAllSubstitutions() { + std::vector kernels; + std::vector substs; + + std::function collect = + [&kernels, &substs, &collect](ArgumentConverter &con) { + auto name = con.getKernelName(); + std::string kernName = cudaq::runtime::cudaqGenPrefixName + name.str(); + kernels.push_back(kernName); + + { + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << con.getSubstitutionModule(); + substs.push_back(substBuff); + } + + for (auto &calleeCon : con.getCalleeConverters()) + collect(calleeCon); + }; + + collect(*this); + return {kernels, substs}; +} diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 45e6607b0c9..be438fe66ca 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -14,6 +14,7 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/Types.h" #include +#include namespace cudaq::opt { @@ -47,13 +48,30 @@ class ArgumentConverter { /// created. 
mlir::ModuleOp getSubstitutionModule() { return substModule; } + mlir::ModuleOp getSourceModule() { return sourceModule; } + + mlir::StringRef getKernelName() { return kernelName; } + + void genCallee(std::string &calleeName, std::vector &args) { + auto converter = ArgumentConverter(calleeName, sourceModule); + converter.gen(args); + calleeConverters.push_back(converter); + } + + std::vector &getCalleeConverters() { + return calleeConverters; + } + + std::pair, std::vector> + collectAllSubstitutions(); + private: mlir::ModuleOp sourceModule; mlir::ModuleOp substModule; mlir::OpBuilder builder; - mlir::StringRef kernelName; + std::string kernelName; mlir::SmallVector substitutions; - bool isSimulator; + std::vector calleeConverters; }; } // namespace cudaq::opt diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 61c26dc791f..41f45b6b759 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -393,15 +393,18 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (!func->hasAttr(cudaq::entryPointAttrName)) func->setAttr(cudaq::entryPointAttrName, builder.getUnitAttr()); auto moduleOp = builder.create(); - moduleOp.push_back(func.clone()); moduleOp->setAttrs(m_module->getAttrDictionary()); for (auto &op : m_module.getOps()) { - // Add any global symbols, including global constant arrays. - // Global constant arrays can be created during compilation, - // `lift-array-value`, `quake-synthesizer`, and `get-concrete-matrix` - // passes. - if (auto globalOp = dyn_cast(op)) + if (auto funcOp = dyn_cast(op)) { + // Add quantum kernels defined in the module. + if (funcOp->hasAttr(cudaq::kernelAttrName) || + funcOp.getName().startswith("__nvqpp__mlirgen__") || + funcOp.getBody().empty()) + moduleOp.push_back(funcOp.clone()); + } + // Add globals defined in the module. 
+ if (auto globalOp = dyn_cast(op)) moduleOp.push_back(globalOp.clone()); } @@ -428,16 +431,18 @@ class BaseRemoteRESTQPU : public cudaq::QPU { mlir::PassManager pm(&context); if (!rawArgs.empty()) { cudaq::info("Run Argument Synth.\n"); - opt::ArgumentConverter argCon(kernelName, moduleOp, false); + opt::ArgumentConverter argCon(kernelName, moduleOp); argCon.gen(rawArgs); - std::string kernName = cudaq::runtime::cudaqGenPrefixName + kernelName; - mlir::SmallVector kernels = {kernName}; - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << argCon.getSubstitutionModule(); - mlir::SmallVector substs = {substBuff}; + auto [kernels, substs] = argCon.collectAllSubstitutions(); pm.addNestedPass( - opt::createArgumentSynthesisPass(kernels, substs)); + cudaq::opt::createArgumentSynthesisPass( + mlir::SmallVector{kernels.begin(), + kernels.end()}, + mlir::SmallVector{substs.begin(), + substs.end()})); + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(opt::createDeleteStates()); + pm.addPass(opt::createStateInitialization()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index b938815d926..5384d71008a 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -329,8 +329,8 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { if (!castedState1 || !castedState2) throw std::runtime_error( "Invalid execution context: input states are not compatible"); - auto [kernelName1, args1] = castedState1->getKernelInfo(); - auto [kernelName2, args2] = castedState2->getKernelInfo(); + auto [kernelName1, args1] = castedState1->getKernelInfo().value(); + auto [kernelName2, args2] = castedState2->getKernelInfo().value(); cudaq::IRPayLoad stateIrPayload1, stateIrPayload2; stateIrPayload1.entryPoint = kernelName1; diff --git 
a/runtime/common/CMakeLists.txt b/runtime/common/CMakeLists.txt index bb8a5ecaba6..e1a38c4e257 100644 --- a/runtime/common/CMakeLists.txt +++ b/runtime/common/CMakeLists.txt @@ -102,7 +102,7 @@ set_source_files_properties( JIT.cpp Logger.cpp RuntimeMLIR.cpp - PROPERTIES COMPILE_FLAGS -fno-rtti +# PROPERTIES COMPILE_FLAGS -fno-rtti ) target_include_directories(cudaq-mlir-runtime diff --git a/runtime/common/SimulationState.h b/runtime/common/SimulationState.h index 3ec97f2568f..694770fa482 100644 --- a/runtime/common/SimulationState.h +++ b/runtime/common/SimulationState.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -140,6 +141,16 @@ class SimulationState { return createFromSizeAndPtr(size, ptr, data.index()); } + /// @brief True if the state has amplitudes or density matrix + // is available or can be computed. + virtual bool hasData() const { return true; } + + /// @brief Helper to retrieve (kernel name, `args` pointers) + virtual std::optional>> + getKernelInfo() const { + return std::nullopt; + } + /// @brief Return the tensor at the given index. Throws /// for an invalid tensor index. 
virtual Tensor getTensor(std::size_t tensorIdx = 0) const = 0; diff --git a/runtime/cudaq/CMakeLists.txt b/runtime/cudaq/CMakeLists.txt index 9c08eef3543..2efb8824e7b 100644 --- a/runtime/cudaq/CMakeLists.txt +++ b/runtime/cudaq/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(${LIBRARY_NAME} platform/quantum_platform.cpp qis/execution_manager_c_api.cpp qis/execution_manager.cpp + qis/quantum_state.cpp qis/remote_state.cpp qis/state.cpp utils/cudaq_utils.cpp diff --git a/runtime/cudaq/algorithms/get_state.h b/runtime/cudaq/algorithms/get_state.h index bbb64ebcbfc..a57fa0194e6 100644 --- a/runtime/cudaq/algorithms/get_state.h +++ b/runtime/cudaq/algorithms/get_state.h @@ -14,6 +14,7 @@ #include "cudaq/host_config.h" #include "cudaq/platform.h" #include "cudaq/platform/QuantumExecutionQueue.h" +#include "cudaq/qis/quantum_state.h" #include "cudaq/qis/remote_state.h" #include "cudaq/qis/state.h" #include @@ -118,6 +119,17 @@ auto get_state(QuantumKernel &&kernel, Args &&...args) { return state(new RemoteSimulationState(std::forward(kernel), std::forward(args)...)); } +#endif +#if defined(CUDAQ_QUANTUM_DEVICE) + // Store kernel name and arguments for quantum states. 
+ if (!cudaq::get_quake_by_name(cudaq::getKernelName(kernel), false).empty()) { + return state(new QuantumState(std::forward(kernel), + std::forward(args)...)); + } else { + throw std::runtime_error( + "cudaq::state* argument synthesis is not supported for quantum hardware" + "for c-like functions, use class kernels instead"); + } #endif return details::extractState([&]() mutable { cudaq::invokeKernel(std::forward(kernel), diff --git a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml index 21cc45be1e3..0a291a240d2 100644 --- a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml +++ b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: ["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/runtime/cudaq/qis/quantum_state.cpp b/runtime/cudaq/qis/quantum_state.cpp new file mode 100644 index 00000000000..faaae5b510a --- /dev/null +++ b/runtime/cudaq/qis/quantum_state.cpp @@ -0,0 +1,113 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "quantum_state.h" +#include "common/Logger.h" + +namespace cudaq { + +QuantumState::~QuantumState() { + if (!platformExecutionLog.empty()) { + // Flush any info log from the remote execution + printf("%s\n", platformExecutionLog.c_str()); + platformExecutionLog.clear(); + } + + for (std::size_t counter = 0; auto &ptr : args) + deleters[counter++](ptr); + + args.clear(); + deleters.clear(); +} + +std::size_t QuantumState::getNumQubits() const { + throw std::runtime_error( + "getNumQubits is not implemented for quantum hardware"); +} + +cudaq::SimulationState::Tensor +QuantumState::getTensor(std::size_t tensorIdx) const { + throw std::runtime_error("getTensor is not implemented for quantum hardware"); +} + +/// @brief Return all tensors that represent this state +std::vector QuantumState::getTensors() const { + throw std::runtime_error( + "getTensors is not implemented for quantum hardware"); + return {getTensor()}; +} + +/// @brief Return the number of tensors that represent this state. 
+std::size_t QuantumState::getNumTensors() const { + throw std::runtime_error( + "getNumTensors is not implemented for quantum hardware"); +} + +std::complex +QuantumState::operator()(std::size_t tensorIdx, + const std::vector &indices) { + throw std::runtime_error( + "operator() is not implemented for quantum hardware"); +} + +std::unique_ptr +QuantumState::createFromSizeAndPtr(std::size_t size, void *ptr, std::size_t) { + throw std::runtime_error( + "createFromSizeAndPtr is not implemented for quantum hardware"); +} + +void QuantumState::dump(std::ostream &os) const { + throw std::runtime_error("dump is not implemented for quantum hardware"); +} + +cudaq::SimulationState::precision QuantumState::getPrecision() const { + throw std::runtime_error( + "getPrecision is not implemented for quantum hardware"); +} + +void QuantumState::destroyState() { + // There is no state data so nothing to destroy. +} + +bool QuantumState::isDeviceData() const { + throw std::runtime_error( + "isDeviceData is not implemented for quantum hardware"); +} + +void QuantumState::toHost(std::complex *clientAllocatedData, + std::size_t numElements) const { + throw std::runtime_error("toHost is not implemented for quantum hardware"); +} + +void QuantumState::toHost(std::complex *clientAllocatedData, + std::size_t numElements) const { + throw std::runtime_error("toHost is not implemented for quantum hardware"); +} + +std::optional>> +QuantumState::getKernelInfo() const { + return std::make_pair(kernelName, args); +} + +std::vector> +QuantumState::getAmplitudes(const std::vector> &basisStates) { + throw std::runtime_error( + "getAmplitudes is not implemented for quantum hardware"); +} + +std::complex +QuantumState::getAmplitude(const std::vector &basisState) { + throw std::runtime_error( + "getAmplitudes is not implemented for quantum hardware"); +} + +std::complex +QuantumState::overlap(const cudaq::SimulationState &other) { + throw std::runtime_error("overlap is not implemented for quantum 
hardware"); +} +} // namespace cudaq diff --git a/runtime/cudaq/qis/quantum_state.h b/runtime/cudaq/qis/quantum_state.h new file mode 100644 index 00000000000..63117eb4629 --- /dev/null +++ b/runtime/cudaq/qis/quantum_state.h @@ -0,0 +1,151 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#pragma once + +#include "common/SimulationState.h" +#include "cudaq.h" +#include "cudaq/utils/cudaq_utils.h" + +namespace cudaq { +/// Implementation of `SimulationState` for quantum device backends. +// The state is represented by a quantum kernel. +// Quantum state contains all the information we need to replicate a +// call to kernel that created the state. +class QuantumState : public cudaq::SimulationState { +protected: + std::string kernelName; + // Lazily-evaluated state data (just keeping the kernel name and arguments). + // e.g., to be evaluated at amplitude accessor APIs (const APIs, hence needs + // to be mutable) or overlap calculation with another remote state (combining + // the IR of both states for remote evaluation) + mutable std::unique_ptr state; + // Cache log messages from the remote execution. + // Mutable to support lazy execution during `const` API calls. + mutable std::string platformExecutionLog; + using ArgDeleter = std::function; + /// @brief Vector of arguments + // Note: we create a copy of all arguments except pointers. + std::vector args; + /// @brief Deletion functions for the arguments. 
+ std::vector> deleters; + +public: + template + void addArgument(const T &arg) { + if constexpr (std::is_pointer_v>) { + if constexpr (std::is_copy_constructible_v< + std::remove_pointer_t>>) { + auto ptr = new std::remove_pointer_t>(*arg); + args.push_back(ptr); + deleters.push_back([](void *ptr) { + delete static_cast> *>(ptr); + }); + } else { + throw std::invalid_argument( + "Unsupported argument type: only pointers to copy-constructible " + "types and copy-constructible types are supported."); + } + } else if constexpr (std::is_copy_constructible_v>) { + auto *ptr = new std::decay_t(arg); + args.push_back(ptr); + deleters.push_back( + [](void *ptr) { delete static_cast *>(ptr); }); + } else { + throw std::invalid_argument( + "Unsupported argument type: only pointers to copy-constructible " + "types and copy-constructible types are supported."); + } + } + + /// @brief Constructor + template + QuantumState(QuantumKernel &&kernel, Args &&...args) { + if constexpr (has_name::value) { + // kernel_builder kernel: need to JIT code to get it registered. + static_cast(kernel).jitCode(); + kernelName = kernel.name(); + } else { + kernelName = cudaq::getKernelName(kernel); + } + (addArgument(args), ...); + } + QuantumState() = default; + virtual ~QuantumState(); + + /// @brief True if the state has amplitudes or density matrix available. + virtual bool hasData() const override { return false; } + + /// @brief Helper to retrieve (kernel name, `args` pointers) + virtual std::optional>> + getKernelInfo() const override; + + /// @brief Return the number of qubits this state represents. + std::size_t getNumQubits() const override; + + /// @brief Compute the overlap of this state representation with + /// the provided `other` state, e.g. ``. + std::complex overlap(const cudaq::SimulationState &other) override; + + /// @brief Return the amplitude of the given computational + /// basis state. 
+ std::complex + getAmplitude(const std::vector &basisState) override; + + /// @brief Return the amplitudes of the given list of computational + /// basis states. + std::vector> + getAmplitudes(const std::vector> &basisState) override; + + /// @brief Return the tensor at the given index. Throws + /// for an invalid tensor index. + Tensor getTensor(std::size_t tensorIdx = 0) const override; + + /// @brief Return all tensors that represent this state + std::vector getTensors() const override; + + /// @brief Return the number of tensors that represent this state. + std::size_t getNumTensors() const override; + + /// @brief Return the element from the tensor at the + /// given tensor index and at the given indices. + std::complex + operator()(std::size_t tensorIdx, + const std::vector &indices) override; + + /// @brief Create a new subclass specific SimulationState + /// from the user provided data set. + std::unique_ptr + createFromSizeAndPtr(std::size_t size, void *ptr, std::size_t) override; + + /// @brief Dump a representation of the state to the + /// given output stream. + void dump(std::ostream &os) const override; + + /// @brief Return the floating point precision used by the simulation state. + precision getPrecision() const override; + + /// @brief Destroy the state representation, frees all associated memory. + void destroyState() override; + + /// @brief Return true if this `SimulationState` wraps data on the GPU. + bool isDeviceData() const override; + + /// @brief Transfer data from device to host, return the data + /// to the pointer provided by the client. Clients must specify the number of + /// elements. + void toHost(std::complex *clientAllocatedData, + std::size_t numElements) const override; + + /// @brief Transfer data from device to host, return the data + /// to the pointer provided by the client. Clients must specify the number of + /// elements. 
+ void toHost(std::complex *clientAllocatedData, + std::size_t numElements) const override; +}; +} // namespace cudaq diff --git a/runtime/cudaq/qis/remote_state.cpp b/runtime/cudaq/qis/remote_state.cpp index 713a462e46d..84c9bf94104 100644 --- a/runtime/cudaq/qis/remote_state.cpp +++ b/runtime/cudaq/qis/remote_state.cpp @@ -128,7 +128,7 @@ void RemoteSimulationState::toHost(std::complex *clientAllocatedData, } } -std::pair> +std::optional>> RemoteSimulationState::getKernelInfo() const { return std::make_pair(kernelName, args); } diff --git a/runtime/cudaq/qis/remote_state.h b/runtime/cudaq/qis/remote_state.h index 878bb098dd8..ba7929dea44 100644 --- a/runtime/cudaq/qis/remote_state.h +++ b/runtime/cudaq/qis/remote_state.h @@ -83,7 +83,8 @@ class RemoteSimulationState : public cudaq::SimulationState { virtual void execute() const; /// @brief Helper to retrieve (kernel name, `args` pointers) - virtual std::pair> getKernelInfo() const; + virtual std::optional>> + getKernelInfo() const override; /// @brief Return the number of qubits this state represents. 
std::size_t getNumQubits() const override; diff --git a/targettests/Remote-Sim/qvector_init_from_state.cpp b/targettests/Remote-Sim/qvector_init_from_state.cpp index 5899c2f5987..1f94b47f06f 100644 --- a/targettests/Remote-Sim/qvector_init_from_state.cpp +++ b/targettests/Remote-Sim/qvector_init_from_state.cpp @@ -148,6 +148,22 @@ int main() { // CHECK: 10 // clang-format on + { + std::cout << "Passing state from another kernel as argument" + " with pauli word arg (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state); + auto counts = cudaq::sample(test_state_param2, &state, cudaq::pauli_word{"XX"}); + printCounts(counts); + } + // clang-format off +// CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 + // clang-format on + { std::cout << "Passing state from another kernel as argument iteratively " "with vector args (kernel mode)" diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp new file mode 100644 index 00000000000..afaba5a2c05 --- /dev/null +++ b/targettests/execution/qvector_init_from_state.cpp @@ -0,0 +1,147 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// clang-format off +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target quantinuum --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// clang-format on + +#include +#include +#include +#include + +struct test_init_state { + void operator()(int n) __qpu__ { + cudaq::qvector q(n); + ry(M_PI/2.0, q[0]); + } +}; + +struct test_state_param { + void operator()(cudaq::state *state) __qpu__ { + cudaq::qvector q(state); + x(q); + } +}; + +struct test_state_param2 { + void operator()(cudaq::state *state, cudaq::pauli_word w) __qpu__ { + cudaq::qvector q(state); + cudaq::exp_pauli(1.0, q, w); + } +}; + +void printCounts(cudaq::sample_result &result) { + std::vector values{}; + for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { + std::cout << bits << std::endl; + } +} + +int main() { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0., 0., 0., 0., 0.}; + std::vector vec1{0., 0., 0., 0., + 0., 0., M_SQRT1_2, M_SQRT1_2}; + auto state = cudaq::state::from_data(vec); + auto state1 = cudaq::state::from_data(vec1); + { + std::cout << "Passing state created from data as argument (kernel mode)" + << std::endl; + auto counts = cudaq::sample(test_state_param{}, &state); + printCounts(counts); + + counts = cudaq::sample(test_state_param{}, &state1); + printCounts(counts); + } + + // clang-format off +// CHECK: Passing state created from data as argument (kernel mode) +// CHECK: 011 +// CHECK: 111 + +// CHECK: 000 +// CHECK: 100 + // clang-format on + + { + std::cout << "Passing state from another kernel as argument (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); + auto counts = cudaq::sample(test_state_param{}, &state); + printCounts(counts); + } + // clang-format off +// CHECK: Passing state from another kernel as 
argument (kernel mode) +// CHECK: 01 +// CHECK: 11 + // clang-format on + + { + std::cout + << "Passing large state from another kernel as argument (kernel mode)" + << std::endl; + auto largeState = cudaq::get_state(test_init_state{}, 14); + auto counts = cudaq::sample(test_state_param{}, &largeState); + printCounts(counts); + } + // clang-format off +// CHECK: Passing large state from another kernel as argument (kernel mode) +// CHECK: 01111111111111 +// CHECK: 11111111111111 + // clang-format on + + { + std::cout << "Passing state from another kernel as argument" + " with pauli word arg (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); + auto counts = cudaq::sample(test_state_param2{}, &state, cudaq::pauli_word{"XX"}); + printCounts(counts); + } + // clang-format off +// CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 + // clang-format on + + { + std::cout << "Passing state from another kernel as argument iteratively " + "(kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); + for (auto i = 0; i < 4; i++) { + auto counts = cudaq::sample(test_state_param{}, &state); + std::cout << "Iteration: " << i << std::endl; + printCounts(counts); + state = cudaq::get_state(test_state_param{}, &state); + } + } + // clang-format off +// CHECK: Passing state from another kernel as argument iteratively (kernel mode) +// CHECK: Iteration: 0 +// CHECK: 01 +// CHECK: 11 +// CHECK: Iteration: 1 +// CHECK: 00 +// CHECK: 10 +// CHECK: Iteration: 2 +// CHECK: 01 +// CHECK: 11 +// CHECK: Iteration: 3 +// CHECK: 00 +// CHECK: 10 + // clang-format on +} diff --git a/targettests/execution/state_init.cpp b/targettests/execution/state_init.cpp index 31e946147dd..e9b8456513d 100644 --- a/targettests/execution/state_init.cpp +++ b/targettests/execution/state_init.cpp @@ -40,4 +40,4 @@ int main() { } // CHECK: 00 -// CHECK: 10 +// 
CHECK: 10 \ No newline at end of file diff --git a/test/Quake/arg_subst-5.txt b/test/Quake/arg_subst-5.txt new file mode 100644 index 00000000000..c5e727bb79e --- /dev/null +++ b/test/Quake/arg_subst-5.txt @@ -0,0 +1,15 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +cc.arg_subst[0] { + %0 = cc.string_literal "init" : !cc.ptr> + %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr + %2 = func.call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr + %3 = cc.cast %2 : (!cc.ptr) -> !cc.ptr +} +func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr diff --git a/test/Quake/arg_subst-6.txt b/test/Quake/arg_subst-6.txt new file mode 100644 index 00000000000..4c3a55d883a --- /dev/null +++ b/test/Quake/arg_subst-6.txt @@ -0,0 +1,11 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +cc.arg_subst[0] { + %c2_i32 = arith.constant 2 : i32 +} diff --git a/test/Quake/arg_subst_func.qke b/test/Quake/arg_subst_func.qke index e96e04b63af..4bf6e101556 100644 --- a/test/Quake/arg_subst_func.qke +++ b/test/Quake/arg_subst_func.qke @@ -6,7 +6,7 @@ // the terms of the Apache License 2.0 which accompanies this distribution. 
// // ========================================================================== // -// RUN: cudaq-opt --argument-synthesis=functions=foo:%S/arg_subst.txt,blink:%S/arg_subst.txt,testy1:%S/arg_subst-1.txt,testy2:%S/arg_subst-2.txt,testy3:%S/arg_subst-3.txt,testy4:%S/arg_subst-4.txt --canonicalize %s | FileCheck %s +// RUN: cudaq-opt --argument-synthesis=functions=foo:%S/arg_subst.txt,blink:%S/arg_subst.txt,testy1:%S/arg_subst-1.txt,testy2:%S/arg_subst-2.txt,testy3:%S/arg_subst-3.txt,testy4:%S/arg_subst-4.txt,testy5:%S/arg_subst-5.txt,init:%S/arg_subst-6.txt --canonicalize %s | FileCheck %s func.func private @bar(i32) func.func private @baz(f32) @@ -146,3 +146,38 @@ func.func @testy4(%arg0: !cc.stdvec>) { // CHECK: call @callee4(%[[VAL_32]]) : (!cc.stdvec>) -> () // CHECK: return // CHECK: } + +func.func @testy5(%arg0: !cc.ptr) { + %3 = call @__nvqpp_cudaq_state_numberOfQubits(%arg0) : (!cc.ptr) -> i64 + %4 = quake.alloca !quake.veq[%3 : i64] + %5 = quake.init_state %4, %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq + return +} + +func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 +func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr + +func.func private @init(%arg0: i32) -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst = arith.constant 1.5707963267948966 : f64 + %0 = cc.cast signed %arg0 : (i32) -> i64 + %1 = quake.alloca !quake.veq[%0 : i64] + %2 = quake.concat %1 : (!quake.veq) -> !quake.veq + return %2 : !quake.veq +} + +// CHECK-LABEL: func.func @testy5() { +// CHECK: %[[VAL_0:.*]] = cc.string_literal "init" : !cc.ptr> +// CHECK: %[[VAL_1:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_2:.*]] = call @__nvqpp_cudaq_state_get(%[[VAL_1]]) : (!cc.ptr) -> !cc.ptr +// CHECK: %[[VAL_3:.*]] = call @__nvqpp_cudaq_state_numberOfQubits(%[[VAL_2]]) : (!cc.ptr) -> i64 +// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] +// CHECK: %[[VAL_5:.*]] = quake.init_state %[[VAL_4]], %[[VAL_2]] : 
(!quake.veq, !cc.ptr) -> !quake.veq +// CHECK: return +// CHECK: } +// CHECK: func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 +// CHECK: func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr +// CHECK: func.func private @init() -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_7:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_8:.*]] = quake.relax_size %[[VAL_7:.*]] : (!quake.veq<2>) -> !quake.veq +// CHECK: return %[[VAL_8]] : !quake.veq +// CHECK: } diff --git a/test/Quake/state_init.qke b/test/Quake/state_init.qke new file mode 100644 index 00000000000..9f43a965a4f --- /dev/null +++ b/test/Quake/state_init.qke @@ -0,0 +1,37 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. 
// +// ========================================================================== // + +// RUN: cudaq-opt -state-initialization -canonicalize %s | FileCheck %s + +module { + func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %0 = cc.string_literal "callee.modified_0" : !cc.ptr> + %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr + %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr + %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 + %4 = quake.alloca !quake.veq[%3 : i64] + %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq + return + } + + func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 + func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr + + func.func private @callee.modified_0() -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst = arith.constant 1.5707963267948966 : f64 + %0 = quake.alloca !quake.veq<2> + %1 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref + quake.ry (%cst) %1 : (f64, !quake.ref) -> () + %2 = quake.relax_size %0 : (!quake.veq<2>) -> !quake.veq + return %2 : !quake.veq + } +// CHECK-LABEL: func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = call @callee.modified_0() : () -> !quake.veq +// CHECK: return +// CHECK: } +} diff --git a/test/Quake/state_prep.qke b/test/Quake/state_prep.qke index 4289571b33c..3072a192187 100644 --- a/test/Quake/state_prep.qke +++ b/test/Quake/state_prep.qke @@ -31,7 +31,7 @@ module { // CHECK: return // CHECK: } - func.func @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + func.func @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %0 = cc.address_of @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv.rodata_0 : !cc.ptr> %1 = 
quake.alloca !quake.veq<2> %2 = quake.init_state %1, %0 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> diff --git a/tpls/Stim b/tpls/Stim index 47190f4a3af..b01e4239158 160000 --- a/tpls/Stim +++ b/tpls/Stim @@ -1 +1 @@ -Subproject commit 47190f4a3afb104c9f0068d0be9fea87d2894a70 +Subproject commit b01e42391583d03db4266b387d907eda1d7ae488 From 3fc56de6f0c911888fc8f3ae6356b8613653f0f9 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 17 Oct 2024 14:25:47 -0700 Subject: [PATCH 03/54] Merge with main Signed-off-by: Anna Gringauze --- python/tests/interop/quantum_lib/CMakeLists.txt | 1 + runtime/common/BaseRemoteRESTQPU.h | 7 +++---- targettests/execution/state_init.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/tests/interop/quantum_lib/CMakeLists.txt b/python/tests/interop/quantum_lib/CMakeLists.txt index 34fb0241880..21bb37a4d7b 100644 --- a/python/tests/interop/quantum_lib/CMakeLists.txt +++ b/python/tests/interop/quantum_lib/CMakeLists.txt @@ -11,3 +11,4 @@ set(CMAKE_CXX_COMPILE_OBJECT " -fPIC --enable-mlir --disable # FIXME Error with SHARED, it pulls in all the mlir libraries anyway add_library(quantum_lib OBJECT quantum_lib.cpp) +add_dependencies(quantum_lib nvq++ cudaq-opt cudaq-quake cudaq-translate) diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 41f45b6b759..32a097cfc5d 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -393,14 +393,13 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (!func->hasAttr(cudaq::entryPointAttrName)) func->setAttr(cudaq::entryPointAttrName, builder.getUnitAttr()); auto moduleOp = builder.create(); + moduleOp.push_back(func.clone()); moduleOp->setAttrs(m_module->getAttrDictionary()); for (auto &op : m_module.getOps()) { if (auto funcOp = dyn_cast(op)) { - // Add quantum kernels defined in the module. 
- if (funcOp->hasAttr(cudaq::kernelAttrName) || - funcOp.getName().startswith("__nvqpp__mlirgen__") || - funcOp.getBody().empty()) + // Add function definitions for runtime functions. + if (funcOp.getBody().empty()) moduleOp.push_back(funcOp.clone()); } // Add globals defined in the module. diff --git a/targettests/execution/state_init.cpp b/targettests/execution/state_init.cpp index e9b8456513d..31e946147dd 100644 --- a/targettests/execution/state_init.cpp +++ b/targettests/execution/state_init.cpp @@ -40,4 +40,4 @@ int main() { } // CHECK: 00 -// CHECK: 10 \ No newline at end of file +// CHECK: 10 From 7969a755986157cdb04625a8680516432d00e352 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 17 Oct 2024 14:37:56 -0700 Subject: [PATCH 04/54] Merge with main Signed-off-by: Anna Gringauze --- tpls/Stim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpls/Stim b/tpls/Stim index b01e4239158..47190f4a3af 160000 --- a/tpls/Stim +++ b/tpls/Stim @@ -1 +1 @@ -Subproject commit b01e42391583d03db4266b387d907eda1d7ae488 +Subproject commit 47190f4a3afb104c9f0068d0be9fea87d2894a70 From 755d0d1971bc489093ab2e541db759352f4506eb Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 17 Oct 2024 15:24:55 -0700 Subject: [PATCH 05/54] Fix test failure on anyon platform Signed-off-by: Anna Gringauze --- runtime/common/BaseRemoteRESTQPU.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 32a097cfc5d..989649d9fa5 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -18,6 +18,7 @@ #include "common/RuntimeMLIR.h" #include "cudaq.h" #include "cudaq/Frontend/nvqpp/AttributeNames.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/OpenQASMEmitter.h" #include "cudaq/Optimizer/CodeGen/Passes.h" @@ -398,8 +399,13 @@ class BaseRemoteRESTQPU : public 
cudaq::QPU { for (auto &op : m_module.getOps()) { if (auto funcOp = dyn_cast(op)) { - // Add function definitions for runtime functions. - if (funcOp.getBody().empty()) + // Add function definitions for runtime functions that must + // be removed after synthesis in cleanup ops. + if (funcOp.getBody().empty() && + (funcOp.getName().equals(cudaq::getNumQubitsFromCudaqState) || + funcOp.getName().equals(cudaq::createCudaqStateFromDataFP64) || + funcOp.getName().equals(cudaq::createCudaqStateFromDataFP32) || + funcOp.getName().equals(cudaq::getCudaqState))) moduleOp.push_back(funcOp.clone()); } // Add globals defined in the module. From 382bc99adda74bcae5cab1965096dac12d6e2b37 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 17 Oct 2024 15:40:34 -0700 Subject: [PATCH 06/54] Make StateInitialization a funcOp pass Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.td | 2 +- .../Transforms/StateInitialization.cpp | 25 ++++++------------- runtime/common/BaseRemoteRESTQPU.h | 2 +- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 66eb4cfcb0d..70ae6c71386 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -779,7 +779,7 @@ def DeleteStates : Pass<"delete-states", "mlir::ModuleOp"> { }]; } -def StateInitialization : Pass<"state-initialization", "mlir::ModuleOp"> { +def StateInitialization : Pass<"state-initialization", "mlir::func::FuncOp"> { let summary = "Replace `quake.init_state` instructions with call to the kernel generating the state"; let description = [{ diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/StateInitialization.cpp index 3a122f02a7b..f641eb04f63 100644 --- a/lib/Optimizer/Transforms/StateInitialization.cpp +++ b/lib/Optimizer/Transforms/StateInitialization.cpp @@ -121,26 +121,17 @@ class StateInitializationPass void 
runOnOperation() override { auto *ctx = &getContext(); - auto module = getOperation(); - for (Operation &op : *module.getBody()) { - auto func = dyn_cast(op); - if (!func) - continue; + auto func = getOperation(); + RewritePatternSet patterns(ctx); + patterns.insert(ctx); - std::string funcName = func.getName().str(); - RewritePatternSet patterns(ctx); - patterns.insert(ctx); + LLVM_DEBUG(llvm::dbgs() << "Before state initialization: " << func << '\n'); - LLVM_DEBUG(llvm::dbgs() - << "Before state initialization: " << func << '\n'); + if (failed(applyPatternsAndFoldGreedily(func.getOperation(), + std::move(patterns)))) + signalPassFailure(); - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) - signalPassFailure(); - - LLVM_DEBUG(llvm::dbgs() - << "After state initialization: " << func << '\n'); - } + LLVM_DEBUG(llvm::dbgs() << "After state initialization: " << func << '\n'); } }; } // namespace diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 989649d9fa5..a37d5bf7067 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -447,7 +447,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { substs.end()})); pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); - pm.addPass(opt::createStateInitialization()); + pm.addNestedPass(opt::createStateInitialization()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); From d3a05d4432d41acaae68fea86eeac6f3e34d4cc7 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 18 Oct 2024 11:09:12 -0700 Subject: [PATCH 07/54] Fix issues and tests for the rest of quantum architectures Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.td | 11 ++ lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../Transforms/StateInitialization.cpp | 16 +-- lib/Optimizer/Transforms/StateValidation.cpp | 130 
++++++++++++++++++ runtime/common/BaseRemoteRESTQPU.h | 2 + .../default/rest/helpers/anyon/anyon.yml | 2 + .../default/rest/helpers/ionq/ionq.yml | 2 + .../platform/default/rest/helpers/iqm/iqm.yml | 2 + .../platform/default/rest/helpers/oqc/oqc.yml | 2 + .../execution/qvector_init_from_state.cpp | 17 ++- 10 files changed, 174 insertions(+), 11 deletions(-) create mode 100644 lib/Optimizer/Transforms/StateValidation.cpp diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 70ae6c71386..aa8f038c410 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -866,6 +866,17 @@ def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> { ]; } +def StateValidation : Pass<"state-validation", "mlir::ModuleOp"> { + let summary = + "Make sure MLIR is valid after synthesis for quantum devices"; + let description = [{ + Argument synthesis should replace all `quake.init` from state instructions + and calls to state-related runtime functions. + Make sure none of them left, and remove definitions for state-related + runtime functions. 
+ }]; +} + def PromoteRefToVeqAlloc : Pass<"promote-qubit-allocation"> { let summary = "Promote single qubit allocations."; let description = [{ diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index f107d78bde6..7eae39e35fe 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -52,6 +52,7 @@ add_cudaq_library(OptTransforms RegToMem.cpp StateInitialization.cpp StatePreparation.cpp + StateValidation.cpp UnitarySynthesis.cpp WiresToWiresets.cpp diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/StateInitialization.cpp index f641eb04f63..c46273b7476 100644 --- a/lib/Optimizer/Transforms/StateInitialization.cpp +++ b/lib/Optimizer/Transforms/StateInitialization.cpp @@ -30,10 +30,10 @@ using namespace mlir; namespace { -static bool isCall(Operation *callOp, std::vector &&names) { - if (callOp) { - if (auto createStateCall = dyn_cast(callOp)) { - if (auto calleeAttr = createStateCall.getCalleeAttr()) { +static bool isCall(Operation *op, std::vector &&names) { + if (op) { + if (auto callOp = dyn_cast(op)) { + if (auto calleeAttr = callOp.getCalleeAttr()) { auto funcName = calleeAttr.getValue().str(); if (std::find(names.begin(), names.end(), funcName) != names.end()) return true; @@ -43,12 +43,12 @@ static bool isCall(Operation *callOp, std::vector &&names) { return false; } -static bool isGetStateCall(Operation *callOp) { - return isCall(callOp, {cudaq::getCudaqState}); +static bool isGetStateCall(Operation *op) { + return isCall(op, {cudaq::getCudaqState}); } -static bool isNumberOfQubitsCall(Operation *callOp) { - return isCall(callOp, {cudaq::getNumQubitsFromCudaqState}); +static bool isNumberOfQubitsCall(Operation *op) { + return isCall(op, {cudaq::getNumQubitsFromCudaqState}); } // clang-format off diff --git a/lib/Optimizer/Transforms/StateValidation.cpp b/lib/Optimizer/Transforms/StateValidation.cpp new file mode 100644 index 
00000000000..be20dd4edef --- /dev/null +++ b/lib/Optimizer/Transforms/StateValidation.cpp @@ -0,0 +1,130 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" + +namespace cudaq::opt { +#define GEN_PASS_DEF_STATEVALIDATION +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + +#define DEBUG_TYPE "state-validation" + +using namespace mlir; + + +/// Validate that quantum code does not contain runtime calls and remove runtime function definitions. 
+namespace { + +static bool isRuntimeStateCallName(llvm::StringRef funcName) { + static std::vector names = { + cudaq::getCudaqState, + cudaq::createCudaqStateFromDataFP32, + cudaq::createCudaqStateFromDataFP64, + cudaq::deleteCudaqState, + cudaq::getNumQubitsFromCudaqState + }; + if (std::find(names.begin(), names.end(), funcName) != names.end()) + return true; + return false; +} + +static bool isRuntimeStateCall(Operation *callOp) { + if (callOp) { + if (auto call = dyn_cast(callOp)) { + if (auto calleeAttr = call.getCalleeAttr()) { + auto funcName = calleeAttr.getValue().str(); + if (isRuntimeStateCallName(funcName)) + return true; + } + } + } + return false; +} + +class ValidateStateCallPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(func::CallOp callOp, + PatternRewriter &rewriter) const override { + if (isRuntimeStateCall(callOp)) { + auto name = callOp.getCalleeAttr().getValue(); + callOp.emitError("Unsupported call for quantum platform: " + name); + } + return failure(); + } +}; + +class ValidateStateInitPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(quake::InitializeStateOp initState, + PatternRewriter &rewriter) const override { + auto stateOp = initState.getOperand(1); + if (isa(stateOp.getType())) + initState.emitError("Synthesis did not remove `quake.init_state ` instruction"); + + return failure(); + } +}; + + +class StateValidationPass + : public cudaq::opt::impl::StateValidationBase { +protected: +public: + using StateValidationBase::StateValidationBase; + + mlir::ModuleOp getModule() { return getOperation(); } + + void runOnOperation() override final { + auto *ctx = &getContext(); + auto module = getModule(); + SmallVector toErase; + + for (Operation &op : *module.getBody()) { + auto func = dyn_cast(op); + if (!func) + continue; + + RewritePatternSet patterns(ctx); + patterns.insert(ctx); + + 
LLVM_DEBUG(llvm::dbgs() + << "Before state validation: " << func << '\n'); + + if (failed(applyPatternsAndFoldGreedily(func.getOperation(), + std::move(patterns)))) + signalPassFailure(); + + // Delete runtime function definitions. + if (func.getBody().empty() && isRuntimeStateCallName(func.getName())) + toErase.push_back(func); + + LLVM_DEBUG(llvm::dbgs() + << "After state validation: " << func << '\n'); + } + + for (auto *op : toErase) + op->erase(); + } +}; + +} // namespace diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index a37d5bf7067..0eab2c7fbab 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -405,6 +405,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { (funcOp.getName().equals(cudaq::getNumQubitsFromCudaqState) || funcOp.getName().equals(cudaq::createCudaqStateFromDataFP64) || funcOp.getName().equals(cudaq::createCudaqStateFromDataFP32) || + funcOp.getName().equals(cudaq::deleteCudaqState) || funcOp.getName().equals(cudaq::getCudaqState))) moduleOp.push_back(funcOp.clone()); } @@ -448,6 +449,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); pm.addNestedPass(opt::createStateInitialization()); + pm.addPass(opt::createStateValidation()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); diff --git a/runtime/cudaq/platform/default/rest/helpers/anyon/anyon.yml b/runtime/cudaq/platform/default/rest/helpers/anyon/anyon.yml index 3ecb49f3021..e0fb208f9c9 100644 --- a/runtime/cudaq/platform/default/rest/helpers/anyon/anyon.yml +++ b/runtime/cudaq/platform/default/rest/helpers/anyon/anyon.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D 
CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: ["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml index 238d4c33163..802cdc2e0ad 100644 --- a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml +++ b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: ["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.yml b/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.yml index 0e90a1e2afa..2c928bda876 100644 --- a/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.yml +++ b/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: ["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml index 6a8a46c0667..cde626676cf 100644 --- a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml +++ b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: 
["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index afaba5a2c05..06c97b1e6a3 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -7,8 +7,16 @@ ******************************************************************************/ // clang-format off -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std --target quantinuum --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// Simulators +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s + +// Quantum emulators +// RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s +// 2 different IQM machines for 2 different topologies +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s // clang-format on #include @@ -91,7 +99,10 @@ int main() { std::cout << "Passing large state from another kernel as argument (kernel mode)" << std::endl; - auto largeState = cudaq::get_state(test_init_state{}, 14); + // TODO: State larger than 5 qubits fails on iqm machines with Adonis architecture + // TODO: State larger than 8 qubits fails on oqc and anyon + // Up to 14 bits works with quantinuum an ionq + auto largeState = cudaq::get_state(test_init_state{}, 5); auto counts = cudaq::sample(test_state_param{}, &largeState); printCounts(counts); } From 51ef054c14df334252e389e2244d24974486661e Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 18 Oct 2024 15:48:39 -0700 Subject: [PATCH 08/54] Fix failing quantinuum state prep tests Signed-off-by: Anna Gringauze --- 
.../Transforms/StateInitialization.cpp | 68 ++++++++++--------- lib/Optimizer/Transforms/StateValidation.cpp | 7 +- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/StateInitialization.cpp index c46273b7476..0ed68676709 100644 --- a/lib/Optimizer/Transforms/StateInitialization.cpp +++ b/lib/Optimizer/Transforms/StateInitialization.cpp @@ -73,39 +73,43 @@ class StateInitPattern : public OpRewritePattern { PatternRewriter &rewriter) const override { auto loc = initState.getLoc(); auto allocaOp = initState.getOperand(0).getDefiningOp(); - auto getStateOp = initState.getOperand(1).getDefiningOp(); - auto numOfQubits = allocaOp->getOperand(0).getDefiningOp(); - - if (isGetStateCall(getStateOp)) { - auto calleeNameOp = getStateOp->getOperand(0); - if (auto cast = - dyn_cast(calleeNameOp.getDefiningOp())) { - calleeNameOp = cast.getOperand(); - - if (auto literal = dyn_cast( - calleeNameOp.getDefiningOp())) { - auto calleeName = literal.getStringLiteral(); - - Value result = - rewriter - .create(loc, initState.getType(), calleeName, - mlir::ValueRange{}) - .getResult(0); - rewriter.replaceAllUsesWith(initState, result); - initState.erase(); - allocaOp->dropAllUses(); - rewriter.eraseOp(allocaOp); - if (isNumberOfQubitsCall(numOfQubits)) { - numOfQubits->dropAllUses(); - rewriter.eraseOp(numOfQubits); + auto stateOp = initState.getOperand(1); + + if (isa(stateOp.getType())) { + auto getStateOp = stateOp.getDefiningOp(); + auto numOfQubits = allocaOp->getOperand(0).getDefiningOp(); + + if (isGetStateCall(getStateOp)) { + auto calleeNameOp = getStateOp->getOperand(0); + if (auto cast = + dyn_cast(calleeNameOp.getDefiningOp())) { + calleeNameOp = cast.getOperand(); + + if (auto literal = dyn_cast( + calleeNameOp.getDefiningOp())) { + auto calleeName = literal.getStringLiteral(); + + Value result = + rewriter + .create(loc, initState.getType(), calleeName, + mlir::ValueRange{}) + 
.getResult(0); + rewriter.replaceAllUsesWith(initState, result); + initState.erase(); + allocaOp->dropAllUses(); + rewriter.eraseOp(allocaOp); + if (isNumberOfQubitsCall(numOfQubits)) { + numOfQubits->dropAllUses(); + rewriter.eraseOp(numOfQubits); + } + getStateOp->dropAllUses(); + rewriter.eraseOp(getStateOp); + cast->dropAllUses(); + rewriter.eraseOp(cast); + literal->dropAllUses(); + rewriter.eraseOp(literal); + return success(); } - getStateOp->dropAllUses(); - rewriter.eraseOp(getStateOp); - cast->dropAllUses(); - rewriter.eraseOp(cast); - literal->dropAllUses(); - rewriter.eraseOp(literal); - return success(); } } } diff --git a/lib/Optimizer/Transforms/StateValidation.cpp b/lib/Optimizer/Transforms/StateValidation.cpp index f0b25cdc100..c9d301740c6 100644 --- a/lib/Optimizer/Transforms/StateValidation.cpp +++ b/lib/Optimizer/Transforms/StateValidation.cpp @@ -62,7 +62,8 @@ class ValidateStateCallPattern : public OpRewritePattern { PatternRewriter &rewriter) const override { if (isRuntimeStateCall(callOp)) { auto name = callOp.getCalleeAttr().getValue(); - callOp.emitError("Unsupported call for quantum platform: " + name); + callOp.emitError( + "Synthesis did not remove func call for quantum platform: " + name); } return failure(); } @@ -77,8 +78,8 @@ class ValidateStateInitPattern PatternRewriter &rewriter) const override { auto stateOp = initState.getOperand(1); if (isa(stateOp.getType())) - initState.emitError( - "Synthesis did not remove `quake.init_state ` instruction"); + initState.emitError("Synthesis did not remove `quake.init_state " + "` instruction"); return failure(); } From a7f5387e10c181704ff36c37504fea72ea2e3486 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 21 Oct 2024 15:11:34 -0700 Subject: [PATCH 09/54] Address CR comments Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.td | 17 +-- lib/Optimizer/Transforms/CMakeLists.txt | 3 +- ...ization.cpp => ReplaceStateWithKernel.cpp} | 98 +++++++------- 
lib/Optimizer/Transforms/StateValidation.cpp | 127 ------------------ runtime/common/BaseRemoteRESTQPU.h | 10 +- ...init.qke => replace_state_with_kernel.qke} | 2 +- 6 files changed, 64 insertions(+), 193 deletions(-) rename lib/Optimizer/Transforms/{StateInitialization.cpp => ReplaceStateWithKernel.cpp} (56%) delete mode 100644 lib/Optimizer/Transforms/StateValidation.cpp rename test/Quake/{state_init.qke => replace_state_with_kernel.qke} (96%) diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index aa8f038c410..ef446a38129 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -779,7 +779,7 @@ def DeleteStates : Pass<"delete-states", "mlir::ModuleOp"> { }]; } -def StateInitialization : Pass<"state-initialization", "mlir::func::FuncOp"> { +def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::FuncOp"> { let summary = "Replace `quake.init_state` instructions with call to the kernel generating the state"; let description = [{ @@ -794,7 +794,7 @@ def StateInitialization : Pass<"state-initialization", "mlir::func::FuncOp"> { For example: - Before StateInitialization (state-initialization): + Before ReplaceStateWithKernel (replace-state-with-kernel): ``` func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %0 = cc.string_literal "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr> @@ -807,7 +807,7 @@ def StateInitialization : Pass<"state-initialization", "mlir::func::FuncOp"> { } ``` - After StateInitialization (state-initialization): + After ReplaceStateWithKernel (replace-state-with-kernel): ``` func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %5 = call @__nvqpp__mlirgen__test_init_state.modified_0() : () -> !quake.veq @@ -866,17 +866,6 @@ def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> { ]; } -def StateValidation : Pass<"state-validation", "mlir::ModuleOp"> { - let 
summary = - "Make sure MLIR is valid after synthesis for quantum devices"; - let description = [{ - Argument synthesis should replace all `quake.init` from state instructions - and calls to state-related runtime functions. - Make sure none of them left, and remove definitions for state-related - runtime functions. - }]; -} - def PromoteRefToVeqAlloc : Pass<"promote-qubit-allocation"> { let summary = "Promote single qubit allocations."; let description = [{ diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index 7eae39e35fe..153e095e1fc 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -50,9 +50,8 @@ add_cudaq_library(OptTransforms QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp - StateInitialization.cpp + ReplaceStateWithKernel.cpp StatePreparation.cpp - StateValidation.cpp UnitarySynthesis.cpp WiresToWiresets.cpp diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp similarity index 56% rename from lib/Optimizer/Transforms/StateInitialization.cpp rename to lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index 0ed68676709..d588f092167 100644 --- a/lib/Optimizer/Transforms/StateInitialization.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -20,11 +20,11 @@ #include namespace cudaq::opt { -#define GEN_PASS_DEF_STATEINITIALIZATION +#define GEN_PASS_DEF_REPLACESTATEWITHKERNEL #include "cudaq/Optimizer/Transforms/Passes.h.inc" } // namespace cudaq::opt -#define DEBUG_TYPE "state-initialization" +#define DEBUG_TYPE "replace-state-with-kernel" using namespace mlir; @@ -52,7 +52,9 @@ static bool isNumberOfQubitsCall(Operation *op) { } // clang-format off -/// Replace `quake.init_state` by a call to a (modified) kernel that produced the state. +/// Replace `quake.init_state` by a call to a (modified) kernel that produced +/// the state. 
+/// /// ``` /// %0 = cc.string_literal "callee.modified_0" : !cc.ptr> /// %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr @@ -65,50 +67,54 @@ static bool isNumberOfQubitsCall(Operation *op) { /// %5 = call @callee.modified_0() : () -> !quake.veq /// ``` // clang-format on -class StateInitPattern : public OpRewritePattern { +class ReplaceStateWithKernelPattern : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(quake::InitializeStateOp initState, PatternRewriter &rewriter) const override { - auto loc = initState.getLoc(); - auto allocaOp = initState.getOperand(0).getDefiningOp(); + //auto loc = initState.getLoc(); + auto *alloca = initState.getOperand(0).getDefiningOp(); auto stateOp = initState.getOperand(1); - if (isa(stateOp.getType())) { - auto getStateOp = stateOp.getDefiningOp(); - auto numOfQubits = allocaOp->getOperand(0).getDefiningOp(); - - if (isGetStateCall(getStateOp)) { - auto calleeNameOp = getStateOp->getOperand(0); - if (auto cast = - dyn_cast(calleeNameOp.getDefiningOp())) { - calleeNameOp = cast.getOperand(); - - if (auto literal = dyn_cast( - calleeNameOp.getDefiningOp())) { - auto calleeName = literal.getStringLiteral(); - - Value result = - rewriter - .create(loc, initState.getType(), calleeName, - mlir::ValueRange{}) - .getResult(0); - rewriter.replaceAllUsesWith(initState, result); - initState.erase(); - allocaOp->dropAllUses(); - rewriter.eraseOp(allocaOp); - if (isNumberOfQubitsCall(numOfQubits)) { - numOfQubits->dropAllUses(); - rewriter.eraseOp(numOfQubits); + if (auto ptrTy = dyn_cast(stateOp.getType())) { + if (isa(ptrTy.getElementType())) { + auto *getState = stateOp.getDefiningOp(); + auto *numOfQubits = alloca->getOperand(0).getDefiningOp(); + + if (isGetStateCall(getState)) { + auto calleeNameOp = getState->getOperand(0); + if (auto cast = calleeNameOp.getDefiningOp()) { + calleeNameOp = cast.getOperand(); + + if (auto literal = + calleeNameOp.getDefiningOp()) { + auto calleeName = 
literal.getStringLiteral(); + rewriter.replaceOpWithNewOp(initState, initState.getType(), calleeName, + mlir::ValueRange{}); + + if (alloca->getUses().empty()) + rewriter.eraseOp(alloca); + else { + alloca->emitError("Failed to remove `quake.alloca` in state synthesis"); + return failure(); + } + if (isNumberOfQubitsCall(numOfQubits)) { + if (numOfQubits->getUses().empty()) + rewriter.eraseOp(numOfQubits); + else { + numOfQubits->emitError("Failed to remove runtime call to get number of qubits in state synthesis"); + return failure(); + } + } + if (getState->getUses().empty()) + rewriter.eraseOp(getState); + else { + alloca->emitError("Failed to remove runtime call to get state in state synthesis"); + return failure(); + } + return success(); } - getStateOp->dropAllUses(); - rewriter.eraseOp(getStateOp); - cast->dropAllUses(); - rewriter.eraseOp(cast); - literal->dropAllUses(); - rewriter.eraseOp(literal); - return success(); } } } @@ -117,25 +123,25 @@ class StateInitPattern : public OpRewritePattern { } }; -class StateInitializationPass - : public cudaq::opt::impl::StateInitializationBase< - StateInitializationPass> { +class ReplaceStateWithKernelPass + : public cudaq::opt::impl::ReplaceStateWithKernelBase< + ReplaceStateWithKernelPass> { public: - using StateInitializationBase::StateInitializationBase; + using ReplaceStateWithKernelBase::ReplaceStateWithKernelBase; void runOnOperation() override { auto *ctx = &getContext(); auto func = getOperation(); RewritePatternSet patterns(ctx); - patterns.insert(ctx); + patterns.insert(ctx); - LLVM_DEBUG(llvm::dbgs() << "Before state initialization: " << func << '\n'); + LLVM_DEBUG(llvm::dbgs() << "Before replace state with kernel: " << func << '\n'); if (failed(applyPatternsAndFoldGreedily(func.getOperation(), std::move(patterns)))) signalPassFailure(); - LLVM_DEBUG(llvm::dbgs() << "After state initialization: " << func << '\n'); + LLVM_DEBUG(llvm::dbgs() << "After replace state with kerenl: " << func << '\n'); } }; } // 
namespace diff --git a/lib/Optimizer/Transforms/StateValidation.cpp b/lib/Optimizer/Transforms/StateValidation.cpp deleted file mode 100644 index c9d301740c6..00000000000 --- a/lib/Optimizer/Transforms/StateValidation.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -#include "PassDetails.h" -#include "cudaq/Optimizer/Builder/Intrinsics.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" -#include "cudaq/Optimizer/Transforms/Passes.h" -#include "mlir/Dialect/Complex/IR/Complex.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Transforms/Passes.h" - -namespace cudaq::opt { -#define GEN_PASS_DEF_STATEVALIDATION -#include "cudaq/Optimizer/Transforms/Passes.h.inc" -} // namespace cudaq::opt - -#define DEBUG_TYPE "state-validation" - -using namespace mlir; - -/// Validate that quantum code does not contain runtime calls and remove runtime -/// function definitions. 
-namespace { - -static bool isRuntimeStateCallName(llvm::StringRef funcName) { - static std::vector names = { - cudaq::getCudaqState, cudaq::createCudaqStateFromDataFP32, - cudaq::createCudaqStateFromDataFP64, cudaq::deleteCudaqState, - cudaq::getNumQubitsFromCudaqState}; - if (std::find(names.begin(), names.end(), funcName) != names.end()) - return true; - return false; -} - -static bool isRuntimeStateCall(Operation *callOp) { - if (callOp) { - if (auto call = dyn_cast(callOp)) { - if (auto calleeAttr = call.getCalleeAttr()) { - auto funcName = calleeAttr.getValue().str(); - if (isRuntimeStateCallName(funcName)) - return true; - } - } - } - return false; -} - -class ValidateStateCallPattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(func::CallOp callOp, - PatternRewriter &rewriter) const override { - if (isRuntimeStateCall(callOp)) { - auto name = callOp.getCalleeAttr().getValue(); - callOp.emitError( - "Synthesis did not remove func call for quantum platform: " + name); - } - return failure(); - } -}; - -class ValidateStateInitPattern - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(quake::InitializeStateOp initState, - PatternRewriter &rewriter) const override { - auto stateOp = initState.getOperand(1); - if (isa(stateOp.getType())) - initState.emitError("Synthesis did not remove `quake.init_state " - "` instruction"); - - return failure(); - } -}; - -class StateValidationPass - : public cudaq::opt::impl::StateValidationBase { -protected: -public: - using StateValidationBase::StateValidationBase; - - mlir::ModuleOp getModule() { return getOperation(); } - - void runOnOperation() override final { - auto *ctx = &getContext(); - auto module = getModule(); - SmallVector toErase; - - for (Operation &op : *module.getBody()) { - auto func = dyn_cast(op); - if (!func) - continue; - - RewritePatternSet patterns(ctx); - 
patterns.insert(ctx); - - LLVM_DEBUG(llvm::dbgs() << "Before state validation: " << func << '\n'); - - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) - signalPassFailure(); - - // Delete runtime function definitions. - if (func.getBody().empty() && isRuntimeStateCallName(func.getName())) - toErase.push_back(func); - - LLVM_DEBUG(llvm::dbgs() << "After state validation: " << func << '\n'); - } - - for (auto *op : toErase) - op->erase(); - } -}; - -} // namespace diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index c467811a666..a9053411fac 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -437,6 +437,9 @@ class BaseRemoteRESTQPU : public cudaq::QPU { mlir::PassManager pm(&context); if (!rawArgs.empty()) { cudaq::info("Run Argument Synth.\n"); + // For quantum hardware, we collect substitutions for the + // whole call tree of states, which are treated as calls to + // the kernels and their arguments that produced the state. 
opt::ArgumentConverter argCon(kernelName, moduleOp); argCon.gen(rawArgs); auto [kernels, substs] = argCon.collectAllSubstitutions(); @@ -446,10 +449,11 @@ class BaseRemoteRESTQPU : public cudaq::QPU { kernels.end()}, mlir::SmallVector{substs.begin(), substs.end()})); - pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); - pm.addNestedPass(opt::createStateInitialization()); - pm.addPass(opt::createStateValidation()); + pm.addNestedPass( + opt::createReplaceStateWithKernel()); + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(mlir::createSymbolDCEPass()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); diff --git a/test/Quake/state_init.qke b/test/Quake/replace_state_with_kernel.qke similarity index 96% rename from test/Quake/state_init.qke rename to test/Quake/replace_state_with_kernel.qke index 9f43a965a4f..70b04e31030 100644 --- a/test/Quake/state_init.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -6,7 +6,7 @@ // the terms of the Apache License 2.0 which accompanies this distribution. 
// // ========================================================================== // -// RUN: cudaq-opt -state-initialization -canonicalize %s | FileCheck %s +// RUN: cudaq-opt -replace-state-with-kernel -canonicalize %s | FileCheck %s module { func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { From 9f0937fcb022663cf1e94216e7acb9bd7c429572 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 21 Oct 2024 15:41:40 -0700 Subject: [PATCH 10/54] Format Signed-off-by: Anna Gringauze --- .../Transforms/ReplaceStateWithKernel.cpp | 37 +++++++++++-------- runtime/common/BaseRemoteRESTQPU.h | 2 +- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index d588f092167..5300f574154 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -67,13 +67,13 @@ static bool isNumberOfQubitsCall(Operation *op) { /// %5 = call @callee.modified_0() : () -> !quake.veq /// ``` // clang-format on -class ReplaceStateWithKernelPattern : public OpRewritePattern { +class ReplaceStateWithKernelPattern + : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(quake::InitializeStateOp initState, PatternRewriter &rewriter) const override { - //auto loc = initState.getLoc(); auto *alloca = initState.getOperand(0).getDefiningOp(); auto stateOp = initState.getOperand(1); @@ -87,30 +87,35 @@ class ReplaceStateWithKernelPattern : public OpRewritePattern()) { calleeNameOp = cast.getOperand(); - if (auto literal = - calleeNameOp.getDefiningOp()) { + if (auto literal = + calleeNameOp + .getDefiningOp()) { auto calleeName = literal.getStringLiteral(); - rewriter.replaceOpWithNewOp(initState, initState.getType(), calleeName, - mlir::ValueRange{}); + rewriter.replaceOpWithNewOp( + initState, initState.getType(), calleeName, + mlir::ValueRange{}); - 
if (alloca->getUses().empty()) + if (alloca->getUses().empty()) rewriter.eraseOp(alloca); - else { - alloca->emitError("Failed to remove `quake.alloca` in state synthesis"); + else { + alloca->emitError( + "Failed to remove `quake.alloca` in state synthesis"); return failure(); } if (isNumberOfQubitsCall(numOfQubits)) { if (numOfQubits->getUses().empty()) rewriter.eraseOp(numOfQubits); - else { - numOfQubits->emitError("Failed to remove runtime call to get number of qubits in state synthesis"); + else { + numOfQubits->emitError("Failed to remove runtime call to get " + "number of qubits in state synthesis"); return failure(); } } if (getState->getUses().empty()) rewriter.eraseOp(getState); - else { - alloca->emitError("Failed to remove runtime call to get state in state synthesis"); + else { + alloca->emitError("Failed to remove runtime call to get state " + "in state synthesis"); return failure(); } return success(); @@ -135,13 +140,15 @@ class ReplaceStateWithKernelPass RewritePatternSet patterns(ctx); patterns.insert(ctx); - LLVM_DEBUG(llvm::dbgs() << "Before replace state with kernel: " << func << '\n'); + LLVM_DEBUG(llvm::dbgs() + << "Before replace state with kernel: " << func << '\n'); if (failed(applyPatternsAndFoldGreedily(func.getOperation(), std::move(patterns)))) signalPassFailure(); - LLVM_DEBUG(llvm::dbgs() << "After replace state with kerenl: " << func << '\n'); + LLVM_DEBUG(llvm::dbgs() + << "After replace state with kerenl: " << func << '\n'); } }; } // namespace diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index cd57a245d60..2253b4a996a 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -411,7 +411,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { } // Add any global symbols, including global constant arrays. 
// Global constant arrays can be created during compilation, - // `lift-array-alloc`, `argument-synthesis`, `quake-synthesizer`, + // `lift-array-alloc`, `argument-synthesis`, `quake-synthesizer`, // and `get-concrete-matrix`passes. if (auto globalOp = dyn_cast(op)) moduleOp.push_back(globalOp.clone()); From 2f3a62327293e5c79b49c2249ecdf241467e6d9b Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 09:54:47 -0700 Subject: [PATCH 11/54] Fix failing test Signed-off-by: Anna Gringauze --- targettests/execution/qvector_init_from_state.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index 06c97b1e6a3..681e42eee07 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -108,8 +108,8 @@ int main() { } // clang-format off // CHECK: Passing large state from another kernel as argument (kernel mode) -// CHECK: 01111111111111 -// CHECK: 11111111111111 +// CHECK: 01111 +// CHECK: 11111 // clang-format on { From b3813503b148b98f4d7d074075a6a7496b1082c9 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 09:56:28 -0700 Subject: [PATCH 12/54] Format Signed-off-by: Anna Gringauze --- targettests/execution/qvector_init_from_state.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index 681e42eee07..d75a7e30d8d 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -109,7 +109,7 @@ int main() { // clang-format off // CHECK: Passing large state from another kernel as argument (kernel mode) // CHECK: 01111 -// CHECK: 11111 +// CHECK: 111111 // clang-format on { From dc87ca4c9b31d7d1037c5f103adc58a353822135 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 
09:57:02 -0700 Subject: [PATCH 13/54] Format Signed-off-by: Anna Gringauze --- targettests/execution/qvector_init_from_state.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index d75a7e30d8d..681e42eee07 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -109,7 +109,7 @@ int main() { // clang-format off // CHECK: Passing large state from another kernel as argument (kernel mode) // CHECK: 01111 -// CHECK: 111111 +// CHECK: 11111 // clang-format on { From 53a34c97759a619a9298523705392412a2fc7974 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 14:46:03 -0700 Subject: [PATCH 14/54] Replaced getState intrinsic by cc.get_state op Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Builder/Intrinsics.h | 4 - include/cudaq/Optimizer/Dialect/CC/CCOps.td | 20 +++++ lib/Optimizer/Builder/Intrinsics.cpp | 4 - .../Transforms/ReplaceStateWithKernel.cpp | 77 +++++++------------ runtime/common/ArgumentConversion.cpp | 21 +---- runtime/common/BaseRemoteRESTQPU.h | 1 - runtime/test/test_argument_conversion.cpp | 22 ++---- test/Quake/replace_state_with_kernel.qke | 15 ++-- 8 files changed, 63 insertions(+), 101 deletions(-) diff --git a/include/cudaq/Optimizer/Builder/Intrinsics.h b/include/cudaq/Optimizer/Builder/Intrinsics.h index d545a576025..fa9ce53097f 100644 --- a/include/cudaq/Optimizer/Builder/Intrinsics.h +++ b/include/cudaq/Optimizer/Builder/Intrinsics.h @@ -55,10 +55,6 @@ static constexpr const char createCudaqStateFromDataFP32[] = // Delete a state created by the runtime functions above. 
static constexpr const char deleteCudaqState[] = "__nvqpp_cudaq_state_delete"; -// Get state of a kernel (placeholder function, calls are always replaced in -// opts) -static constexpr const char getCudaqState[] = "__nvqpp_cudaq_state_get"; - /// Builder for lowering the clang AST to an IR for CUDA-Q. Lowering includes /// the transformation of both quantum and classical computation. Different /// features of the CUDA-Q programming model are lowered into different dialects diff --git a/include/cudaq/Optimizer/Dialect/CC/CCOps.td b/include/cudaq/Optimizer/Dialect/CC/CCOps.td index a58e3d403d6..cda02c7a23a 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCOps.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCOps.td @@ -898,6 +898,26 @@ def cc_AddressOfOp : CCOp<"address_of", [Pure, }]; } +def cc_GetStateOp : CCOp<"get_state", [Pure] > { + let summary = "Get state from kernel with the provided name."; + let description = [{ + This operation is created by argument synthesis of state pointer arguments + for quantum devices. It takes a kernel name as ASCIIZ string literal value + and returns the kernel's quantum state. The operation is replaced by a call + to the kernel with the provided name in ReplaceStateByKernel pass. 
+ + ```mlir + %0 = cc.get_state "callee" : !cc.ptr + ``` + }]; + + let arguments = (ins StrAttr:$calleeName); + let results = (outs cc_PointerType:$result); + let assemblyFormat = [{ + $calleeName `:` qualified(type(results)) attr-dict + }]; +} + def cc_GlobalOp : CCOp<"global", [IsolatedFromAbove, Symbol]> { let summary = "Create a global constant or variable"; let description = [{ diff --git a/lib/Optimizer/Builder/Intrinsics.cpp b/lib/Optimizer/Builder/Intrinsics.cpp index e0ed794264f..315743f057d 100644 --- a/lib/Optimizer/Builder/Intrinsics.cpp +++ b/lib/Optimizer/Builder/Intrinsics.cpp @@ -269,10 +269,6 @@ static constexpr IntrinsicCode intrinsicTable[] = { {cudaq::deleteCudaqState, {}, R"#( func.func private @__nvqpp_cudaq_state_delete(%p : !cc.ptr) -> () - )#"}, - - {cudaq::getCudaqState, {}, R"#( - func.func private @__nvqpp_cudaq_state_get(%p : !cc.ptr) -> !cc.ptr )#"}, {cudaq::getNumQubitsFromCudaqState, {}, R"#( diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index 5300f574154..80907bfec1d 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -43,10 +43,6 @@ static bool isCall(Operation *op, std::vector &&names) { return false; } -static bool isGetStateCall(Operation *op) { - return isCall(op, {cudaq::getCudaqState}); -} - static bool isNumberOfQubitsCall(Operation *op) { return isCall(op, {cudaq::getNumQubitsFromCudaqState}); } @@ -56,12 +52,10 @@ static bool isNumberOfQubitsCall(Operation *op) { /// the state. 
/// /// ``` -/// %0 = cc.string_literal "callee.modified_0" : !cc.ptr> -/// %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr -/// %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr -/// %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 -/// %4 = quake.alloca !quake.veq[%3 : i64] -/// %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq +/// %0 = cc.get_state "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr +/// %1 = call @__nvqpp_cudaq_state_numberOfQubits(%0) : (!cc.ptr) -> i64 +/// %2 = quake.alloca !quake.veq[%1 : i64] +/// %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq /// ─────────────────────────────────────────── /// ... /// %5 = call @callee.modified_0() : () -> !quake.veq @@ -79,49 +73,34 @@ class ReplaceStateWithKernelPattern if (auto ptrTy = dyn_cast(stateOp.getType())) { if (isa(ptrTy.getElementType())) { - auto *getState = stateOp.getDefiningOp(); auto *numOfQubits = alloca->getOperand(0).getDefiningOp(); - - if (isGetStateCall(getState)) { - auto calleeNameOp = getState->getOperand(0); - if (auto cast = calleeNameOp.getDefiningOp()) { - calleeNameOp = cast.getOperand(); - - if (auto literal = - calleeNameOp - .getDefiningOp()) { - auto calleeName = literal.getStringLiteral(); - rewriter.replaceOpWithNewOp( - initState, initState.getType(), calleeName, - mlir::ValueRange{}); - - if (alloca->getUses().empty()) - rewriter.eraseOp(alloca); - else { - alloca->emitError( - "Failed to remove `quake.alloca` in state synthesis"); - return failure(); - } - if (isNumberOfQubitsCall(numOfQubits)) { - if (numOfQubits->getUses().empty()) - rewriter.eraseOp(numOfQubits); - else { - numOfQubits->emitError("Failed to remove runtime call to get " - "number of qubits in state synthesis"); - return failure(); - } - } - if (getState->getUses().empty()) - rewriter.eraseOp(getState); - else { - alloca->emitError("Failed to remove runtime call to get state " - "in state synthesis"); - return failure(); - } - 
return success(); + stateOp.getDefiningOp()->dump(); + + if (auto getState = stateOp.getDefiningOp()) { + auto calleeName = getState.getCalleeName(); + rewriter.replaceOpWithNewOp( + initState, initState.getType(), calleeName, mlir::ValueRange{}); + + if (alloca->getUses().empty()) + rewriter.eraseOp(alloca); + else { + alloca->emitError( + "Failed to remove `quake.alloca` in state synthesis"); + return failure(); + } + if (isNumberOfQubitsCall(numOfQubits)) { + if (numOfQubits->getUses().empty()) + rewriter.eraseOp(numOfQubits); + else { + numOfQubits->emitError("Failed to remove runtime call to get " + "number of qubits in state synthesis"); + return failure(); } } + return success(); } + numOfQubits->emitError( + "Failed to replace `quake.init_state` in state synthesis"); } } return failure(); diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 42b228dd3bf..c548d235238 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -243,27 +243,10 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, converter.genCallee(modifiedCalleeName, calleeArgs); // Create a subst for state pointer. 
- auto strLitTy = cudaq::cc::PointerType::get( - cudaq::cc::ArrayType::get(builder.getContext(), builder.getI8Type(), - modifiedCalleeKernelName.size() + 1)); - auto callee = builder.create( - loc, strLitTy, builder.getStringAttr(modifiedCalleeKernelName)); - - auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); - auto calleeCast = builder.create(loc, i8PtrTy, callee); - - cudaq::IRBuilder irBuilder(ctx); - auto result = irBuilder.loadIntrinsic(substMod, cudaq::getCudaqState); - assert(succeeded(result) && "loading intrinsic should never fail"); - auto statePtrTy = cudaq::cc::PointerType::get(cudaq::cc::StateType::get(ctx)); - auto statePtr = - builder - .create(loc, statePtrTy, cudaq::getCudaqState, - ValueRange{calleeCast}) - .getResult(0); - return builder.create(loc, statePtrTy, statePtr); + return builder.create( + loc, statePtrTy, builder.getStringAttr(modifiedCalleeKernelName)); } TODO("cudaq::state* argument synthesis for quantum hardware for c functions"); diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 2c8654d540c..0421cde8774 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -458,7 +458,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { pm.addPass(opt::createDeleteStates()); pm.addNestedPass( opt::createReplaceStateWithKernel()); - pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(mlir::createSymbolDCEPass()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 9fe3d92f8fb..93939125c1b 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -380,13 +380,10 @@ void test_state(mlir::MLIRContext *ctx) { // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> -// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> -// CHECK: %[[VAL_2:.*]] = 
arith.constant 8 : i64 -// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> -// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr x 8>> -// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr -// CHECK: %[[VAL_5:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_4]], %[[VAL_2]]) : (!cc.ptr, i64) -> !cc.ptr -// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr) -> !cc.ptr +// CHECK: %[[VAL_1:.*]] = arith.constant 8 : i64 +// CHECK: %[[VAL_2:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr x 8>>) -> !cc.ptr +// CHECK: %[[VAL_3:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_2]], %[[VAL_1]]) : (!cc.ptr, i64) -> !cc.ptr +// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr) -> !cc.ptr // CHECK: } // CHECK-DAG: cc.global constant @[[VAL_GC]] (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> // CHECK-DAG: func.func private @__nvqpp_cudaq_state_createFromData_fp64(!cc.ptr, i64) -> !cc.ptr @@ -490,13 +487,10 @@ void test_combinations(mlir::MLIRContext *ctx) { // CHECK: } // CHECK-LABEL: cc.arg_subst[1] { // CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> -// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> -// CHECK: %[[VAL_2:.*]] = arith.constant 8 : i64 -// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> -// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr x 8>> -// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr -// CHECK: %[[VAL_5:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_4]], %[[VAL_2]]) : (!cc.ptr, i64) -> !cc.ptr -// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr) -> !cc.ptr +// CHECK: %[[VAL_1:.*]] = arith.constant 8 : i64 +// CHECK: %[[VAL_2:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr x 8>>) -> !cc.ptr +// 
CHECK: %[[VAL_3:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_2]], %[[VAL_1]]) : (!cc.ptr, i64) -> !cc.ptr +// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr) -> !cc.ptr // CHECK: } // CHECK-DAG: cc.global constant @[[VAL_GC]] (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> // CHECK-DAG: func.func private @__nvqpp_cudaq_state_createFromData_fp64(!cc.ptr, i64) -> !cc.ptr diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index 70b04e31030..751e29775a9 100644 --- a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -10,18 +10,13 @@ module { func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %0 = cc.string_literal "callee.modified_0" : !cc.ptr> - %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr - %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr - %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 - %4 = quake.alloca !quake.veq[%3 : i64] - %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq - return + %0 = cc.get_state "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr + %1 = call @__nvqpp_cudaq_state_numberOfQubits(%0) : (!cc.ptr) -> i64 + %2 = quake.alloca !quake.veq[%1 : i64] + %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq +return } - func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 - func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr - func.func private @callee.modified_0() -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { %cst = arith.constant 1.5707963267948966 : f64 %0 = quake.alloca !quake.veq<2> From fe6d409ec21b0f72016690213dd5a3781d9c53cc Mon Sep 17 00:00:00 2001 From: Anna 
Gringauze Date: Tue, 22 Oct 2024 14:47:59 -0700 Subject: [PATCH 15/54] Remove print Signed-off-by: Anna Gringauze --- lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index 80907bfec1d..bdc18982840 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -74,7 +74,6 @@ class ReplaceStateWithKernelPattern if (auto ptrTy = dyn_cast(stateOp.getType())) { if (isa(ptrTy.getElementType())) { auto *numOfQubits = alloca->getOperand(0).getDefiningOp(); - stateOp.getDefiningOp()->dump(); if (auto getState = stateOp.getDefiningOp()) { auto calleeName = getState.getCalleeName(); From 48704e3bcb648043ba9c1ccd7ecd056d620e88e6 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 14:50:08 -0700 Subject: [PATCH 16/54] Remove getCudaqState references Signed-off-by: Anna Gringauze --- lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp | 3 +-- runtime/common/BaseRemoteRESTQPU.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp index 04eac5b06f7..4de20fd7bef 100644 --- a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp +++ b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp @@ -49,8 +49,7 @@ struct VerifyNVQIRCallOpsPass cudaq::getNumQubitsFromCudaqState, cudaq::createCudaqStateFromDataFP32, cudaq::createCudaqStateFromDataFP64, - cudaq::deleteCudaqState, - cudaq::getCudaqState}; + cudaq::deleteCudaqState}; // It must be either NVQIR extension functions or in the allowed list. 
return std::find(NVQIR_FUNCS.begin(), NVQIR_FUNCS.end(), functionName) != NVQIR_FUNCS.end() || diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 0421cde8774..0d9a5ddbc96 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -408,8 +408,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { (funcOp.getName().equals(cudaq::getNumQubitsFromCudaqState) || funcOp.getName().equals(cudaq::createCudaqStateFromDataFP64) || funcOp.getName().equals(cudaq::createCudaqStateFromDataFP32) || - funcOp.getName().equals(cudaq::deleteCudaqState) || - funcOp.getName().equals(cudaq::getCudaqState))) + funcOp.getName().equals(cudaq::deleteCudaqState))) moduleOp.push_back(funcOp.clone()); } // Add any global symbols, including global constant arrays. From 137f621febc0c607dbea69d25eba70e7bcb696ca Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 15:01:25 -0700 Subject: [PATCH 17/54] Minor updates Signed-off-by: Anna Gringauze --- runtime/common/BaseRemoteRESTQPU.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 0d9a5ddbc96..5cf89c0332f 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -403,18 +403,21 @@ class BaseRemoteRESTQPU : public cudaq::QPU { for (auto &op : m_module.getOps()) { if (auto funcOp = dyn_cast(op)) { // Add function definitions for runtime functions that must - // be removed after synthesis in cleanup ops. + // be removed after synthesis in cleanup passes. 
+ static const std::vector stateFuncs = { + cudaq::getNumQubitsFromCudaqState, + cudaq::createCudaqStateFromDataFP32, + cudaq::createCudaqStateFromDataFP64}; + if (funcOp.getBody().empty() && - (funcOp.getName().equals(cudaq::getNumQubitsFromCudaqState) || - funcOp.getName().equals(cudaq::createCudaqStateFromDataFP64) || - funcOp.getName().equals(cudaq::createCudaqStateFromDataFP32) || - funcOp.getName().equals(cudaq::deleteCudaqState))) + std::find(stateFuncs.begin(), stateFuncs.end(), funcOp.getName()) != + stateFuncs.end()) moduleOp.push_back(funcOp.clone()); } // Add any global symbols, including global constant arrays. // Global constant arrays can be created during compilation, // `lift-array-alloc`, `argument-synthesis`, `quake-synthesizer`, - // and `get-concrete-matrix`passes. + // and `get-concrete-matrix` passes. if (auto globalOp = dyn_cast(op)) moduleOp.push_back(globalOp.clone()); } From ad7c6bcd26a521f4401e4b46e97e09795a4f6333 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 18:05:49 -0700 Subject: [PATCH 18/54] Fix failing quake test Signed-off-by: Anna Gringauze --- test/Quake/replace_state_with_kernel.qke | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index 751e29775a9..09570c62907 100644 --- a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -10,7 +10,7 @@ module { func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %0 = cc.get_state "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr + %0 = cc.get_state "callee.modified_0" : !cc.ptr %1 = call @__nvqpp_cudaq_state_numberOfQubits(%0) : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq From 78c0a4423cfa5070082a405925420ed0d8f52484 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 4 Nov 2024 16:02:06 -0800 Subject: [PATCH 19/54] 
Add a few state-related cc ops Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Dialect/CC/CCOps.td | 62 ++++++++++ include/cudaq/Optimizer/Transforms/Passes.td | 5 +- lib/Frontend/nvqpp/ConvertExpr.cpp | 13 +-- lib/Optimizer/CodeGen/QuakeToCodegen.cpp | 68 ++++++++++- lib/Optimizer/Transforms/DeleteStates.cpp | 112 +++++++------------ python/cudaq/kernel/ast_bridge.py | 6 +- python/cudaq/kernel/kernel_builder.py | 10 +- runtime/common/ArgumentConversion.cpp | 23 +--- runtime/cudaq/builder/kernel_builder.cpp | 11 +- runtime/test/test_argument_conversion.cpp | 20 +--- test/AST-Quake/qalloc_state.cpp | 9 +- test/Quake/delete_states.qke | 61 ++++------ 12 files changed, 218 insertions(+), 182 deletions(-) diff --git a/include/cudaq/Optimizer/Dialect/CC/CCOps.td b/include/cudaq/Optimizer/Dialect/CC/CCOps.td index a58e3d403d6..d58fc6bc335 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCOps.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCOps.td @@ -898,6 +898,68 @@ def cc_AddressOfOp : CCOp<"address_of", [Pure, }]; } +def cc_CreateStateOp : CCOp<"create_state", [Pure] > { + let summary = "Create state from data"; + let description = [{ + This operation takes a pointer to state data and creates a quantum state. + The operation can be optimized away in DeleteStates pass, or replaced + by an intrinsic runtime call on simulators. + + ```mlir + %0 = cc.create_state %data: !cc.ptr + ``` + }]; + + let arguments = (ins + AnyPointerType:$data, + AnySignlessInteger:$length + ); + let results = (outs AnyPointerType:$result); + let assemblyFormat = [{ + $data `,` $length `:` functional-type(operands, results) attr-dict + }]; +} + +def cc_GetNumberOfQubitsOp : CCOp<"get_number_of_qubits", [Pure] > { + let summary = "Get number of qubits from a quantum state"; + let description = [{ + This operation takes a state pointer argument and returns a number of + qubits in the state. 
The operation can be optimized away in some passes + like ReplaceStateByKernel or DeleteStates, or replaced by an intrinsic + runtime call on simulators. + + ```mlir + %0 = cc.get_number_of_qubits %state : i64 + ``` + }]; + + let arguments = (ins cc_PointerType:$state); + let results = (outs AnySignlessInteger:$result); + let assemblyFormat = [{ + $state `:` functional-type(operands, results) attr-dict + }]; +} + +def cc_GetStateOp : CCOp<"get_state", [Pure] > { + let summary = "Get state from kernel with the provided name."; + let description = [{ + This operation is created by argument synthesis of state pointer arguments + for quantum devices. It takes a kernel name as ASCIIZ string literal value + and returns the kernel's quantum state. The operation is replaced by a call + to the kernel with the provided name in ReplaceStateByKernel pass. + + ```mlir + %0 = cc.get_state "callee" : !cc.ptr + ``` + }]; + + let arguments = (ins StrAttr:$calleeName); + let results = (outs cc_PointerType:$result); + let assemblyFormat = [{ + $calleeName `:` qualified(type(results)) attr-dict + }]; +} + def cc_GlobalOp : CCOp<"global", [IsolatedFromAbove, Symbol]> { let summary = "Create a global constant or variable"; let description = [{ diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index da6f3163b3e..04964037c18 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -752,9 +752,8 @@ def DeleteStates : Pass<"delete-states", "mlir::ModuleOp"> { func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %c8_i64 = arith.constant 8 : i64 %0 = cc.address_of @foo.rodata_synth_0 : !cc.ptr x 8>> - %3 = cc.cast %0 : (!cc.ptr x 8>>) -> !cc.ptr - %4 = call @__nvqpp_cudaq_state_createFromData_fp32(%3, %c8_i64) : (!cc.ptr, i64) -> !cc.ptr - %5 = call @__nvqpp_cudaq_state_numberOfQubits(%4) : (!cc.ptr) -> i64 + %4 = cc.create_state %0, %c8_i64 : (!cc.ptr x 8>>,
i64) -> !cc.ptr + %5 = cc.get_number_of_qubits %4 : (!cc.ptr) -> i64 %6 = quake.alloca !quake.veq[%5 : i64] %7 = quake.init_state %6, %4 : (!quake.veq, !cc.ptr) -> !quake.veq diff --git a/lib/Frontend/nvqpp/ConvertExpr.cpp b/lib/Frontend/nvqpp/ConvertExpr.cpp index e6350d1c5c1..fa0fd326f10 100644 --- a/lib/Frontend/nvqpp/ConvertExpr.cpp +++ b/lib/Frontend/nvqpp/ConvertExpr.cpp @@ -2694,19 +2694,12 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { initials = load.getPtrvalue(); } if (isStateType(initials.getType())) { - IRBuilder irBuilder(builder.getContext()); - auto mod = - builder.getBlock()->getParentOp()->getParentOfType(); - auto result = - irBuilder.loadIntrinsic(mod, getNumQubitsFromCudaqState); - assert(succeeded(result) && "loading intrinsic should never fail"); Value state = initials; auto i64Ty = builder.getI64Type(); - auto numQubits = builder.create( - loc, i64Ty, getNumQubitsFromCudaqState, ValueRange{state}); + auto numQubits = + builder.create(loc, i64Ty, state); auto veqTy = quake::VeqType::getUnsized(ctx); - Value alloc = builder.create(loc, veqTy, - numQubits.getResult(0)); + Value alloc = builder.create(loc, veqTy, numQubits); return pushValue(builder.create( loc, veqTy, alloc, state)); } diff --git a/lib/Optimizer/CodeGen/QuakeToCodegen.cpp b/lib/Optimizer/CodeGen/QuakeToCodegen.cpp index e9e56f8f5fe..6e913a2bec2 100644 --- a/lib/Optimizer/CodeGen/QuakeToCodegen.cpp +++ b/lib/Optimizer/CodeGen/QuakeToCodegen.cpp @@ -8,6 +8,9 @@ #include "QuakeToCodegen.h" #include "CodeGenOps.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" +#include "cudaq/Optimizer/CodeGen/Passes.h" +#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" @@ -62,10 +65,73 @@ class ExpandComplexCast : public OpRewritePattern { return success(); } }; + +class CreateStateOpPattern : public 
OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(cudaq::cc::CreateStateOp createStateOp, + PatternRewriter &rewriter) const override { + auto module = createStateOp->getParentOfType(); + auto loc = createStateOp.getLoc(); + auto ctx = createStateOp.getContext(); + auto buffer = createStateOp.getOperand(0); + auto size = createStateOp.getOperand(1); + + auto bufferTy = buffer.getType(); + auto ptrTy = cast(bufferTy); + auto arrTy = cast(ptrTy.getElementType()); + auto eleTy = arrTy.getElementType(); + auto is64Bit = isa(eleTy); + + if (auto cTy = dyn_cast(eleTy)) + is64Bit = isa(eleTy); + + auto createStateFunc = is64Bit ? cudaq::createCudaqStateFromDataFP64 + : cudaq::createCudaqStateFromDataFP32; + cudaq::IRBuilder irBuilder(ctx); + auto result = irBuilder.loadIntrinsic(module, createStateFunc); + assert(succeeded(result) && "loading intrinsic should never fail"); + + auto stateTy = cudaq::cc::StateType::get(ctx); + auto statePtrTy = cudaq::cc::PointerType::get(stateTy); + auto i8PtrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); + auto cast = rewriter.create(loc, i8PtrTy, buffer); + + rewriter.replaceOpWithNewOp( + createStateOp, statePtrTy, createStateFunc, ValueRange{cast, size}); + return success(); + } +}; + +class GetNumberOfQubitsOpPattern + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(cudaq::cc::GetNumberOfQubitsOp getNumQubitsOp, + PatternRewriter &rewriter) const override { + auto module = getNumQubitsOp->getParentOfType(); + auto ctx = getNumQubitsOp.getContext(); + auto state = getNumQubitsOp.getOperand(); + + cudaq::IRBuilder irBuilder(ctx); + auto result = + irBuilder.loadIntrinsic(module, cudaq::getNumQubitsFromCudaqState); + assert(succeeded(result) && "loading intrinsic should never fail"); + + rewriter.replaceOpWithNewOp( + getNumQubitsOp, rewriter.getI64Type(), + cudaq::getNumQubitsFromCudaqState, state); + 
return success(); + } +}; + } // namespace void cudaq::codegen::populateQuakeToCodegenPatterns( mlir::RewritePatternSet &patterns) { auto *ctx = patterns.getContext(); - patterns.insert(ctx); + patterns.insert(ctx); } diff --git a/lib/Optimizer/Transforms/DeleteStates.cpp b/lib/Optimizer/Transforms/DeleteStates.cpp index 7cc7bca0444..74b3a432c23 100644 --- a/lib/Optimizer/Transforms/DeleteStates.cpp +++ b/lib/Optimizer/Transforms/DeleteStates.cpp @@ -29,104 +29,79 @@ namespace cudaq::opt { using namespace mlir; namespace { - -static bool isCall(Operation *callOp, std::vector &&names) { - if (callOp) { - if (auto createStateCall = dyn_cast(callOp)) { - if (auto calleeAttr = createStateCall.getCalleeAttr()) { - auto funcName = calleeAttr.getValue().str(); - if (std::find(names.begin(), names.end(), funcName) != names.end()) - return true; - } - } - } - return false; -} - -static bool isCreateStateCall(Operation *callOp) { - return isCall(callOp, {cudaq::createCudaqStateFromDataFP64, - cudaq::createCudaqStateFromDataFP32}); -} - -static bool isNumberOfQubitsCall(Operation *callOp) { - return isCall(callOp, {cudaq::getNumQubitsFromCudaqState}); -} - -/// For a call to `__nvqpp_cudaq_state_createFromData_fpXX`, get the number of -/// qubits allocated. -static std::size_t getStateSize(Operation *callOp) { - if (isCreateStateCall(callOp)) { - if (auto createStateCall = dyn_cast(callOp)) { - auto sizeOperand = createStateCall.getOperand(1); - auto defOp = sizeOperand.getDefiningOp(); - while (defOp && !dyn_cast(defOp)) - defOp = defOp->getOperand(0).getDefiningOp(); - if (auto constOp = dyn_cast(defOp)) - return constOp.getValue().cast().getInt(); - } +/// For a `cc:CreateStateOp`, get the number of qubits allocated. 
+static std::size_t getStateSize(Operation *op) { + if (auto createStateOp = dyn_cast(op)) { + auto sizeOperand = createStateOp.getOperand(1); + auto defOp = sizeOperand.getDefiningOp(); + while (defOp && !dyn_cast(defOp)) + defOp = defOp->getOperand(0).getDefiningOp(); + if (auto constOp = dyn_cast(defOp)) + return constOp.getValue().cast().getInt(); } - callOp->emitError("Cannot compute number of qubits"); + op->emitError("Cannot compute number of qubits from createStateOp"); return 0; } // clang-format off -/// Remove `__nvqpp_cudaq_state_numberOfQubits` calls. +/// Replace `cc.get_number_of_qubits` by a constant. /// ``` -/// %1 = arith.constant 8 : i64 -/// %2 = call @__nvqpp_cudaq_state_createFromData_fp32(%0, %1) : (!cc.ptr, i64) -> !cc.ptr -/// %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 +/// %c8_i64 = arith.constant 8 : i64 +/// %2 = cc.create_state %3, %c8_i64 : (!cc.ptr, i64) -> !cc.ptr +/// %3 = cc.get_number_of_qubits %2 : i64 /// ... /// ─────────────────────────────────────────── -/// %1 = arith.constant 8 : i64 -/// %2 = call @__nvqpp_cudaq_state_createFromData_fp32(%0, %1) : (!cc.ptr, i64) -> !cc.ptr -/// %5 = arith.constant 3 : i64 +/// %c8_i64 = arith.constant 8 : i64 +/// %2 = cc.create_state %3, %c8_i64 : (!cc.ptr, i64) -> !cc.ptr +/// %3 = arith.constant 3 : i64 /// ``` // clang-format on -class NumberOfQubitsPattern : public OpRewritePattern { +class NumberOfQubitsPattern + : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(func::CallOp callOp, + LogicalResult matchAndRewrite(cudaq::cc::GetNumberOfQubitsOp op, PatternRewriter &rewriter) const override { - if (isNumberOfQubitsCall(callOp)) { - auto createStateOp = callOp.getOperand(0).getDefiningOp(); - if (isCreateStateCall(createStateOp)) { - auto size = getStateSize(createStateOp); - rewriter.replaceOpWithNewOp( - callOp, std::countr_zero(size), rewriter.getI64Type()); - return success(); - } + auto stateOp 
= op.getOperand(); + if (auto createStateOp = + stateOp.getDefiningOp()) { + auto size = getStateSize(createStateOp); + rewriter.replaceOpWithNewOp( + op, std::countr_zero(size), rewriter.getI64Type()); + return success(); } return failure(); } }; // clang-format off -/// Replace calls to `__nvqpp_cudaq_state_numberOfQubits` by a constant. +/// Remove `cc.create_state` instructions and pass their data directly to +/// the `quake.init_state` instruction instead. /// ``` /// %2 = cc.cast %1 : (!cc.ptr x 8>>) -> !cc.ptr -/// %3 = call @__nvqpp_cudaq_state_createFromData_fp32(%2, %c8_i64) : (!cc.ptr, i64) -> !cc.ptr +/// %3 = cc.create_state %2, %c8_i64 : (!cc.ptr, i64) -> !cc.ptr /// %4 = quake.alloca !quake.veq[%0 : i64] /// %5 = quake.init_state %4, %3 : (!quake.veq, !cc.ptr) -> !quake.veq /// ─────────────────────────────────────────── /// ... -/// %3 = call @__nvqpp_cudaq_state_createFromData_fp32(%2, %c8_i64) : (!cc.ptr, i64) -> !cc.ptr /// %4 = quake.alloca !quake.veq[%0 : i64] /// %5 = quake.init_state %4, %1 : (!quake.veq, !cc.ptr x 8>>) -> !quake.veq /// ``` // clang-format on + class StateToDataPattern : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(quake::InitializeStateOp initState, PatternRewriter &rewriter) const override { - auto stateOp = initState.getOperand(1).getDefiningOp(); + auto state = initState.getOperand(1); auto targets = initState.getTargets(); - if (isCreateStateCall(stateOp)) { - auto dataOp = stateOp->getOperand(0); - if (auto cast = dyn_cast(dataOp.getDefiningOp())) + if (auto createStateOp = state.getDefiningOp()) { + auto dataOp = createStateOp->getOperand(0); + if (auto cast = dataOp.getDefiningOp()) dataOp = cast.getOperand(); rewriter.replaceOpWithNewOp( initState, targets.getType(), targets, dataOp); @@ -163,10 +138,8 @@ class DeleteStatesPass llvm::SmallVector usedStates; func.walk([&](Operation *op) { - if (isCreateStateCall(op)) { - if (op->getUses().empty()) -
op->erase(); - else + if (isa(op)) { + if (!op->getUses().empty()) usedStates.push_back(op); } }); @@ -178,15 +151,16 @@ class DeleteStatesPass func.walk([&](Operation *op) { if (isa(op)) { auto loc = op->getLoc(); - auto deleteState = cudaq::deleteCudaqState; - auto result = irBuilder.loadIntrinsic(module, deleteState); + auto result = + irBuilder.loadIntrinsic(module, cudaq::deleteCudaqState); assert(succeeded(result) && "loading intrinsic should never fail"); builder.setInsertionPoint(op); for (auto createStateOp : usedStates) { - auto results = cast(createStateOp).getResults(); - builder.create(loc, std::nullopt, deleteState, - results); + auto result = cast(createStateOp); + builder.create(loc, std::nullopt, + cudaq::deleteCudaqState, + mlir::ValueRange{result}); } } }); diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py index cae278143f5..13d694d7bca 100644 --- a/python/cudaq/kernel/ast_bridge.py +++ b/python/cudaq/kernel/ast_bridge.py @@ -2246,11 +2246,9 @@ def bodyBuilder(iterVal): # handle `cudaq.qvector(state)` statePtr = self.ifNotPointerThenStore(valueOrPtr) - symName = '__nvqpp_cudaq_state_numberOfQubits' - load_intrinsic(self.module, symName) i64Ty = self.getIntegerType() - numQubits = func.CallOp([i64Ty], symName, - [statePtr]).result + numQubits = cc.GetNumberOfQubitsOp(i64Ty, + statePtr).result veqTy = quake.VeqType.get(self.ctx) qubits = quake.AllocaOp(veqTy, size=numQubits).result diff --git a/python/cudaq/kernel/kernel_builder.py b/python/cudaq/kernel/kernel_builder.py index e8d6345ffbc..9f528acfeea 100644 --- a/python/cudaq/kernel/kernel_builder.py +++ b/python/cudaq/kernel/kernel_builder.py @@ -777,10 +777,8 @@ def qalloc(self, initializer=None): if isinstance(initializer, cudaq_runtime.State): statePtr = self.capturedDataStorage.storeCudaqState(initializer) - symName = '__nvqpp_cudaq_state_numberOfQubits' - load_intrinsic(self.module, symName) i64Ty = self.getIntegerType() - numQubits = func.CallOp([i64Ty], 
symName, [statePtr]).result + numQubits = cc.GetNumberOfQubitsOp(i64Ty, statePtr).result veqTy = quake.VeqType.get(self.ctx) qubits = quake.AllocaOp(veqTy, size=numQubits).result @@ -816,11 +814,9 @@ def qalloc(self, initializer=None): if cc.StateType.isinstance(valueTy): statePtr = initializer.mlirValue - symName = '__nvqpp_cudaq_state_numberOfQubits' - load_intrinsic(self.module, symName) i64Ty = self.getIntegerType() - numQubits = func.CallOp([i64Ty], symName, - [statePtr]).result + numQubits = cc.GetNumberOfQubitsOp(i64Ty, + statePtr).result veqTy = quake.VeqType.get(self.ctx) qubits = quake.AllocaOp(veqTy, size=numQubits).result diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 0de2589752f..09ddb9c74bc 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -130,33 +130,18 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, std::string name = kernelName.str() + ".rodata_synth_" + std::to_string(counter++); irBuilder.genVectorOfConstants(loc, substMod, name, vec); - auto conGlobal = builder.create(loc, ptrTy, name); - return builder.create(loc, arrTy, conGlobal); + return builder.create(loc, ptrTy, name); }; - auto conArr = is64Bit ? genConArray.template operator()() + auto buffer = is64Bit ? genConArray.template operator()() : genConArray.template operator()(); - auto createState = is64Bit ? 
cudaq::createCudaqStateFromDataFP64 - : cudaq::createCudaqStateFromDataFP32; - auto result = irBuilder.loadIntrinsic(substMod, createState); - assert(succeeded(result) && "loading intrinsic should never fail"); - auto arrSize = builder.create(loc, size, 64); auto stateTy = cudaq::cc::StateType::get(ctx); auto statePtrTy = cudaq::cc::PointerType::get(stateTy); - auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); - auto buffer = builder.create(loc, arrTy); - builder.create(loc, conArr, buffer); - - auto cast = builder.create(loc, i8PtrTy, buffer); - auto statePtr = builder - .create(loc, statePtrTy, createState, - ValueRange{cast, arrSize}) - .getResult(0); - // TODO: Delete the new state before function exit. - return builder.create(loc, statePtrTy, statePtr); + return builder.create(loc, statePtrTy, buffer, + arrSize); } // The program is executed on quantum hardware, state data is not // available and needs to be regenerated. diff --git a/runtime/cudaq/builder/kernel_builder.cpp b/runtime/cudaq/builder/kernel_builder.cpp index 6961cc547f7..ebf10a6978f 100644 --- a/runtime/cudaq/builder/kernel_builder.cpp +++ b/runtime/cudaq/builder/kernel_builder.cpp @@ -514,16 +514,11 @@ QuakeValue qalloc(ImplicitLocOpBuilder &builder, QuakeValue &sizeOrVec) { auto eleTy = statePtrTy.getElementType(); if (auto stateTy = dyn_cast(eleTy)) { // get the number of qubits - IRBuilder irBuilder(context); - auto mod = builder.getBlock()->getParentOp()->getParentOfType(); - auto result = irBuilder.loadIntrinsic(mod, getNumQubitsFromCudaqState); - assert(succeeded(result) && "loading intrinsic should never fail"); - auto numQubits = builder.create( - builder.getI64Type(), getNumQubitsFromCudaqState, ValueRange{value}); + auto numQubits = builder.create( + builder.getI64Type(), value); // allocate the number of qubits we need auto veqTy = quake::VeqType::getUnsized(context); - Value qubits = - builder.create(veqTy, numQubits.getResult(0)); + Value qubits = 
builder.create(veqTy, numQubits); // Add the initialize state op qubits = builder.create(qubits.getType(), qubits, value); diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 9fe3d92f8fb..1326ac4d395 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -380,16 +380,10 @@ void test_state(mlir::MLIRContext *ctx) { // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> -// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> -// CHECK: %[[VAL_2:.*]] = arith.constant 8 : i64 -// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> -// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr x 8>> -// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr -// CHECK: %[[VAL_5:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_4]], %[[VAL_2]]) : (!cc.ptr, i64) -> !cc.ptr -// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr) -> !cc.ptr +// CHECK: %[[VAL_1:.*]] = arith.constant 8 : i64 +// CHECK: %[[VAL_2:.*]] = cc.create_state %[[VAL_0]], %[[VAL_1]] : (!cc.ptr x 8>, i64) -> !cc.ptr // CHECK: } // CHECK-DAG: cc.global constant @[[VAL_GC]] (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> -// CHECK-DAG: func.func private @__nvqpp_cudaq_state_createFromData_fp64(!cc.ptr, i64) -> !cc.ptr // clang-format on } @@ -490,16 +484,10 @@ void test_combinations(mlir::MLIRContext *ctx) { // CHECK: } // CHECK-LABEL: cc.arg_subst[1] { // CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> -// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> -// CHECK: %[[VAL_2:.*]] = arith.constant 8 : i64 -// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> -// 
CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr x 8>> -// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr -// CHECK: %[[VAL_5:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_4]], %[[VAL_2]]) : (!cc.ptr, i64) -> !cc.ptr -// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr) -> !cc.ptr +// CHECK: %[[VAL_1:.*]] = arith.constant 8 : i64 +// CHECK: %[[VAL_5:.*]] = cc.create_state %[[VAL_0]], %[[VAL_1]] : (!cc.ptr x 8>>, i64) -> !cc.ptr // CHECK: } // CHECK-DAG: cc.global constant @[[VAL_GC]] (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> -// CHECK-DAG: func.func private @__nvqpp_cudaq_state_createFromData_fp64(!cc.ptr, i64) -> !cc.ptr // CHECK-LABEL: cc.arg_subst[2] { // CHECK: %[[VAL_0:.*]] = cc.alloca !cc.array // CHECK: %[[VAL_1:.*]] = cc.address_of @cstr.585800 : !cc.ptr> diff --git a/test/AST-Quake/qalloc_state.cpp b/test/AST-Quake/qalloc_state.cpp index 191c9c3a305..822f1e1f567 100644 --- a/test/AST-Quake/qalloc_state.cpp +++ b/test/AST-Quake/qalloc_state.cpp @@ -20,7 +20,7 @@ struct Eins { // CHECK-LABEL: func.func @__nvqpp__mlirgen__Eins( // CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr) -> !cc.stdvec -// CHECK: %[[VAL_3:.*]] = call @__nvqpp_cudaq_state_numberOfQubits(%[[VAL_0]]) : (!cc.ptr) -> i64 +// CHECK: %[[VAL_3:.*]] = cc.get_number_of_qubits %[[VAL_0]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_5:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_6:.*]] = quake.init_state %[[VAL_5]], %[[VAL_0]] : (!quake.veq, !cc.ptr) -> !quake.veq @@ -34,7 +34,7 @@ struct Zwei { // CHECK-LABEL: func.func @__nvqpp__mlirgen__Zwei( // CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr) -> !cc.stdvec -// CHECK: %[[VAL_3:.*]] = call @__nvqpp_cudaq_state_numberOfQubits(%[[VAL_0]]) : (!cc.ptr) -> i64 +// CHECK: 
%[[VAL_3:.*]] = cc.get_number_of_qubits %[[VAL_0]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_5:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_6:.*]] = quake.init_state %[[VAL_5]], %[[VAL_0]] : (!quake.veq, !cc.ptr) -> !quake.veq @@ -48,7 +48,7 @@ struct Drei { // CHECK-LABEL: func.func @__nvqpp__mlirgen__Drei( // CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr) -> !cc.stdvec -// CHECK: %[[VAL_3:.*]] = call @__nvqpp_cudaq_state_numberOfQubits(%[[VAL_0]]) : (!cc.ptr) -> i64 +// CHECK: %[[VAL_3:.*]] = cc.get_number_of_qubits %[[VAL_0]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_5:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_6:.*]] = quake.init_state %[[VAL_5]], %[[VAL_0]] : (!quake.veq, !cc.ptr) -> !quake.veq @@ -62,8 +62,7 @@ struct Vier { // CHECK-LABEL: func.func @__nvqpp__mlirgen__Vier( // CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr) -> !cc.stdvec -// CHECK: %[[VAL_3:.*]] = call @__nvqpp_cudaq_state_numberOfQubits(%[[VAL_0]]) : (!cc.ptr) -> i64 +// CHECK: %[[VAL_3:.*]] = cc.get_number_of_qubits %[[VAL_0]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_5:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_6:.*]] = quake.init_state %[[VAL_5]], %[[VAL_0]] : (!quake.veq, !cc.ptr) -> !quake.veq -// CHECK: func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 diff --git a/test/Quake/delete_states.qke b/test/Quake/delete_states.qke index caa7cca6218..bc9c3e1d474 100644 --- a/test/Quake/delete_states.qke +++ b/test/Quake/delete_states.qke @@ -12,33 +12,24 @@ module { func.func @__nvqpp__mlirgen__function_test_state_param._Z16test_state_paramPN5cudaq5stateE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %c8_i64 = arith.constant 8 : i64 %0 = cc.address_of @function_test_state_param._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 : !cc.ptr x 8>> - %1 = cc.load %0 : !cc.ptr x 8>> - %2 = cc.alloca !cc.array x 8> - cc.store %1, %2 : !cc.ptr x 8>> - %3 = cc.cast %2 : (!cc.ptr x 8>>) -> !cc.ptr - %4 = call 
@__nvqpp_cudaq_state_createFromData_fp32(%3, %c8_i64) : (!cc.ptr, i64) -> !cc.ptr - %5 = call @__nvqpp_cudaq_state_numberOfQubits(%4) : (!cc.ptr) -> i64 - %6 = quake.alloca !quake.veq[%5 : i64] - %7 = quake.init_state %6, %4 : (!quake.veq, !cc.ptr) -> !quake.veq + %1 = cc.create_state %0, %c8_i64 : (!cc.ptr x 8>>, i64) -> !cc.ptr + %2 = cc.get_number_of_qubits %1 : (!cc.ptr) -> i64 + %3 = quake.alloca !quake.veq[%2 : i64] + %4 = quake.init_state %3, %1 : (!quake.veq, !cc.ptr) -> !quake.veq return } - func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 - cc.global constant @function_test_state_param._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00 -,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> - func.func private @__nvqpp_cudaq_state_createFromData_fp32(!cc.ptr, i64) -> !cc.ptr + cc.global constant @function_test_state_param._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> // CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_state_param._Z16test_state_paramPN5cudaq5stateE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { // CHECK: %[[VAL_0:.*]] = cc.address_of @function_test_state_param._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 : !cc.ptr x 8>> -// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> -// CHECK: %[[VAL_2:.*]] = cc.alloca !cc.array x 8> -// CHECK: cc.store %[[VAL_1]], %[[VAL_2]] : !cc.ptr x 8>> -// CHECK: %[[VAL_3:.*]] = quake.alloca !quake.veq<3> -// CHECK: %[[VAL_4:.*]] = quake.init_state 
%[[VAL_3]], %[[VAL_2]] : (!quake.veq<3>, !cc.ptr x 8>>) -> !quake.veq<3> +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<3> +// CHECK: %[[VAL_2:.*]] = quake.init_state %[[VAL_1]], %[[VAL_0]] : (!quake.veq<3>, !cc.ptr x 8>>) -> !quake.veq<3> +// CHECK: return // CHECK: } -// CHECK-DAG: cc.global constant @function_test_state_param._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> +// CHECK-DAG: cc.global constant @function_test_state_param._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> func.func @__nvqpp__mlirgen__sub_kernel(%arg : !cc.ptr) attributes {"cudaq-kernel", no_this} { - %0 = call @__nvqpp_cudaq_state_numberOfQubits(%arg) : (!cc.ptr) -> i64 + %0 = cc.get_number_of_qubits %arg : (!cc.ptr) -> i64 %1 = quake.alloca !quake.veq[%0 : i64] %2 = quake.init_state %1, %arg : (!quake.veq, !cc.ptr) -> !quake.veq return @@ -47,38 +38,28 @@ module { func.func @__nvqpp__mlirgen__function_test_state_param1._Z16test_state_paramPN5cudaq5stateE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %c8_i64 = arith.constant 8 : i64 %0 = cc.address_of @function_test_state_param1._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 : !cc.ptr x 8>> - %1 = cc.load %0 : !cc.ptr x 8>> - %2 = cc.alloca !cc.array x 8> - cc.store %1, %2 : !cc.ptr x 8>> - %3 = cc.cast %2 : (!cc.ptr x 8>>) -> !cc.ptr - %4 = call @__nvqpp_cudaq_state_createFromData_fp32(%3, %c8_i64) : (!cc.ptr, i64) -> !cc.ptr - call 
@__nvqpp__mlirgen__sub_kernel(%4) : (!cc.ptr) -> () + %1 = cc.create_state %0, %c8_i64 : (!cc.ptr x 8>>, i64) -> !cc.ptr + call @__nvqpp__mlirgen__sub_kernel(%1) : (!cc.ptr) -> () return } cc.global constant @function_test_state_param1._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00 ,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> -// CHECK: func.func @__nvqpp__mlirgen__sub_kernel(%[[VAL_ARG:.*]]: !cc.ptr) attributes {"cudaq-kernel", no_this} { -// CHECK: %[[VAL_0:.*]] = call @__nvqpp_cudaq_state_numberOfQubits(%[[VAL_ARG]]) : (!cc.ptr) -> i64 +// CHECK: func.func @__nvqpp__mlirgen__sub_kernel(%arg0: !cc.ptr) attributes {"cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = cc.get_number_of_qubits %arg0 : (!cc.ptr) -> i64 // CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq[%[[VAL_0]] : i64] -// CHECK: %[[VAL_2:.*]] = quake.init_state %[[VAL_1]], %[[VAL_ARG]] : (!quake.veq, !cc.ptr) -> !quake.veq +// CHECK: %[[VAL_2:.*]] = quake.init_state %[[VAL_1]], %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq // CHECK: return -// CHECK: } - -// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_test_state_param1._Z16test_state_paramPN5cudaq5stateE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: } +// CHECK: func.func @__nvqpp__mlirgen__function_test_state_param1._Z16test_state_paramPN5cudaq5stateE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { // CHECK: %[[VAL_0:.*]] = arith.constant 8 : i64 // CHECK: %[[VAL_1:.*]] = cc.address_of @function_test_state_param1._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 : !cc.ptr x 8>> -// CHECK: %[[VAL_2:.*]] = cc.load %[[VAL_1]] : !cc.ptr x 8>> -// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> -// CHECK: cc.store %[[VAL_2]], %[[VAL_3]] : !cc.ptr x 8>> -// CHECK: %[[VAL_4:.*]] 
= cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr -// CHECK: %[[VAL_5:.*]] = call @__nvqpp_cudaq_state_createFromData_fp32(%[[VAL_4]], %[[VAL_0]]) : (!cc.ptr, i64) -> !cc.ptr -// CHECK: call @__nvqpp__mlirgen__sub_kernel(%[[VAL_5]]) : (!cc.ptr) -> () -// CHECK: call @__nvqpp_cudaq_state_delete(%[[VAL_5]]) : (!cc.ptr) -> () +// CHECK: %[[VAL_2:.*]] = cc.create_state %[[VAL_1]], %[[VAL_0]] : (!cc.ptr x 8>>, i64) -> !cc.ptr +// CHECK: call @__nvqpp__mlirgen__sub_kernel(%[[VAL_2]]) : (!cc.ptr) -> () +// CHECK: call @__nvqpp_cudaq_state_delete(%[[VAL_2]]) : (!cc.ptr) -> () // CHECK: return // CHECK: } -// CHECK-DAG: constant @function_test_state_param1._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> +// CHECK-DAG: cc.global constant @function_test_state_param1._Z16test_state_paramPN5cudaq5stateE.rodata_synth_0 (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> // CHECK-DAG: func.func private @__nvqpp_cudaq_state_delete(!cc.ptr) } - From 102f8196fef4393441f42c13a40961c05ba34ea7 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 5 Nov 2024 09:51:04 -0800 Subject: [PATCH 20/54] Fix test_argument_conversion Signed-off-by: Anna Gringauze --- runtime/test/test_argument_conversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 1326ac4d395..7c8e9f42053 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -381,7 
+381,7 @@ void test_state(mlir::MLIRContext *ctx) { // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> // CHECK: %[[VAL_1:.*]] = arith.constant 8 : i64 -// CHECK: %[[VAL_2:.*]] = cc.create_state %[[VAL_0]], %[[VAL_1]] : (!cc.ptr x 8>, i64) -> !cc.ptr +// CHECK: %[[VAL_2:.*]] = cc.create_state %[[VAL_0]], %[[VAL_1]] : (!cc.ptr x 8>>, i64) -> !cc.ptr // CHECK: } // CHECK-DAG: cc.global constant @[[VAL_GC]] (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> // clang-format on From 5ea1d973daf78890ee7f4ad2b780f9adca868d42 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 5 Nov 2024 10:00:57 -0800 Subject: [PATCH 21/54] Add printing in failing tests Signed-off-by: Anna Gringauze --- python/tests/kernel/test_kernel_qvector_state_init.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/python/tests/kernel/test_kernel_qvector_state_init.py b/python/tests/kernel/test_kernel_qvector_state_init.py index 18fa3914b3e..c832cd64836 100644 --- a/python/tests/kernel/test_kernel_qvector_state_init.py +++ b/python/tests/kernel/test_kernel_qvector_state_init.py @@ -32,11 +32,18 @@ def test_kernel_synthesis_complex(): def kernel(vec: cudaq.State): q = cudaq.qvector(vec) + counts = cudaq.sample(kernel, state) + print(f"Non-synthesized: ${counts}") + assert '00' in counts + assert '10' in counts + assert len(counts) == 2 + synthesized = cudaq.synthesize(kernel, state) counts = cudaq.sample(synthesized) - print(counts) - assert '10' in counts + print(f"Synthesized: ${counts}") assert '00' in counts + assert '10' in counts + assert len(counts) == 2 # float From 074c60f778f9dc49995199903d99fe3f83eff41b Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 5 Nov 
2024 10:02:38 -0800 Subject: [PATCH 22/54] Add printing in failing tests Signed-off-by: Anna Gringauze --- python/tests/kernel/test_kernel_qvector_state_init.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/tests/kernel/test_kernel_qvector_state_init.py b/python/tests/kernel/test_kernel_qvector_state_init.py index c832cd64836..64c1ef55d4e 100644 --- a/python/tests/kernel/test_kernel_qvector_state_init.py +++ b/python/tests/kernel/test_kernel_qvector_state_init.py @@ -34,6 +34,7 @@ def kernel(vec: cudaq.State): counts = cudaq.sample(kernel, state) print(f"Non-synthesized: ${counts}") + print(kernel) assert '00' in counts assert '10' in counts assert len(counts) == 2 @@ -41,6 +42,7 @@ def kernel(vec: cudaq.State): synthesized = cudaq.synthesize(kernel, state) counts = cudaq.sample(synthesized) print(f"Synthesized: ${counts}") + print(synthesized) assert '00' in counts assert '10' in counts assert len(counts) == 2 From 310f6ca48e0f458b23accbb84125ecca0591b902 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 12 Nov 2024 10:06:42 -0800 Subject: [PATCH 23/54] Fix failing tests Signed-off-by: Anna Gringauze --- lib/Optimizer/CodeGen/QuakeToCodegen.cpp | 2 +- python/cudaq/kernel/ast_bridge.py | 3 +++ .../cudaq/platform/py_alt_launch_kernel.cpp | 1 + .../kernel/test_kernel_qvector_state_init.py | 24 ++++++++----------- runtime/common/BaseRemoteRESTQPU.h | 1 + 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/lib/Optimizer/CodeGen/QuakeToCodegen.cpp b/lib/Optimizer/CodeGen/QuakeToCodegen.cpp index 6e913a2bec2..6774847bf80 100644 --- a/lib/Optimizer/CodeGen/QuakeToCodegen.cpp +++ b/lib/Optimizer/CodeGen/QuakeToCodegen.cpp @@ -85,7 +85,7 @@ class CreateStateOpPattern : public OpRewritePattern { auto is64Bit = isa(eleTy); if (auto cTy = dyn_cast(eleTy)) - is64Bit = isa(eleTy); + is64Bit = isa(cTy.getElementType()); auto createStateFunc = is64Bit ? 
cudaq::createCudaqStateFromDataFP64 : cudaq::createCudaqStateFromDataFP32; diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py index 13d694d7bca..ffd930bf72a 100644 --- a/python/cudaq/kernel/ast_bridge.py +++ b/python/cudaq/kernel/ast_bridge.py @@ -3829,6 +3829,9 @@ def visit_Name(self, node): if cc.StdvecType.isinstance(eleTy): self.pushValue(value) return + if cc.StateType.isinstance(eleTy): + self.pushValue(value) + return loaded = cc.LoadOp(value).result self.pushValue(loaded) elif cc.CallableType.isinstance( diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 90ba42b6171..b995f71f1ac 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -535,6 +535,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, pm.addNestedPass( cudaq::opt::createArgumentSynthesisPass(kernels, substs)); pm.addNestedPass(createCanonicalizerPass()); + pm.addPass(opt::createDeleteStates()); // Run state preparation for quantum devices (or their emulation) only. 
// Simulators have direct implementation of state initialization diff --git a/python/tests/kernel/test_kernel_qvector_state_init.py b/python/tests/kernel/test_kernel_qvector_state_init.py index 64c1ef55d4e..84a3a603f12 100644 --- a/python/tests/kernel/test_kernel_qvector_state_init.py +++ b/python/tests/kernel/test_kernel_qvector_state_init.py @@ -33,16 +33,12 @@ def kernel(vec: cudaq.State): q = cudaq.qvector(vec) counts = cudaq.sample(kernel, state) - print(f"Non-synthesized: ${counts}") - print(kernel) assert '00' in counts assert '10' in counts assert len(counts) == 2 synthesized = cudaq.synthesize(kernel, state) counts = cudaq.sample(synthesized) - print(f"Synthesized: ${counts}") - print(synthesized) assert '00' in counts assert '10' in counts assert len(counts) == 2 @@ -55,7 +51,7 @@ def kernel(vec: cudaq.State): def test_kernel_float_params_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') f = np.array([1. / np.sqrt(2.), 0., 0., 1. / np.sqrt(2.)], dtype=float) @@ -85,7 +81,7 @@ def test_kernel_float_params_f32(): @skipIfNvidiaFP64NotInstalled def test_kernel_complex_params_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], dtype=complex) @@ -104,7 +100,7 @@ def kernel(vec: cudaq.State): @skipIfNvidiaFP64NotInstalled def test_kernel_complex128_params_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], dtype=np.complex128) @@ -123,7 +119,7 @@ def kernel(vec: cudaq.State): @skipIfNvidiaFP64NotInstalled def test_kernel_complex64_params_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. 
/ np.sqrt(2.)], dtype=np.complex64) @@ -181,7 +177,7 @@ def test_kernel_complex_params_f32(): @skipIfNvidiaFP64NotInstalled def test_kernel_complex_capture_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], dtype=complex) @@ -200,7 +196,7 @@ def kernel(): @skipIfNvidiaFP64NotInstalled def test_kernel_complex128_capture_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], dtype=np.complex128) @@ -219,7 +215,7 @@ def kernel(): @skipIfNvidiaFP64NotInstalled def test_kernel_complex128_capture_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], dtype=np.complex64) @@ -280,7 +276,7 @@ def test_kernel_complex_capture_f32(): @skipIfNvidiaFP64NotInstalled def test_kernel_simulation_dtype_complex_params_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. / np.sqrt(2.)], dtype=cudaq.complex()) @@ -318,7 +314,7 @@ def kernel(vec: cudaq.State): @skipIfNvidiaFP64NotInstalled def test_kernel_simulation_dtype_capture_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') c = np.array([1. / np.sqrt(2.) + 0j, 0., 0., 1. 
/ np.sqrt(2.)], dtype=cudaq.complex()) @@ -359,7 +355,7 @@ def kernel(): @skipIfNvidiaFP64NotInstalled def test_init_from_other_kernel_state_f64(): cudaq.reset_target() - cudaq.set_target('nvidia-fp64') + cudaq.set_target('nvidia', option='fp64') @cudaq.kernel def bell(): diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 0834bc7e3e9..84eb527ebb5 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -444,6 +444,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { mlir::SmallVector substs = {substBuff}; pm.addNestedPass( opt::createArgumentSynthesisPass(kernels, substs)); + pm.addPass(opt::createDeleteStates()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); From 6fdccbadae996044512dc9453b8b94a3323d2f7c Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 12 Nov 2024 14:32:16 -0800 Subject: [PATCH 24/54] Add description for new algorithm for state syntesis Signed-off-by: Anna Gringauze --- .../Transforms/ReplaceStateWithKernel.cpp | 23 +---- runtime/common/ArgumentConversion.cpp | 83 ++++++++++++++++++- test/Quake/replace_state_with_kernel.qke | 5 +- 3 files changed, 86 insertions(+), 25 deletions(-) diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index ec7d5c25f71..c9b46205a02 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -29,31 +29,13 @@ namespace cudaq::opt { using namespace mlir; namespace { - -static bool isCall(Operation *op, std::vector &&names) { - if (op) { - if (auto callOp = dyn_cast(op)) { - if (auto calleeAttr = callOp.getCalleeAttr()) { - auto funcName = calleeAttr.getValue().str(); - if (std::find(names.begin(), names.end(), funcName) != names.end()) - return true; - } - } - } - return false; -} - -static bool 
isNumberOfQubitsCall(Operation *op) { - return isCall(op, {cudaq::getNumQubitsFromCudaqState}); -} - // clang-format off /// Replace `quake.init_state` by a call to a (modified) kernel that produced /// the state. /// /// ``` /// %0 = cc.get_state "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr -/// %1 = call @__nvqpp_cudaq_state_numberOfQubits(%0) : (!cc.ptr) -> i64 +/// %1 = cc.get_number_of_qubits %0 : (!cc.ptr) -> i64 /// %2 = quake.alloca !quake.veq[%1 : i64] /// %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq /// ─────────────────────────────────────────── @@ -87,7 +69,8 @@ class ReplaceStateWithKernelPattern "Failed to remove `quake.alloca` in state synthesis"); return failure(); } - if (isNumberOfQubitsCall(numOfQubits)) { + + if (isa(numOfQubits)) { if (numOfQubits->getUses().empty()) rewriter.eraseOp(numOfQubits); else { diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index bf903e02786..72ba288e586 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -160,8 +160,87 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, arrSize); } - // For quantum hardware, replace states with calls to kernels that generated - // them. + // For quantum hardware, we aim at replacing states with calls to kernels + // that generated them. This is done in 2 stages: + // + // 1. 
Replace state by cc.get_state instruction during argument conversion: + // + // Create two functions: + // - callee.num_qubits_N + // Calculates the number of qubits needed for the veq allocation + // - callee.init_state_N + // Initializes the veq passed as a parameter + // + // Then replace the state with + // `cc.get_state "callee.num_qubits_0" "callee.init_state_0"`: + // + // ``` + // func.func @caller(%arg0: !cc.ptr) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // %1 = cc.get_number_of_qubits %arg0: (!cc.ptr) -> i64 + // %2 = quake.alloca !quake.veq[%1 : i64] + // %3 = quake.init_state %2, %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq + // return + // } + // + // func.func private @callee(%arg0: i64) attributes {"cudaq-kernel"} { + // %cst = arith.constant 1.5707963267948966 : f64 + // %0 = quake.alloca !quake.veq[%arg0 : i64] + // %1 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref + // quake.ry (%cst) %1 : (f64, !quake.ref) -> () + // return + // } + // + // Call from the user host code: + // state = cudaq.get_state(callee, 2) + // counts = cudaq.sample(caller, state) + // ``` + // + // => after argument synthesis: + // + // ``` + // func.func @caller() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // %0 = cc.get_state "callee.num_qubits_0" "callee.init_state_0" : !cc.ptr + // %1 = cc.get_number_of_qubits %0 : (!cc.ptr) -> i64 + // %2 = quake.alloca !quake.veq[%1 : i64] + // %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq + // return + // } + // + // func.func private @callee.num_qubits_0(%arg0: !quake.veq) -> i64 attributes {"cudaq-kernel"} { + // %cst = arith.constant 2 : i64 + // return %cst : i64 + // } + // + // func.func private @callee.init_state_0(%arg0: !quake.veq) attributes {"cudaq-kernel"} { + // %cst = arith.constant 1.5707963267948966 : f64 + // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref + // quake.ry (%cst) %1 : (f64, !quake.ref) -> () + // return + // } + // ``` + 
// + // 2. Replace the `cc.get_state` ops with calls to the generated functions + // synthesized with the arguments used to create the state: + // + // After ReplaceStateWithKernel pass: + // + // func.func @caller() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // %1 = call "callee.num_qubits_0" : () -> i64 + // %2 = quake.alloca !quake.veq[%1 : i64] + // call "callee.init_0" %2: (!quake.veq) -> () + // } + // + // func.func private @callee.get_number_of_qubits_0(%arg0: !quake.veq) -> i64 attributes {"cudaq-kernel"} { + // %cst = arith.constant 2 : i64 + // return %cst : i64 + // } + // + // func.func private @callee.init_0(%arg0: !quake.veq) attributes {"cudaq-kernel"} { + // %cst = arith.constant 1.5707963267948966 : f64 + // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref + // quake.ry (%cst) %1 : (f64, !quake.ref) -> () + // return + // } if (simState->getKernelInfo().has_value()) { auto [calleeName, calleeArgs] = simState->getKernelInfo().value(); diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index 09570c62907..3fa8b62d7dd 100644 --- a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -11,12 +11,11 @@ module { func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %0 = cc.get_state "callee.modified_0" : !cc.ptr - %1 = call @__nvqpp_cudaq_state_numberOfQubits(%0) : (!cc.ptr) -> i64 + %1 = cc.get_number_of_qubits %0 : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq -return + return } - func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 func.func private @callee.modified_0() -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { %cst = arith.constant 1.5707963267948966 : f64 %0 = quake.alloca !quake.veq<2> From 1dfa8058fb0ec3e8be3ca99e3f470653dbb8fbe9 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 9 Jan 2025 
11:46:58 -0800 Subject: [PATCH 25/54] Fix tests Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.h | 4 +- .../Transforms/ArgumentSynthesis.cpp | 13 +- runtime/common/ArgumentConversion.cpp | 24 +- runtime/common/BaseRemoteRESTQPU.h | 21 +- .../Remote-Sim/qvector_init_from_state.cpp | 194 +++++++------- .../qvector_init_from_state_lazy.cpp | 245 ++++++++++-------- .../execution/qvector_init_from_state.cpp | 91 ++++--- test/Quake/arg_subst-5.txt | 6 +- test/Quake/arg_subst_func.qke | 21 +- 9 files changed, 326 insertions(+), 293 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 6b66c473d53..9dd54c21f6f 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -62,8 +62,8 @@ createArgumentSynthesisPass(mlir::ArrayRef funcNames, /// functions and the substitutions text can be built as an unzipped pair of /// lists. std::unique_ptr -createArgumentSynthesisPass(const std::vector& funcNames, - const std::vector& substitutions); +createArgumentSynthesisPass(const std::vector &funcNames, + const std::vector &substitutions); // declarative passes #define GEN_PASS_DECL diff --git a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp index 377164865d3..932c091cb73 100644 --- a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp +++ b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp @@ -164,12 +164,11 @@ cudaq::opt::createArgumentSynthesisPass(ArrayRef funcNames, ArgumentSynthesisOptions{pairs}); } -std::unique_ptr -cudaq::opt::createArgumentSynthesisPass(const std::vector& funcNames, - const std::vector& substitutions) { +std::unique_ptr cudaq::opt::createArgumentSynthesisPass( + const std::vector &funcNames, + const std::vector &substitutions) { return cudaq::opt::createArgumentSynthesisPass( - mlir::SmallVector{funcNames.begin(), - funcNames.end()}, - 
mlir::SmallVector{substitutions.begin(), - substitutions.end()}); + mlir::SmallVector{funcNames.begin(), funcNames.end()}, + mlir::SmallVector{substitutions.begin(), + substitutions.end()}); } diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 08639270754..407fc718a04 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -175,15 +175,16 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // Then replace the state with // `quake.get_state "callee.num_qubits_0" "callee.init_state_0"`: // + // clang-format off // ``` - // func.func @caller(%arg0: !cc.ptr) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // func.func @caller(%arg0: !cc.ptr) { // %1 = quake.get_number_of_qubits %arg0: (!cc.ptr) -> i64 // %2 = quake.alloca !quake.veq[%1 : i64] // %3 = quake.init_state %2, %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq // return // } // - // func.func private @callee(%arg0: i64) attributes {"cudaq-kernel"} { + // func.func private @callee(%arg0: i64) { // %cst = arith.constant 1.5707963267948966 : f64 // %0 = quake.alloca !quake.veq[%arg0 : i64] // %1 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref @@ -195,11 +196,13 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // state = cudaq.get_state(callee, 2) // counts = cudaq.sample(caller, state) // ``` + // clang-format on // // => after argument synthesis: // + // clang-format off // ``` - // func.func @caller() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // func.func @caller() { // %0 = quake.get_state "callee.num_qubits_0" "callee.init_state_0" : !cc.ptr // %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 // %2 = quake.alloca !quake.veq[%1 : i64] @@ -207,41 +210,46 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // return // } // - // func.func private @callee.num_qubits_0(%arg0: !quake.veq) -> i64 attributes {"cudaq-kernel"} { + 
// func.func private @callee.num_qubits_0(%arg0: !quake.veq) -> i64 { // %cst = arith.constant 2 : i64 // return %cst : i64 // } // - // func.func private @callee.init_state_0(%arg0: !quake.veq) attributes {"cudaq-kernel"} { + // func.func private @callee.init_state_0(%arg0: !quake.veq) { // %cst = arith.constant 1.5707963267948966 : f64 // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref // quake.ry (%cst) %1 : (f64, !quake.ref) -> () // return // } // ``` + // clang-format on // // 2. Replace the `quake.get_state` ops with calls to the generated functions // synthesized with the arguments used to create the state: // // After ReplaceStateWithKernel pass: // - // func.func @caller() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + // clang-format off + // ``` + // func.func @caller() { // %1 = call "callee.num_qubits_0" : () -> i64 // %2 = quake.alloca !quake.veq[%1 : i64] // call "callee.init_0" %2: (!quake.veq) -> () // } // - // func.func private @callee.get_number_of_qubits_0(%arg0: !quake.veq) -> i64 attributes {"cudaq-kernel"} { + // func.func private @callee.num_qubits_0(%arg0: !quake.veq) -> i64 { // %cst = arith.constant 2 : i64 // return %cst : i64 // } // - // func.func private @callee.init_0(%arg0: !quake.veq) attributes {"cudaq-kernel"} { + // func.func private @callee.init_0(%arg0: !quake.veq) { // %cst = arith.constant 1.5707963267948966 : f64 // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref // quake.ry (%cst) %1 : (f64, !quake.ref) -> () // return // } + // ``` + // clang-format on if (simState->getKernelInfo().has_value()) { auto [calleeName, calleeArgs] = simState->getKernelInfo().value(); diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 6ef988752df..cab04096f97 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -417,19 +417,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { 
moduleOp->setAttrs(m_module->getAttrDictionary()); for (auto &op : m_module.getOps()) { - // if (auto funcOp = dyn_cast(op)) { - // // Add function definitions for runtime functions that must - // // be removed after synthesis in cleanup passes. - // static const std::vector stateFuncs = { - // cudaq::getNumQubitsFromCudaqState, - // cudaq::createCudaqStateFromDataFP32, - // cudaq::createCudaqStateFromDataFP64}; - - // if (funcOp.getBody().empty() && - // std::find(stateFuncs.begin(), stateFuncs.end(), funcOp.getName()) != - // stateFuncs.end()) - // moduleOp.push_back(funcOp.clone()); - // } // Add any global symbols, including global constant arrays. // Global constant arrays can be created during compilation, // `lift-array-alloc`, `argument-synthesis`, `quake-synthesizer`, @@ -468,17 +455,11 @@ class BaseRemoteRESTQPU : public cudaq::QPU { auto [kernels, substs] = argCon.collectAllSubstitutions(); pm.addNestedPass( cudaq::opt::createArgumentSynthesisPass(kernels, substs)); - // pm.addNestedPass( - // cudaq::opt::createArgumentSynthesisPass( - // mlir::SmallVector{kernels.begin(), - // kernels.end()}, - // mlir::SmallVector{substs.begin(), - // substs.end()})); pm.addPass(opt::createDeleteStates()); pm.addNestedPass( opt::createReplaceStateWithKernel()); pm.addPass(mlir::createSymbolDCEPass()); - } else if (updatedArgs) {; + } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); } diff --git a/targettests/Remote-Sim/qvector_init_from_state.cpp b/targettests/Remote-Sim/qvector_init_from_state.cpp index 90246ac2cfd..098a52194e6 100644 --- a/targettests/Remote-Sim/qvector_init_from_state.cpp +++ b/targettests/Remote-Sim/qvector_init_from_state.cpp @@ -19,13 +19,8 @@ #include #include -__qpu__ void test_init_state() { - cudaq::qvector q(2); - ry(M_PI / 2.0, q[0]); -} - -__qpu__ void test_init_large_state() { - cudaq::qvector q(14); +__qpu__ void test_init_state(int n) { + cudaq::qvector 
q(n); ry(M_PI / 2.0, q[0]); } @@ -39,18 +34,18 @@ __qpu__ void test_state_param2(cudaq::state *state, cudaq::pauli_word w) { cudaq::exp_pauli(1.0, q, w); } -__qpu__ void test_state_param3(cudaq::state *initial_state, +__qpu__ void test_state_param3(cudaq::state *state, std::vector &words) { - cudaq::qvector q(initial_state); + cudaq::qvector q(state); for (std::size_t i = 0; i < words.size(); ++i) { cudaq::exp_pauli(1.0, q, words[i]); } } -__qpu__ void test_state_param4(cudaq::state *initial_state, +__qpu__ void test_state_param4(cudaq::state *state, std::vector &coefficients, std::vector &words) { - cudaq::qvector q(initial_state); + cudaq::qvector q(state); for (std::size_t i = 0; i < words.size(); ++i) { cudaq::exp_pauli(coefficients[i], q, words[i]); } @@ -83,48 +78,64 @@ int main() { counts = cudaq::sample(test_state_param, &state1); printCounts(counts); } - // clang-format off -// CHECK: Passing state created from data as argument (kernel mode) -// CHECK: 011 -// CHECK: 111 + // CHECK: Passing state created from data as argument (kernel mode) + // CHECK: 011 + // CHECK: 111 -// CHECK: 000 -// CHECK: 100 + // CHECK: 000 + // CHECK: 100 // clang-format on { std::cout << "Passing state from another kernel as argument (kernel mode)" << std::endl; - auto state = cudaq::get_state(test_init_state); + auto state = cudaq::get_state(test_init_state, 2); auto counts = cudaq::sample(test_state_param, &state); printCounts(counts); } // clang-format off -// CHECK: Passing state from another kernel as argument (kernel mode) -// CHECK: 01 -// CHECK: 11 + // CHECK: Passing state from another kernel as argument (kernel mode) + // CHECK: 01 + // CHECK: 11 // clang-format on { std::cout << "Passing large state from another kernel as argument (kernel mode)" << std::endl; - auto largeState = cudaq::get_state(test_init_large_state); + auto largeState = cudaq::get_state(test_init_state, 14); auto counts = cudaq::sample(test_state_param, &largeState); printCounts(counts); } // 
clang-format off -// CHECK: Passing large state from another kernel as argument (kernel mode) -// CHECK: 01111111111111 -// CHECK: 11111111111111 + // CHECK: Passing large state from another kernel as argument (kernel mode) + // CHECK: 01111111111111 + // CHECK: 11111111111111 + // clang-format on + + { + std::cout << "Passing state from another kernel as argument" + " with pauli word arg (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state, 2); + auto counts = + cudaq::sample(test_state_param2, &state, cudaq::pauli_word{"XX"}); + printCounts(counts); + } + // clang-format off + // CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 // clang-format on { std::cout << "Passing state from another kernel as argument iteratively " "(kernel mode)" << std::endl; - auto state = cudaq::get_state(test_init_state); + auto state = cudaq::get_state(test_init_state, 2); for (auto i = 0; i < 4; i++) { auto counts = cudaq::sample(test_state_param, &state); std::cout << "Iteration: " << i << std::endl; @@ -133,42 +144,26 @@ int main() { } } // clang-format off -// CHECK: Passing state from another kernel as argument iteratively (kernel mode) -// CHECK: Iteration: 0 -// CHECK: 01 -// CHECK: 11 -// CHECK: Iteration: 1 -// CHECK: 00 -// CHECK: 10 -// CHECK: Iteration: 2 -// CHECK: 01 -// CHECK: 11 -// CHECK: Iteration: 3 -// CHECK: 00 -// CHECK: 10 - // clang-format on - - { - std::cout << "Passing state from another kernel as argument" - " with pauli word arg (kernel mode)" - << std::endl; - auto state = cudaq::get_state(test_init_state); - auto counts = cudaq::sample(test_state_param2, &state, cudaq::pauli_word{"XX"}); - printCounts(counts); - } - // clang-format off -// CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 + // CHECK: Passing state from another kernel as 
argument iteratively (kernel mode) + // CHECK: Iteration: 0 + // CHECK: 01 + // CHECK: 11 + // CHECK: Iteration: 1 + // CHECK: 00 + // CHECK: 10 + // CHECK: Iteration: 2 + // CHECK: 01 + // CHECK: 11 + // CHECK: Iteration: 3 + // CHECK: 00 + // CHECK: 10 // clang-format on { std::cout << "Passing state from another kernel as argument iteratively " "with vector args (kernel mode)" << std::endl; - auto state = cudaq::get_state(test_init_state); + auto state = cudaq::get_state(test_init_state, 2); auto words = std::vector{cudaq::pauli_word{"XX"}}; for (auto i = 0; i < 4; i++) { auto counts = cudaq::sample(test_state_param3, &state, words); @@ -178,36 +173,35 @@ int main() { words = std::vector{cudaq::pauli_word{"XY"}}; } } - // Passing state from another kernel as argument iteratively with vector args - // (kernel mode) // clang-format off -// CHECK: Iteration: 0 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 1 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 2 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 3 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 + // CHECK: Passing state from another kernel as argument iteratively with vector args (kernel mode) + // CHECK: Iteration: 0 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 1 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 2 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 3 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 // clang-format on { std::cout << "Passing state from another kernel as argument iteratively " "with vector args with 2 elements (kernel mode)" << std::endl; - auto state = cudaq::get_state(test_init_state); + auto state = cudaq::get_state(test_init_state, 2); auto words = std::vector{cudaq::pauli_word{"XX"}, cudaq::pauli_word{"II"}}; auto coeffs = std::vector{1.0, 2.0}; @@ -222,26 
+216,26 @@ int main() { } } // clang-format off -// CHECK: Passing state from another kernel as argument iteratively with vector args with 2 elements (kernel mode) -// CHECK: Iteration: 0 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 1 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 2 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 3 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 + // CHECK: Passing state from another kernel as argument iteratively with vector args with 2 elements (kernel mode) + // CHECK: Iteration: 0 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 1 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 2 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 3 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 // clang-format on } diff --git a/targettests/Remote-Sim/qvector_init_from_state_lazy.cpp b/targettests/Remote-Sim/qvector_init_from_state_lazy.cpp index abd1c6e3474..9fedb6a995b 100644 --- a/targettests/Remote-Sim/qvector_init_from_state_lazy.cpp +++ b/targettests/Remote-Sim/qvector_init_from_state_lazy.cpp @@ -16,38 +16,34 @@ #include #include +#include +#include struct test_init_state { - void operator()() __qpu__ { - cudaq::qvector q(2); - ry(M_PI/2.0, q[0]); - } -}; - -struct test_init_large_state { - void operator()() __qpu__ { - cudaq::qvector q(14); - ry(M_PI/2.0, q[0]); + void operator()(int n) __qpu__ { + cudaq::qvector q(n); + ry(M_PI / 2.0, q[0]); } }; struct test_state_param { - void operator()(cudaq::state *initial_state) __qpu__ { - cudaq::qvector q(initial_state); + void operator()(cudaq::state *state) __qpu__ { + cudaq::qvector q(state); x(q); } }; struct test_state_param2 { - void operator()(cudaq::state *initial_state, cudaq::pauli_word w) __qpu__ { - cudaq::qvector q(initial_state); + void operator()(cudaq::state *state, 
cudaq::pauli_word w) __qpu__ { + cudaq::qvector q(state); cudaq::exp_pauli(1.0, q, w); } }; struct test_state_param3 { - void operator()(cudaq::state *initial_state, std::vector& words) __qpu__ { - cudaq::qvector q(initial_state); + void operator()(cudaq::state *state, + std::vector &words) __qpu__ { + cudaq::qvector q(state); for (std::size_t i = 0; i < words.size(); ++i) { cudaq::exp_pauli(1.0, q, words[i]); } @@ -55,15 +51,16 @@ struct test_state_param3 { }; struct test_state_param4 { - void operator()(cudaq::state *initial_state, std::vector &coefficients, std::vector& words) __qpu__ { - cudaq::qvector q(initial_state); + void operator()(cudaq::state *state, std::vector &coefficients, + std::vector &words) __qpu__ { + cudaq::qvector q(state); for (std::size_t i = 0; i < words.size(); ++i) { cudaq::exp_pauli(coefficients[i], q, words[i]); } } }; -void printCounts(cudaq::sample_result& result) { +void printCounts(cudaq::sample_result &result) { std::vector values{}; for (auto &&[bits, counts] : result) { values.push_back(bits); @@ -77,47 +74,77 @@ void printCounts(cudaq::sample_result& result) { int main() { std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0., 0., 0., 0., 0.}; - std::vector vec1{0., 0., 0., 0., 0., 0., M_SQRT1_2, M_SQRT1_2}; + std::vector vec1{0., 0., 0., 0., + 0., 0., M_SQRT1_2, M_SQRT1_2}; auto state = cudaq::state::from_data(vec); auto state1 = cudaq::state::from_data(vec1); { - std::cout << "Passing state created from data as argument (kernel mode)" << std::endl; - auto counts = cudaq::sample(test_state_param{}, &state); - printCounts(counts); + std::cout << "Passing state created from data as argument (kernel mode)" + << std::endl; + auto counts = cudaq::sample(test_state_param{}, &state); + printCounts(counts); - counts = cudaq::sample(test_state_param{}, &state1); - printCounts(counts); + counts = cudaq::sample(test_state_param{}, &state1); + printCounts(counts); } -// CHECK: Passing state created from data as argument (kernel mode) -// CHECK: 
011 -// CHECK: 111 + // clang-format off + // CHECK: Passing state created from data as argument (kernel mode) + // CHECK: 011 + // CHECK: 111 -// CHECK: 000 -// CHECK: 100 + // CHECK: 000 + // CHECK: 100 + // clang-format on { - std::cout << "Passing state from another kernel as argument (kernel mode)" << std::endl; - auto state = cudaq::get_state(test_init_state{}); + std::cout << "Passing state from another kernel as argument (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); auto counts = cudaq::sample(test_state_param{}, &state); printCounts(counts); } -// CHECK: Passing state from another kernel as argument (kernel mode) -// CHECK: 01 -// CHECK: 11 + // clang-format off + // CHECK: Passing state from another kernel as argument (kernel mode) + // CHECK: 01 + // CHECK: 11 + // clang-format on { - std::cout << "Passing large state from another kernel as argument (kernel mode)" << std::endl; - auto largeState = cudaq::get_state(test_init_large_state{}); + std::cout + << "Passing large state from another kernel as argument (kernel mode)" + << std::endl; + auto largeState = cudaq::get_state(test_init_state{}, 14); auto counts = cudaq::sample(test_state_param{}, &largeState); printCounts(counts); } -// CHECK: Passing large state from another kernel as argument (kernel mode) -// CHECK: 01111111111111 -// CHECK: 11111111111111 + // clang-format off + // CHECK: Passing large state from another kernel as argument (kernel mode) + // CHECK: 01111111111111 + // CHECK: 11111111111111 + // clang-format on + + { + std::cout << "Passing state from another kernel as argument" + " with pauli word arg (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); + auto counts = + cudaq::sample(test_state_param2{}, &state, cudaq::pauli_word{"XX"}); + printCounts(counts); + } + // clang-format off + // CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) + // CHECK: 00 + // CHECK: 01 + // 
CHECK: 10 + // CHECK: 11 + // clang-format on { - std::cout << "Passing state from another kernel as argument iteratively (kernel mode)" << std::endl; - auto state = cudaq::get_state(test_init_state{}); + std::cout << "Passing state from another kernel as argument iteratively " + "(kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); for (auto i = 0; i < 4; i++) { auto counts = cudaq::sample(test_state_param{}, &state); std::cout << "Iteration: " << i << std::endl; @@ -125,23 +152,27 @@ int main() { state = cudaq::get_state(test_state_param{}, &state); } } -// CHECK: Passing state from another kernel as argument iteratively (kernel mode) -// CHECK: Iteration: 0 -// CHECK: 01 -// CHECK: 11 -// CHECK: Iteration: 1 -// CHECK: 00 -// CHECK: 10 -// CHECK: Iteration: 2 -// CHECK: 01 -// CHECK: 11 -// CHECK: Iteration: 3 -// CHECK: 00 -// CHECK: 10 + // clang-format off + // CHECK: Passing state from another kernel as argument iteratively (kernel mode) + // CHECK: Iteration: 0 + // CHECK: 01 + // CHECK: 11 + // CHECK: Iteration: 1 + // CHECK: 00 + // CHECK: 10 + // CHECK: Iteration: 2 + // CHECK: 01 + // CHECK: 11 + // CHECK: Iteration: 3 + // CHECK: 00 + // CHECK: 10 + // clang-format on { - std::cout << "Passing state from another kernel as argument iteratively with vector args (kernel mode)" << std::endl; - auto state = cudaq::get_state(test_init_state{}); + std::cout << "Passing state from another kernel as argument iteratively " + "with vector args (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); auto words = std::vector{cudaq::pauli_word{"XX"}}; for (auto i = 0; i < 4; i++) { auto counts = cudaq::sample(test_state_param3{}, &state, words); @@ -151,61 +182,69 @@ int main() { words = std::vector{cudaq::pauli_word{"XY"}}; } } -// CHECK: Passing state from another kernel as argument iteratively with vector args (kernel mode) -// CHECK: Iteration: 0 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 
11 -// CHECK: Iteration: 1 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 2 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 3 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 + // clang-format off + // CHECK: Passing state from another kernel as argument iteratively with vector args (kernel mode) + // CHECK: Iteration: 0 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 1 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 2 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 3 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // clang-format on { - std::cout << "Passing state from another kernel as argument iteratively with vector args with 2 elements (kernel mode)" << std::endl; - auto state = cudaq::get_state(test_init_state{}); - auto words = std::vector{cudaq::pauli_word{"XX"}, cudaq::pauli_word{"II"}}; + std::cout << "Passing state from another kernel as argument iteratively " + "with vector args with 2 elements (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); + auto words = std::vector{cudaq::pauli_word{"XX"}, + cudaq::pauli_word{"II"}}; auto coeffs = std::vector{1.0, 2.0}; for (auto i = 0; i < 4; i++) { auto counts = cudaq::sample(test_state_param4{}, &state, coeffs, words); std::cout << "Iteration: " << i << std::endl; printCounts(counts); state = cudaq::get_state(test_state_param4{}, &state, coeffs, words); - words = std::vector{cudaq::pauli_word{"II"}, cudaq::pauli_word{"XY"}}; + words = std::vector{cudaq::pauli_word{"II"}, + cudaq::pauli_word{"XY"}}; coeffs = std::vector{1.0, 2.0}; } } -// CHECK: Passing state from another kernel as argument iteratively with vector args with 2 elements (kernel mode) -// CHECK: Iteration: 0 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 1 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 
-// CHECK: 11 -// CHECK: Iteration: 2 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 -// CHECK: Iteration: 3 -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 + // clang-format off + // CHECK: Passing state from another kernel as argument iteratively with vector args with 2 elements (kernel mode) + // CHECK: Iteration: 0 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 1 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 2 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // CHECK: Iteration: 3 + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // clang-format on } diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index 681e42eee07..62d162e1781 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * * All rights reserved. 
* * * * This source code and the accompanying materials are made available under * @@ -27,7 +27,7 @@ struct test_init_state { void operator()(int n) __qpu__ { cudaq::qvector q(n); - ry(M_PI/2.0, q[0]); + ry(M_PI / 2.0, q[0]); } }; @@ -45,6 +45,26 @@ struct test_state_param2 { } }; +struct test_state_param3 { + void operator()(cudaq::state *state, + std::vector &words) __qpu__ { + cudaq::qvector q(state); + for (std::size_t i = 0; i < words.size(); ++i) { + cudaq::exp_pauli(1.0, q, words[i]); + } + } +}; + +struct test_state_param4 { + void operator()(cudaq::state *state, std::vector &coefficients, + std::vector &words) __qpu__ { + cudaq::qvector q(state); + for (std::size_t i = 0; i < words.size(); ++i) { + cudaq::exp_pauli(coefficients[i], q, words[i]); + } + } +}; + void printCounts(cudaq::sample_result &result) { std::vector values{}; for (auto &&[bits, counts] : result) { @@ -72,14 +92,13 @@ int main() { counts = cudaq::sample(test_state_param{}, &state1); printCounts(counts); } - // clang-format off -// CHECK: Passing state created from data as argument (kernel mode) -// CHECK: 011 -// CHECK: 111 + // CHECK: Passing state created from data as argument (kernel mode) + // CHECK: 011 + // CHECK: 111 -// CHECK: 000 -// CHECK: 100 + // CHECK: 000 + // CHECK: 100 // clang-format on { @@ -90,16 +109,17 @@ int main() { printCounts(counts); } // clang-format off -// CHECK: Passing state from another kernel as argument (kernel mode) -// CHECK: 01 -// CHECK: 11 + // CHECK: Passing state from another kernel as argument (kernel mode) + // CHECK: 01 + // CHECK: 11 // clang-format on { std::cout << "Passing large state from another kernel as argument (kernel mode)" << std::endl; - // TODO: State larger than 5 qubits fails on iqm machines with Adonis architecture + // TODO: State larger than 5 qubits fails on iqm machines with Adonis + // architecture // TODO: State larger than 8 qubits fails on oqc and anyon // Up to 14 bits works with quantinuum an ionq auto largeState = 
cudaq::get_state(test_init_state{}, 5); @@ -107,9 +127,9 @@ int main() { printCounts(counts); } // clang-format off -// CHECK: Passing large state from another kernel as argument (kernel mode) -// CHECK: 01111 -// CHECK: 11111 + // CHECK: Passing large state from another kernel as argument (kernel mode) + // CHECK: 01111 + // CHECK: 11111 // clang-format on { @@ -117,15 +137,16 @@ int main() { " with pauli word arg (kernel mode)" << std::endl; auto state = cudaq::get_state(test_init_state{}, 2); - auto counts = cudaq::sample(test_state_param2{}, &state, cudaq::pauli_word{"XX"}); + auto counts = + cudaq::sample(test_state_param2{}, &state, cudaq::pauli_word{"XX"}); printCounts(counts); } // clang-format off -// CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) -// CHECK: 00 -// CHECK: 01 -// CHECK: 10 -// CHECK: 11 + // CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 // clang-format on { @@ -141,18 +162,20 @@ int main() { } } // clang-format off -// CHECK: Passing state from another kernel as argument iteratively (kernel mode) -// CHECK: Iteration: 0 -// CHECK: 01 -// CHECK: 11 -// CHECK: Iteration: 1 -// CHECK: 00 -// CHECK: 10 -// CHECK: Iteration: 2 -// CHECK: 01 -// CHECK: 11 -// CHECK: Iteration: 3 -// CHECK: 00 -// CHECK: 10 + // CHECK: Passing state from another kernel as argument iteratively (kernel mode) + // CHECK: Iteration: 0 + // CHECK: 01 + // CHECK: 11 + // CHECK: Iteration: 1 + // CHECK: 00 + // CHECK: 10 + // CHECK: Iteration: 2 + // CHECK: 01 + // CHECK: 11 + // CHECK: Iteration: 3 + // CHECK: 00 + // CHECK: 10 // clang-format on + + // TODO: add tests for vectors of pauli words after we can lift the arrays of pauli words.
} diff --git a/test/Quake/arg_subst-5.txt b/test/Quake/arg_subst-5.txt index c5e727bb79e..2038ad31ccd 100644 --- a/test/Quake/arg_subst-5.txt +++ b/test/Quake/arg_subst-5.txt @@ -7,9 +7,5 @@ // ========================================================================== // cc.arg_subst[0] { - %0 = cc.string_literal "init" : !cc.ptr> - %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr - %2 = func.call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr - %3 = cc.cast %2 : (!cc.ptr) -> !cc.ptr + %0 = quake.get_state "init" : !cc.ptr } -func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr diff --git a/test/Quake/arg_subst_func.qke b/test/Quake/arg_subst_func.qke index 97bb3ff3a4d..768216567d7 100644 --- a/test/Quake/arg_subst_func.qke +++ b/test/Quake/arg_subst_func.qke @@ -148,16 +148,13 @@ func.func @testy4(%arg0: !cc.stdvec>) { // CHECK: } func.func @testy5(%arg0: !cc.ptr) { - %3 = call @__nvqpp_cudaq_state_numberOfQubits(%arg0) : (!cc.ptr) -> i64 - %4 = quake.alloca !quake.veq[%3 : i64] - %5 = quake.init_state %4, %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq + %0 = quake.get_number_of_qubits %arg0 : (!cc.ptr) -> i64 + %1 = quake.alloca !quake.veq[%0 : i64] + %5 = quake.init_state %1, %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq return } -func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 -func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr - -func.func private @init(%arg0: i32) -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { +func.func private @init(%arg0: i32) -> !quake.veq { %cst = arith.constant 1.5707963267948966 : f64 %0 = cc.cast signed %arg0 : (i32) -> i64 %1 = quake.alloca !quake.veq[%0 : i64] @@ -166,17 +163,13 @@ func.func private @init(%arg0: i32) -> !quake.veq attributes {"cudaq-entrypoi } // CHECK-LABEL: func.func @testy5() { -// CHECK: %[[VAL_0:.*]] = cc.string_literal "init" : !cc.ptr> -// CHECK: %[[VAL_1:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr>) -> !cc.ptr -// CHECK: %[[VAL_2:.*]] = call 
@__nvqpp_cudaq_state_get(%[[VAL_1]]) : (!cc.ptr) -> !cc.ptr -// CHECK: %[[VAL_3:.*]] = call @__nvqpp_cudaq_state_numberOfQubits(%[[VAL_2]]) : (!cc.ptr) -> i64 +// CHECK: %[[VAL_2:.*]] = quake.get_state "init" : !cc.ptr +// CHECK: %[[VAL_3:.*]] = quake.get_number_of_qubits %[[VAL_2]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_5:.*]] = quake.init_state %[[VAL_4]], %[[VAL_2]] : (!quake.veq, !cc.ptr) -> !quake.veq // CHECK: return // CHECK: } -// CHECK: func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 -// CHECK: func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr -// CHECK: func.func private @init() -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: func.func private @init() -> !quake.veq { // CHECK: %[[VAL_7:.*]] = quake.alloca !quake.veq<2> // CHECK: %[[VAL_8:.*]] = quake.relax_size %[[VAL_7:.*]] : (!quake.veq<2>) -> !quake.veq // CHECK: return %[[VAL_8]] : !quake.veq From 95633714a23ad2823369a86b7455537239da5b02 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 21 Jan 2025 14:52:19 -0800 Subject: [PATCH 26/54] Make intermediate IR legal by separating allocs --- .../cudaq/Optimizer/Dialect/Quake/QuakeOps.td | 22 +- include/cudaq/Optimizer/Transforms/Passes.td | 10 +- .../Transforms/ReplaceStateWithKernel.cpp | 79 +++-- runtime/common/ArgumentConversion.cpp | 312 ++++++++++++++---- runtime/cudaq/qis/quantum_state.h | 4 +- runtime/test/CMakeLists.txt | 1 + runtime/test/FakeQuantumState.h | 159 +++++++++ runtime/test/FakeSimulationState.h | 20 +- runtime/test/test_argument_conversion.cpp | 238 ++++++++++++- .../execution/qvector_init_from_state.cpp | 1 + test/Quake/arg_subst-5.txt | 2 +- test/Quake/arg_subst-6.txt | 2 +- test/Quake/arg_subst_func.qke | 26 +- test/Quake/replace_state_with_kernel.qke | 56 +++- 14 files changed, 774 insertions(+), 158 deletions(-) create mode 100644 runtime/test/FakeQuantumState.h diff --git 
a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td index 9a2af0ee622..e5bb1222088 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td @@ -1467,19 +1467,29 @@ def QuakeOp_GetStateOp : QuakeOp<"get_state", [Pure] > { let summary = "Get state from kernel with the provided name."; let description = [{ This operation is created by argument synthesis of state pointer arguments - for quantum devices. It takes a kernel name as ASCIIZ string literal value - and returns the kernel's quantum state. The operation is replaced by a call - to the kernel with the provided name in ReplaceStateByKernel pass. + for quantum devices. + + It takes two kernel names as ASCIIZ string literals: + - "num_qubits" for determining the size of the allocation to initialize + - "init" for initializing the state the same way as the original kernel + passed to `cudaq::get_state` + + It returns the quantum state of the original kernel passed to + `cudaq::get_state`. The operation is replaced by calls to the kernels with + the provided names in the `ReplaceStateWithKernel` pass. 
```mlir - %0 = quake.get_state "callee" : !cc.ptr + %0 = quake.get_state "num_qubits" "init" : !cc.ptr ``` }]; - let arguments = (ins StrAttr:$calleeName); + let arguments = (ins + StrAttr:$numQubitsFuncName, + StrAttr:$initFuncName + ); let results = (outs cc_PointerType:$result); let assemblyFormat = [{ - $calleeName `:` qualified(type(results)) attr-dict + $numQubitsFuncName $initFuncName `:` qualified(type(results)) attr-dict }]; } diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 00aecc8a718..71bbd8dd4d5 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -841,8 +841,8 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func Before ReplaceStateWithKernel (replace-state-with-kernel): ``` - func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %0 = quake.get_state "callee.modified_0" : !cc.ptr + func.func @foo() { + %0 = quake.get_state "callee.num_qubits_0" "callee.init_0": !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq @@ -852,8 +852,10 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func After ReplaceStateWithKernel (replace-state-with-kernel): ``` - func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %3 = call @__nvqpp__mlirgen__test_init_state.modified_0() : () -> !quake.veq + func.func @foo() { + %1 = call @callee.num_qubits_0() : () -> i64 + %2 = quake.alloca !quake.veq[%1 : i64] + %3 = call @callee.init_0(%2) : (!quake.veq) -> !quake.veq return } ``` diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index e232ae0983a..d102d156da2 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ 
b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -29,60 +29,77 @@ namespace cudaq::opt { using namespace mlir; namespace { +// clang-format off +/// Replace `quake.get_number_of_qubits` by a call to a function +/// that computes the number of qubits for a state. +/// +/// ``` +/// %0 = quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr +/// %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 +/// ─────────────────────────────────────────── +/// ... +/// %1 = call @callee.num_qubits_0() : () -> i64 +/// ``` +// clang-format on +class ReplaceGetNumQubitsPattern + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(quake::GetNumberOfQubitsOp numQubits, + PatternRewriter &rewriter) const override { + + auto stateOp = numQubits.getOperand(); + if (auto getState = stateOp.getDefiningOp()) { + auto numQubitsName = getState.getNumQubitsFuncName(); + + rewriter.setInsertionPoint(numQubits); + rewriter.replaceOpWithNewOp( + numQubits, numQubits.getType(), numQubitsName, mlir::ValueRange{}); + return success(); + } + return numQubits->emitError( + "ReplaceStateWithKernel: failed to replace `quake.get_num_qubits`"); + } +}; + // clang-format off /// Replace `quake.init_state` by a call to a (modified) kernel that produced /// the state. /// /// ``` -/// %0 = quake.get_state "callee.modified_0" : !cc.ptr -/// %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 -/// %2 = quake.alloca !quake.veq[%1 : i64] +/// %0 = quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr /// %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq /// ─────────────────────────────────────────── /// ... 
-/// %5 = call @callee.modified_0() : () -> !quake.veq +/// %3 = call @callee.init_0(%2): (!quake.veq) -> !quake.veq /// ``` // clang-format on -class ReplaceStateWithKernelPattern +class ReplaceInitStatePattern : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(quake::InitializeStateOp initState, PatternRewriter &rewriter) const override { - auto *alloca = initState.getOperand(0).getDefiningOp(); + auto allocaOp = initState.getOperand(0); auto stateOp = initState.getOperand(1); if (auto ptrTy = dyn_cast(stateOp.getType())) { if (isa(ptrTy.getElementType())) { - auto *numOfQubits = alloca->getOperand(0).getDefiningOp(); - if (auto getState = stateOp.getDefiningOp()) { - auto calleeName = getState.getCalleeName(); + auto initName = getState.getInitFuncName(); + + rewriter.setInsertionPoint(initState); rewriter.replaceOpWithNewOp( - initState, initState.getType(), calleeName, mlir::ValueRange{}); - - if (alloca->getUses().empty()) - rewriter.eraseOp(alloca); - else { - alloca->emitError( - "Failed to remove `quake.alloca` in state synthesis"); - return failure(); - } - - if (isa(numOfQubits)) { - if (numOfQubits->getUses().empty()) - rewriter.eraseOp(numOfQubits); - else { - numOfQubits->emitError("Failed to remove runtime call to get " - "number of qubits in state synthesis"); - return failure(); - } - } + initState, initState.getType(), initName, + mlir::ValueRange{allocaOp}); + return success(); } - numOfQubits->emitError( - "Failed to replace `quake.init_state` in state synthesis"); + + return initState->emitError( + "ReplaceStateWithKernel: failed to replace `quake.init_state`"); } } return failure(); @@ -99,7 +116,7 @@ class ReplaceStateWithKernelPass auto *ctx = &getContext(); auto func = getOperation(); RewritePatternSet patterns(ctx); - patterns.insert(ctx); + patterns.insert(ctx); LLVM_DEBUG(llvm::dbgs() << "Before replace state with kernel: " << func << '\n'); diff --git 
a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 407fc718a04..ebc8c52ae18 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -99,6 +99,225 @@ static Value genConstant(OpBuilder &, cudaq::cc::StructType, void *, static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp substMod, llvm::DataLayout &); +/// Create callee.init_N that initializes the state +/// Callee: +/// func.func @__nvqpp__mlirgen__callee(%arg0: i64) { +/// %0 = cc.alloca i64 +/// cc.store %arg0, %0 : !cc.ptr +/// %1 = cc.load %0 : !cc.ptr +/// %2 = quake.alloca !quake.veq[%1 : i64] +/// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref +/// quake.x %3 : (!quake.ref) -> () +/// return +/// } +/// callee.init_N: +/// func.func private @callee.init_0(%arg0: !quake.veq, %arg0: i64) -> +/// !!quake.veq { +/// %1 = quake.extract_ref %arg0[1] : (!quake.veq<2>) -> !quake.ref +/// quake.x %1 : (f64, !quake.ref) -> () +/// return %arg0: !quake.veq +/// } +static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, + func::FuncOp calleeFunc, + std::string &initKernelName) { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(sourceMod.getBody()); + + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); + + auto initFunc = cast(builder.clone(*calleeFunc)); + + auto argTypes = calleeFunc.getArgumentTypes(); + auto retTy = quake::VeqType::getUnsized(ctx); + auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retTy}); + + initFunc.setName(initKernelName); + initFunc.setType(funcTy); + initFunc.setPrivate(); + + OpBuilder newBuilder(ctx); + + auto *entryBlock = &initFunc.getRegion().front(); + newBuilder.setInsertionPointToStart(entryBlock); + auto intType = newBuilder.getI64Type(); + Value zero = newBuilder.create(loc, 0, intType); + Value one = newBuilder.create(loc, 1, intType); + Value begin = zero; + + auto argPos = initFunc.getArguments().size(); 
+ + // Detect errors in kernel passed to get_state. + std::function processInner = [&](Block &block) { + for (auto &op : block) { + for (auto ®ion : op.getRegions()) { + for (auto &b : region) + processInner(b); + } + // Don't allow returns in inner scopes + if (auto retOp = dyn_cast(&op)) + calleeFunc.emitError("Encountered return in inner scope in a kernel " + "passed to get_state"); + } + }; + + for (auto &op : calleeFunc.getRegion().front()) + for (auto ®ion : op.getRegions()) + for (auto &b : region) + processInner(b); + + // Process outer block to initialize the allocation passed as an argument. + std::function process = [&](Block &block) { + SmallVector cleanUps; + Operation *replacedReturn = nullptr; + + Value arg; + Value subArg; + Value blockBegin = begin; + Value blockAllocSize = zero; + for (auto &op : block) { + if (auto alloc = dyn_cast(&op)) { + newBuilder.setInsertionPointAfter(alloc); + + if (!arg) { + initFunc.insertArgument(argPos, retTy, {}, loc); + arg = initFunc.getArgument(argPos); + } + + auto allocSize = alloc.getSize(); + auto offset = newBuilder.create(loc, allocSize, one); + subArg = + newBuilder.create(loc, retTy, arg, begin, offset); + alloc.replaceAllUsesWith(subArg); + cleanUps.push_back(alloc); + begin = newBuilder.create(loc, begin, allocSize); + blockAllocSize = + newBuilder.create(loc, blockAllocSize, allocSize); + } + + if (auto retOp = dyn_cast(&op)) { + if (retOp != replacedReturn) { + newBuilder.setInsertionPointAfter(retOp); + + auto offset = + newBuilder.create(loc, blockAllocSize, one); + Value ret = newBuilder.create(loc, retTy, arg, + blockBegin, offset); + + assert(arg && "No veq allocations found"); + replacedReturn = newBuilder.create(loc, ret); + cleanUps.push_back(retOp); + } + } + } + + for (auto &op : cleanUps) { + op->dropAllReferences(); + op->dropAllUses(); + op->erase(); + } + }; + + // Process the function body + process(initFunc.getRegion().front()); +} + +/// Create callee.num_qubits_N that calculates the 
number of qubits to +/// initialize Callee: func.func @callee(%arg0: i64) { +/// %0 = cc.alloca i64 +/// cc.store %arg0, %0 : !cc.ptr +/// %1 = cc.load %0 : !cc.ptr +/// %2 = quake.alloca !quake.veq[%1 : i64] +/// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref +/// quake.x %3 : (!quake.ref) -> () +/// return +/// } +/// +/// callee.num_qubits_0: +/// func.func private @callee.num_qubits_0(%arg0: i64) -> i64 { +/// %0 = cc.alloca i64 +/// cc.store %arg0, %0 : !cc.ptr +/// %1 = cc.load %0 : !cc.ptr +/// return %1 : i64 +/// } +static void createNumQubitsFunc(OpBuilder &builder, ModuleOp sourceMod, + func::FuncOp calleeFunc, + std::string &numQubitsKernelName) { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(sourceMod.getBody()); + + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); + + auto numQubitsFunc = cast(builder.clone(*calleeFunc)); + + auto argTypes = calleeFunc.getArgumentTypes(); + auto retType = builder.getI64Type(); + auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retType}); + + numQubitsFunc.setName(numQubitsKernelName); + numQubitsFunc.setType(funcTy); + numQubitsFunc.setPrivate(); + + OpBuilder newBuilder(ctx); + + auto *entryBlock = &numQubitsFunc.getRegion().front(); + newBuilder.setInsertionPointToStart(entryBlock); + Value size = newBuilder.create(loc, 0, retType); + + // Process block recursively to calculate and return allocation size + // and remove everything else. 
+ std::function process = [&](Block &block) { + SmallVector used; + Operation *replacedReturn = nullptr; + + for (auto &op : block) { + // Calculate allocation size (existing allocation size plus new one) + if (auto alloc = dyn_cast(&op)) { + auto allocSize = alloc.getSize(); + newBuilder.setInsertionPointAfter(alloc); + size = newBuilder.create(loc, size, allocSize); + } + + // Return allocation size + if (auto retOp = dyn_cast(&op)) { + if (retOp != replacedReturn) { + + newBuilder.setInsertionPointAfter(retOp); + auto newRet = newBuilder.create(loc, size); + replacedReturn = newRet; + used.push_back(newRet); + } + } + } + + // Collect all ops needed for size calculation + SmallVector keep; + while (!used.empty()) { + auto *op = used.pop_back_val(); + keep.push_back(op); + for (auto opnd : op->getOperands()) + if (auto defOp = opnd.getDefiningOp()) + used.push_back(defOp); + } + + // Remove the rest of the ops + SmallVector toErase; + for (auto &op : block) + if (std::find(keep.begin(), keep.end(), &op) == keep.end()) + toErase.push_back(&op); + + for (auto &op : toErase) { + op->dropAllReferences(); + op->dropAllUses(); + op->erase(); + } + }; + + // Process the function body + process(numQubitsFunc.getRegion().front()); +} + static Value genConstant(OpBuilder &builder, const cudaq::state *v, llvm::DataLayout &layout, cudaq::opt::ArgumentConverter &converter) { @@ -185,10 +404,9 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // } // // func.func private @callee(%arg0: i64) { - // %cst = arith.constant 1.5707963267948966 : f64 // %0 = quake.alloca !quake.veq[%arg0 : i64] // %1 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref - // quake.ry (%cst) %1 : (f64, !quake.ref) -> () + // quake.x %1 : (!quake.ref) -> () // return // } // @@ -210,15 +428,13 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // return // } // - // func.func private @callee.num_qubits_0(%arg0: !quake.veq) -> i64 { - // %cst = arith.constant 
2 : i64 - // return %cst : i64 + // func.func private @callee.num_qubits_0(%arg0: i64) -> i64 { + // return %arg0 : i64 // } // - // func.func private @callee.init_state_0(%arg0: !quake.veq) { - // %cst = arith.constant 1.5707963267948966 : f64 + // func.func private @callee.init_state_0(%arg0: i64, %arg1: !quake.veq) { // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref - // quake.ry (%cst) %1 : (f64, !quake.ref) -> () + // quake.x %1 : (!quake.ref) -> () // return // } // ``` @@ -232,21 +448,21 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // clang-format off // ``` // func.func @caller() { - // %1 = call "callee.num_qubits_0" : () -> i64 + // %1 = call @callee.num_qubits_0() : () -> i64 // %2 = quake.alloca !quake.veq[%1 : i64] - // call "callee.init_0" %2: (!quake.veq) -> () + // %3 = call @callee.init_0(%2): (!quake.veq) -> !quake.veq // } // - // func.func private @callee.num_qubits_0(%arg0: !quake.veq) -> i64 { + // func.func private @callee.num_qubits_0() -> i64 { // %cst = arith.constant 2 : i64 // return %cst : i64 // } // - // func.func private @callee.init_0(%arg0: !quake.veq) { + // func.func private @callee.init_0(%arg0: !quake.veq) -> !quake.veq { // %cst = arith.constant 1.5707963267948966 : f64 // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref // quake.ry (%cst) %1 : (f64, !quake.ref) -> () - // return + // return %arg0 // } // ``` // clang-format on @@ -264,66 +480,32 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto fromModule = parseSourceString(code, ctx); static unsigned counter = 0; - std::string modifiedCalleeName = - calleeName + ".modified_" + std::to_string(counter++); - std::string modifiedCalleeKernelName = - cudaq::runtime::cudaqGenPrefixName + modifiedCalleeName; + std::string initName = calleeName + ".init_" + std::to_string(counter); + std::string initKernelName = cudaq::runtime::cudaqGenPrefixName + initName; + + std::string numQubitsName = + 
calleeName + ".num_qubits_" + std::to_string(counter++); + std::string numQubitsKernelName = + cudaq::runtime::cudaqGenPrefixName + numQubitsName; - // Create callee.modified that returns concat of veq allocations. auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); assert(calleeFunc && "callee is missing"); - auto argTypes = calleeFunc.getArgumentTypes(); - auto retType = quake::VeqType::getUnsized(ctx); - auto funcTy = FunctionType::get(ctx, argTypes, {retType}); - - { - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(sourceMod.getBody()); - - auto modifiedCalleeFunc = cast(builder.clone(*calleeFunc)); - modifiedCalleeFunc.setName(modifiedCalleeKernelName); - modifiedCalleeFunc.setType(funcTy); - modifiedCalleeFunc.setPrivate(); - - OpBuilder modifiedBuilder(ctx); - SmallVector allocations; - SmallVector cleanUps; - for (auto &op : modifiedCalleeFunc.getOps()) { - if (auto alloc = dyn_cast(op)) { - allocations.push_back(alloc.getResult()); - // Replace by the result of quake.init_state if used by it - for (auto *user : op.getUsers()) { - if (auto init = dyn_cast(*user)) { - allocations.pop_back(); - allocations.push_back(init.getResult()); - } - } - } - if (auto retOp = dyn_cast(op)) { - if (retOp.getOperands().size() == 0) { - modifiedBuilder.setInsertionPointAfter(retOp); - assert(allocations.size() > 0 && "No veq allocations found"); - Value ret = modifiedBuilder.create( - loc, quake::VeqType::getUnsized(ctx), allocations); - modifiedBuilder.create(loc, ret); - cleanUps.push_back(retOp); - } - } - } - for (auto *op : cleanUps) { - op->dropAllUses(); - op->erase(); - } - } - // Create substitutions for the `callee.modified.N`. 
- converter.genCallee(modifiedCalleeName, calleeArgs); + // Create `callee.init_N` and `callee.num_qubits_N` used for + // `quake.get_state` replacement later in ReplaceStateWithKernel pass + createInitFunc(builder, sourceMod, calleeFunc, initKernelName); + createNumQubitsFunc(builder, sourceMod, calleeFunc, numQubitsKernelName); + + // Create substitutions for the `callee.init_N` and `callee.num_qubits_N`. + converter.genCallee(initName, calleeArgs); + converter.genCallee(numQubitsName, calleeArgs); - // Create a subst for state pointer. + // Create a substitution for the state pointer. auto statePtrTy = cudaq::cc::PointerType::get(cudaq::cc::StateType::get(ctx)); return builder.create( - loc, statePtrTy, builder.getStringAttr(modifiedCalleeKernelName)); + loc, statePtrTy, builder.getStringAttr(numQubitsKernelName), + builder.getStringAttr(initKernelName)); } TODO("cudaq::state* argument synthesis for quantum hardware for c functions"); diff --git a/runtime/cudaq/qis/quantum_state.h b/runtime/cudaq/qis/quantum_state.h index 63117eb4629..c9b1b30029b 100644 --- a/runtime/cudaq/qis/quantum_state.h +++ b/runtime/cudaq/qis/quantum_state.h @@ -1,5 +1,5 @@ /****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * * All rights reserved. * * * * This source code and the accompanying materials are made available under * @@ -76,6 +76,8 @@ class QuantumState : public cudaq::SimulationState { (addArgument(args), ...); } QuantumState() = default; + QuantumState(const QuantumState &other) + : kernelName(other.kernelName), args(other.args), deleters() {} virtual ~QuantumState(); /// @brief True if the state has amplitudes or density matrix available. 
diff --git a/runtime/test/CMakeLists.txt b/runtime/test/CMakeLists.txt index 753cecb9616..0c93df80f5e 100644 --- a/runtime/test/CMakeLists.txt +++ b/runtime/test/CMakeLists.txt @@ -24,6 +24,7 @@ link_directories(${CMAKE_BINARY_DIR}/lib) target_link_libraries(${TEST_NAME} PUBLIC cudaq-mlir-runtime + cudaq ) set_property(TARGET ${TEST_NAME} PROPERTY FOLDER test) diff --git a/runtime/test/FakeQuantumState.h b/runtime/test/FakeQuantumState.h new file mode 100644 index 00000000000..14acec132a6 --- /dev/null +++ b/runtime/test/FakeQuantumState.h @@ -0,0 +1,159 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "cudaq/qis/state.h" +#include +#include + +#include + +/// @cond DO_NOT_DOCUMENT +/// @brief Fake simulation state to use in tests. 
+class FakeQuantumState : public cudaq::SimulationState { +private: + std::string kernelName; + std::vector args; + //std::vector> deleters; + +public: + virtual std::unique_ptr + createFromSizeAndPtr(std::size_t size, void *data, + std::size_t dataType) override { + throw std::runtime_error("Not implemented"); + } + + FakeQuantumState() = default; + // FakeQuantumState(const std::string& kernelName, int arg) : kernelName(kernelName) { + // std::cout << "ARG: " << arg << std::endl; + // addArgument(arg); + // } + + FakeQuantumState(const std::string& kernelName, const std::vector args) : kernelName(kernelName), args(args) { + //std::cout << "ARG: " << arg << std::endl; + //addArgument(arg); + } + + FakeQuantumState(const FakeQuantumState& other): kernelName(other.kernelName), args(other.args) {} + + // template + // void addArgument(const T &arg) { + // if constexpr (std::is_pointer_v>) { + // if constexpr (std::is_copy_constructible_v< + // std::remove_pointer_t>>) { + // auto ptr = new std::remove_pointer_t>(*arg); + // args.push_back(ptr); + // deleters.push_back([](void *ptr) { + // delete static_cast> *>(ptr); + // }); + // } else { + // throw std::invalid_argument( + // "Unsupported argument type: only pointers to copy-constructible " + // "types and copy-constructible types are supported."); + // } + // } else if constexpr (std::is_copy_constructible_v>) { + // auto *ptr = new std::decay_t(arg); + // args.push_back(ptr); + // deleters.push_back( + // [](void *ptr) { delete static_cast *>(ptr); }); + // } else { + // throw std::invalid_argument( + // "Unsupported argument type: only pointers to copy-constructible " + // "types and copy-constructible types are supported."); + // } + // } + + virtual std::unique_ptr + createFromData(const cudaq::state_data &data) override { + throw std::runtime_error("Not implemented"); + } + + virtual bool hasData() const override { return false; } + + virtual std::optional>> + getKernelInfo() const override { + return 
std::make_pair(kernelName, args); + } + + virtual Tensor getTensor(std::size_t tensorIdx = 0) const override { + throw std::runtime_error("Not implemented"); + //return Tensor(); + } + + virtual std::vector getTensors() const override { + throw std::runtime_error("Not implemented"); + //return std::vector(); + } + + virtual std::size_t getNumTensors() const override { return 1; } + + virtual std::size_t getNumQubits() const override { + throw std::runtime_error("Not implemented"); + //return 0; + } + + virtual std::complex overlap(const SimulationState &other) override { + throw std::runtime_error("Not implemented"); + //return 0; + } + + virtual std::complex + getAmplitude(const std::vector &basisState) override { + throw std::runtime_error("Not implemented"); + //return 0; + } + + virtual std::vector> + getAmplitudes(const std::vector> &basisStates) override { + throw std::runtime_error("Not implemented"); + //return {0}; + } + + virtual void dump(std::ostream &os) const override { + throw std::runtime_error("Not implemented"); + } + + virtual precision getPrecision() const override { + return cudaq::SimulationState::precision::fp64; + } + + virtual void destroyState() override { + } + + virtual std::complex + operator()(std::size_t tensorIdx, + const std::vector &indices) override { + throw std::runtime_error("Not implemented"); + } + + virtual std::size_t getNumElements() const override { + throw std::runtime_error("Not implemented"); + } + + virtual bool isDeviceData() const override { return false; } + + virtual bool isArrayLike() const override { return true; } + + virtual void toHost(std::complex *clientAllocatedData, + std::size_t numElements) const override { + throw std::runtime_error("Not implemented"); + } + + virtual void toHost(std::complex *clientAllocatedData, + std::size_t numElements) const override { + throw std::runtime_error("Not implemented"); + } + + virtual ~FakeQuantumState() override { + // for (std::size_t counter = 0; auto &ptr : args) 
+ // deleters[counter++](ptr); + + // args.clear(); + // deleters.clear(); + } +}; +/// @endcond diff --git a/runtime/test/FakeSimulationState.h b/runtime/test/FakeSimulationState.h index 4dcec050bec..74a0c0c66ed 100644 --- a/runtime/test/FakeSimulationState.h +++ b/runtime/test/FakeSimulationState.h @@ -21,7 +21,7 @@ class FakeSimulationState : public cudaq::SimulationState { virtual std::unique_ptr createFromSizeAndPtr(std::size_t size, void *data, std::size_t dataType) override { - std::runtime_error("Not implemented"); + throw std::runtime_error("Not implemented"); return std::make_unique(size, data); } @@ -30,17 +30,17 @@ class FakeSimulationState : public cudaq::SimulationState { virtual std::unique_ptr createFromData(const cudaq::state_data &data) override { - std::runtime_error("Not implemented"); + throw std::runtime_error("Not implemented"); return std::make_unique(0, nullptr); } virtual Tensor getTensor(std::size_t tensorIdx = 0) const override { - std::runtime_error("Not implemented"); + throw std::runtime_error("Not implemented"); return Tensor(); } virtual std::vector getTensors() const override { - std::runtime_error("Not implemented"); + throw std::runtime_error("Not implemented"); return std::vector(); } @@ -51,33 +51,31 @@ class FakeSimulationState : public cudaq::SimulationState { } virtual std::complex overlap(const SimulationState &other) override { - std::runtime_error("Not implemented"); + throw std::runtime_error("Not implemented"); return 0; } virtual std::complex getAmplitude(const std::vector &basisState) override { - std::runtime_error("Not implemented"); + throw std::runtime_error("Not implemented"); return 0; } virtual std::vector> getAmplitudes(const std::vector> &basisStates) override { - std::runtime_error("Not implemented"); + throw std::runtime_error("Not implemented"); return {0}; } virtual void dump(std::ostream &os) const override { - std::runtime_error("Not implemented"); + throw std::runtime_error("Not implemented"); } 
virtual precision getPrecision() const override { return cudaq::SimulationState::precision::fp64; } - virtual void destroyState() override { - std::runtime_error("Not implemented"); - } + virtual void destroyState() override {} virtual std::complex operator()(std::size_t tensorIdx, diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 9f936cdcec8..90ac7b763d3 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -11,6 +11,7 @@ // RUN: test_argument_conversion | FileCheck %s +#include "FakeQuantumState.h" #include "FakeSimulationState.h" #include "common/ArgumentConversion.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" @@ -20,9 +21,28 @@ #include "mlir/Parser/Parser.h" #include +extern "C" void __cudaq_deviceCodeHolderAdd(const char *, const char *); + +void dumpSubstitutionModules(cudaq::opt::ArgumentConverter &ab) { + std::function dump = + [&dump](cudaq::opt::ArgumentConverter &con) { + // Dump the conversions + llvm::outs() << "========================================\n" + "Substitution module:\n" + << con.getKernelName() << "\n" + << con.getSubstitutionModule() << '\n'; + + for (auto &calleeCon : con.getCalleeConverters()) + dump(calleeCon); + }; + + dump(ab); +} + void doSimpleTest(mlir::MLIRContext *ctx, const std::string &typeName, - std::vector args) { - std::string code = R"#( + std::vector args, + const std::string &additionalCode = "") { + std::string code = additionalCode + R"#( func.func private @callee(%0: )#" + typeName + R"#() func.func @__nvqpp__mlirgen__testy(%0: )#" + @@ -37,10 +57,10 @@ func.func @__nvqpp__mlirgen__testy(%0: )#" + cudaq::opt::ArgumentConverter ab{"testy", *mod}; // Create the argument conversions ab.gen(args); - // Dump the conversions - llvm::outs() << "========================================\n" - "Substitution module:\n" - << ab.getSubstitutionModule() << '\n'; + // Dump the modified source module + llvm::outs() << 
"Source module (after):\n" << *mod << '\n'; + // Dump all conversions + dumpSubstitutionModules(ab); } void doTest(mlir::MLIRContext *ctx, std::vector &typeNames, @@ -80,14 +100,12 @@ void doTest(mlir::MLIRContext *ctx, std::vector &typeNames, auto mod = mlir::parseSourceString(code, ctx); llvm::outs() << "Source module:\n" << *mod << '\n'; cudaq::opt::ArgumentConverter ab{"testy", *mod}; - // Create the argument conversions ab.gen_drop_front(args, startingArgIdx); - - // Dump the conversions - llvm::outs() << "========================================\n" - "Substitution module:\n" - << ab.getSubstitutionModule() << '\n'; + // Dump the modified source module + llvm::outs() << "Source module (after):\n" << *mod << '\n'; + // Dump all conversions + dumpSubstitutionModules(ab); } void test_scalars(mlir::MLIRContext *ctx) { @@ -361,7 +379,7 @@ void test_recursive(mlir::MLIRContext *ctx) { // clang-format on } -void test_state(mlir::MLIRContext *ctx) { +void test_simulation_state(mlir::MLIRContext *ctx) { { std::vector> data{M_SQRT1_2, M_SQRT1_2, 0., 0., 0., 0., 0., 0.}; @@ -384,6 +402,197 @@ void test_state(mlir::MLIRContext *ctx) { // clang-format on } +void test_quantum_state(mlir::MLIRContext *ctx) { + { + auto kernel = "init"; + auto kernelCode = + "" + "func.func private @__nvqpp__mlirgen__init(%arg0: i64) {\n" + " %0 = quake.alloca !quake.veq[%arg0 : i64]\n" + " %1 = quake.extract_ref %0[0] : (!quake.veq) -> !quake.ref\n" + " quake.x %1 : (!quake.ref) -> ()\n" + " return\n" + "}\n"; + __cudaq_deviceCodeHolderAdd(kernel, kernelCode); + + std::int64_t n = 2; + std::vector a = {static_cast(&n)}; + auto x = cudaq::state(new FakeQuantumState(kernel, a)); + std::vector v = {static_cast(&x)}; + doSimpleTest(ctx, "!cc.ptr", v, kernelCode); + } + + // clang-format off +// CHECK: Source module: +// CHECK: func.func private @__nvqpp__mlirgen__init(%arg0: i64) { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] +// CHECK: %[[VAL_1:.*]] = quake.extract_ref 
%[[VAL_0]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () +// CHECK: return +// CHECK: } +// CHECK: func.func private @callee(!cc.ptr) + +// CHECK: Source module (after): +// CHECK: func.func private @__nvqpp__mlirgen__init(%arg0: i64) { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] +// CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () +// CHECK: return +// CHECK: } +// CHECK: func.func private @callee(!cc.ptr) +// CHECK: func.func private @__nvqpp__mlirgen__init.init_0(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = arith.subi %arg0, %[[VAL_1]] : i64 +// CHECK: %[[VAL_3:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_2]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: %[[VAL_5:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_3]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_6]] : (!quake.ref) -> () +// CHECK: %[[VAL_7:.*]] = arith.subi %[[VAL_5]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_8:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_7]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: return %[[VAL_8]] : !quake.veq +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init.num_qubits_0(%arg0: i64) -> i64 { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: return %[[VAL_1]] : i64 +// CHECK: } + +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: testy +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = quake.get_state "__nvqpp__mlirgen__init.num_qubits_0" "__nvqpp__mlirgen__init.init_0" : !cc.ptr +// CHECK: } +// CHECK: ======================================== 
+// CHECK: Substitution module: +// CHECK: init.init_0 +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: init.num_qubits_0 +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 +// CHECK: } + // clang-format on + + { + auto kernel = "init"; + auto kernelCode = + "" + " func.func private @__nvqpp__mlirgen__init(%arg0: i64) {\n" + " %2 = quake.alloca !quake.veq[%arg0 : i64]\n" + " %3 = quake.extract_ref %2[0] : (!quake.veq) -> !quake.ref\n" + " quake.x %3 : (!quake.ref) -> ()\n" + " %measOut = quake.mz %3 name \"\" : (!quake.ref) -> !quake.measure\n" + " %4 = quake.discriminate %measOut : (!quake.measure) -> i1\n" + " cc.if(%4) {\n" + " %6 = quake.alloca !quake.veq[%arg0 : i64]\n" + " %7 = quake.extract_ref %6[0] : (!quake.veq) -> !quake.ref\n" + " quake.x %7 : (!quake.ref) -> ()\n" + " %8 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref\n" + " quake.y %8 : (!quake.ref) -> ()\n" + " }\n" + " return\n" + "}\n"; + + __cudaq_deviceCodeHolderAdd(kernel, kernelCode); + + std::int64_t n = 2; + std::vector a = {static_cast(&n)}; + auto x = cudaq::state(new FakeQuantumState(kernel, a)); + std::vector v = {static_cast(&x)}; + doSimpleTest(ctx, "!cc.ptr", v, kernelCode); + } + + // clang-format off +// CHECK: Source module: +// CHECK: func.func private @__nvqpp__mlirgen__init(%arg0: i64) { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] +// CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () +// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] name "" : (!quake.ref) -> !quake.measure +// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_2]] : (!quake.measure) -> i1 +// CHECK: cc.if(%[[VAL_3]]) { +// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%arg0 : i64] +// CHECK: %[[VAL_5:.*]] = quake.extract_ref 
%[[VAL_4]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_5]] : (!quake.ref) -> () +// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_0]][1] : (!quake.veq) -> !quake.ref +// CHECK: quake.y %[[VAL_6]] : (!quake.ref) -> () +// CHECK: } +// CHECK: return +// CHECK: } +// CHECK: func.func private @callee(!cc.ptr) + +// CHECK: Source module (after): +// CHECK: func.func private @__nvqpp__mlirgen__init(%arg0: i64) { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] +// CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () +// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] name "" : (!quake.ref) -> !quake.measure +// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_2]] : (!quake.measure) -> i1 +// CHECK: cc.if(%[[VAL_3]]) { +// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%arg0 : i64] +// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_4]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_5]] : (!quake.ref) -> () +// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_0]][1] : (!quake.veq) -> !quake.ref +// CHECK: quake.y %[[VAL_6]] : (!quake.ref) -> () +// CHECK: } +// CHECK: return +// CHECK: } +// CHECK: func.func private @callee(!cc.ptr) +// CHECK: func.func private @__nvqpp__mlirgen__init.init_1(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = arith.subi %arg0, %[[VAL_1]] : i64 +// CHECK: %[[VAL_3:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_2]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: %[[VAL_5:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_3]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_6]] : (!quake.ref) -> () +// CHECK: %[[VAL_7:.*]] = quake.mz %[[VAL_6]] name "" : (!quake.ref) -> !quake.measure +// 
CHECK: %[[VAL_8:.*]] = quake.discriminate %[[VAL_7]] : (!quake.measure) -> i1 +// CHECK: cc.if(%[[VAL_8]]) { +// CHECK: %[[VAL_11:.*]] = quake.alloca !quake.veq[%arg0 : i64] +// CHECK: %[[VAL_12:.*]] = quake.extract_ref %[[VAL_11]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_12]] : (!quake.ref) -> () +// CHECK: %[[VAL_13:.*]] = quake.extract_ref %[[VAL_3]][1] : (!quake.veq) -> !quake.ref +// CHECK: quake.y %[[VAL_13]] : (!quake.ref) -> () +// CHECK: } +// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_5]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_10:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_9]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: return %[[VAL_10]] : !quake.veq +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init.num_qubits_1(%arg0: i64) -> i64 { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: return %[[VAL_1]] : i64 +// CHECK: } + +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: testy +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = quake.get_state "__nvqpp__mlirgen__init.num_qubits_1" "__nvqpp__mlirgen__init.init_1" : !cc.ptr +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: init.init_1 +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: init.num_qubits_1 +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 +// CHECK: } + // clang-format on +} + void test_combinations(mlir::MLIRContext *ctx) { { bool x = true; @@ -514,7 +723,8 @@ int main() { test_vectors(&context); test_aggregates(&context); test_recursive(&context); - test_state(&context); + test_simulation_state(&context); + test_quantum_state(&context); test_combinations(&context); return 0; } diff --git 
a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index 62d162e1781..482440b4b8f 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -13,6 +13,7 @@ // Quantum emulators // RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s // RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target anyon --emulate %s -o %t && %t | FileCheck %s // 2 different IQM machines for 2 different topologies // RUN: nvq++ %cpp_std --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s // RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s diff --git a/test/Quake/arg_subst-5.txt b/test/Quake/arg_subst-5.txt index 2038ad31ccd..5b43881daf5 100644 --- a/test/Quake/arg_subst-5.txt +++ b/test/Quake/arg_subst-5.txt @@ -7,5 +7,5 @@ // ========================================================================== // cc.arg_subst[0] { - %0 = quake.get_state "init" : !cc.ptr + %0 = quake.get_state "num_qubits" "init" : !cc.ptr } diff --git a/test/Quake/arg_subst-6.txt b/test/Quake/arg_subst-6.txt index 4c3a55d883a..7a53d0369de 100644 --- a/test/Quake/arg_subst-6.txt +++ b/test/Quake/arg_subst-6.txt @@ -7,5 +7,5 @@ // ========================================================================== // cc.arg_subst[0] { - %c2_i32 = arith.constant 2 : i32 + %c2_i64 = arith.constant 2 : i64 } diff --git a/test/Quake/arg_subst_func.qke b/test/Quake/arg_subst_func.qke index 768216567d7..b54188850b6 100644 --- a/test/Quake/arg_subst_func.qke +++ b/test/Quake/arg_subst_func.qke @@ -6,7 +6,7 @@ // the terms of the Apache License 2.0 which accompanies this distribution. 
// // ========================================================================== // -// RUN: cudaq-opt --argument-synthesis=functions=foo:%S/arg_subst.txt,blink:%S/arg_subst.txt,testy1:%S/arg_subst-1.txt,testy2:%S/arg_subst-2.txt,testy3:%S/arg_subst-3.txt,testy4:%S/arg_subst-4.txt,testy5:%S/arg_subst-5.txt,init:%S/arg_subst-6.txt --canonicalize %s | FileCheck %s +// RUN: cudaq-opt --argument-synthesis=functions=foo:%S/arg_subst.txt,blink:%S/arg_subst.txt,testy1:%S/arg_subst-1.txt,testy2:%S/arg_subst-2.txt,testy3:%S/arg_subst-3.txt,testy4:%S/arg_subst-4.txt,testy5:%S/arg_subst-5.txt,num_qubits:%S/arg_subst-6.txt,init:%S/arg_subst-6.txt --canonicalize %s | FileCheck %s func.func private @bar(i32) func.func private @baz(f32) @@ -154,23 +154,25 @@ func.func @testy5(%arg0: !cc.ptr) { return } -func.func private @init(%arg0: i32) -> !quake.veq { - %cst = arith.constant 1.5707963267948966 : f64 - %0 = cc.cast signed %arg0 : (i32) -> i64 - %1 = quake.alloca !quake.veq[%0 : i64] - %2 = quake.concat %1 : (!quake.veq) -> !quake.veq - return %2 : !quake.veq +func.func @init(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { + return %arg1 : !quake.veq +} + +func.func @num_qubits(%arg0: i64) -> i64 { + return %arg0 : i64 } // CHECK-LABEL: func.func @testy5() { -// CHECK: %[[VAL_2:.*]] = quake.get_state "init" : !cc.ptr +// CHECK: %[[VAL_2:.*]] = quake.get_state "num_qubits" "init" : !cc.ptr // CHECK: %[[VAL_3:.*]] = quake.get_number_of_qubits %[[VAL_2]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_5:.*]] = quake.init_state %[[VAL_4]], %[[VAL_2]] : (!quake.veq, !cc.ptr) -> !quake.veq // CHECK: return // CHECK: } -// CHECK: func.func private @init() -> !quake.veq { -// CHECK: %[[VAL_7:.*]] = quake.alloca !quake.veq<2> -// CHECK: %[[VAL_8:.*]] = quake.relax_size %[[VAL_7:.*]] : (!quake.veq<2>) -> !quake.veq -// CHECK: return %[[VAL_8]] : !quake.veq +// CHECK: func.func @init(%arg0: !quake.veq) -> !quake.veq { +// CHECK: return 
%arg0 : !quake.veq +// CHECK: } +// CHECK: func.func @num_qubits() -> i64 { +// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 +// CHECK: return %[[VAL_0]] : i64 // CHECK: } diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index d234db9a617..15bc9cab104 100644 --- a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -9,23 +9,55 @@ // RUN: cudaq-opt -replace-state-with-kernel -canonicalize %s | FileCheck %s module { - func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %0 = quake.get_state "callee.modified_0" : !cc.ptr + + func.func private @callee.num_qubits_0() -> i64 { + %cst = arith.constant 2 : i64 + return %cst : i64 + } + + func.func private @callee.init_0(%arg0: !quake.veq) -> !quake.veq { + %cst = arith.constant 1.5707963267948966 : f64 + %1 = quake.extract_ref %arg0[0] : (!quake.veq) -> !quake.ref + quake.ry (%cst) %1 : (f64, !quake.ref) -> () + return %arg0: !quake.veq + } + + func.func @caller0() { + %0 = quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq return } - func.func private @callee.modified_0() -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { - %cst = arith.constant 1.5707963267948966 : f64 - %0 = quake.alloca !quake.veq<2> - %1 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref - quake.ry (%cst) %1 : (f64, !quake.ref) -> () - %2 = quake.relax_size %0 : (!quake.veq<2>) -> !quake.veq - return %2 : !quake.veq + +// CHECK-LABEL: func.func @caller0() { +// CHECK: %[[VAL_0:.*]] = call @callee.num_qubits_0() : () -> i64 +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq[%[[VAL_0]] : i64] +// CHECK: %[[VAL_2:.*]] = call @callee.init_0(%[[VAL_1]]) : (!quake.veq) -> !quake.veq +// CHECK: return +// CHECK: } + + func.func @caller1(%arg0: i64) { + %0 = 
quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr + %2 = quake.alloca !quake.veq[%arg0 : i64] + %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq + return } -// CHECK-LABEL: func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -// CHECK: %[[VAL_0:.*]] = call @callee.modified_0() : () -> !quake.veq + +// CHECK-LABEL: func.func @caller1(%arg0: i64) { +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq[%arg0 : i64] +// CHECK: %[[VAL_2:.*]] = call @callee.init_0(%[[VAL_1]]) : (!quake.veq) -> !quake.veq // CHECK: return // CHECK: } -} + + func.func @caller2() -> i64 { + %0 = quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr + %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 + return %1: i64 + } + +// CHECK-LABEL: func.func @caller2() -> i64 { +// CHECK: %[[VAL_0:.*]] = call @callee.num_qubits_0() : () -> i64 +// CHECK: return %[[VAL_0]] : i64 +// CHECK: } +} \ No newline at end of file From 008e8c17f23f68cd178d6fd0d71453c1ca3c2630 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 21 Jan 2025 15:01:17 -0800 Subject: [PATCH 27/54] DCO Remediation Commit for Anna Gringauze I, Anna Gringauze , hereby add my Signed-off-by to this commit: 95633714a23ad2823369a86b7455537239da5b02 Signed-off-by: Anna Gringauze --- runtime/test/FakeQuantumState.h | 63 ++++-------------------------- runtime/test/FakeSimulationState.h | 6 --- 2 files changed, 8 insertions(+), 61 deletions(-) diff --git a/runtime/test/FakeQuantumState.h b/runtime/test/FakeQuantumState.h index 14acec132a6..2f20babd955 100644 --- a/runtime/test/FakeQuantumState.h +++ b/runtime/test/FakeQuantumState.h @@ -18,7 +18,6 @@ class FakeQuantumState : public cudaq::SimulationState { private: std::string kernelName; std::vector args; - //std::vector> deleters; public: virtual std::unique_ptr @@ -28,44 +27,11 @@ class FakeQuantumState : public cudaq::SimulationState { } FakeQuantumState() = default; - // FakeQuantumState(const std::string& 
kernelName, int arg) : kernelName(kernelName) { - // std::cout << "ARG: " << arg << std::endl; - // addArgument(arg); - // } - - FakeQuantumState(const std::string& kernelName, const std::vector args) : kernelName(kernelName), args(args) { - //std::cout << "ARG: " << arg << std::endl; - //addArgument(arg); - } - - FakeQuantumState(const FakeQuantumState& other): kernelName(other.kernelName), args(other.args) {} - - // template - // void addArgument(const T &arg) { - // if constexpr (std::is_pointer_v>) { - // if constexpr (std::is_copy_constructible_v< - // std::remove_pointer_t>>) { - // auto ptr = new std::remove_pointer_t>(*arg); - // args.push_back(ptr); - // deleters.push_back([](void *ptr) { - // delete static_cast> *>(ptr); - // }); - // } else { - // throw std::invalid_argument( - // "Unsupported argument type: only pointers to copy-constructible " - // "types and copy-constructible types are supported."); - // } - // } else if constexpr (std::is_copy_constructible_v>) { - // auto *ptr = new std::decay_t(arg); - // args.push_back(ptr); - // deleters.push_back( - // [](void *ptr) { delete static_cast *>(ptr); }); - // } else { - // throw std::invalid_argument( - // "Unsupported argument type: only pointers to copy-constructible " - // "types and copy-constructible types are supported."); - // } - // } + FakeQuantumState(const std::string &kernelName, + const std::vector args) + : kernelName(kernelName), args(args) {} + FakeQuantumState(const FakeQuantumState &other) + : kernelName(other.kernelName), args(other.args) {} virtual std::unique_ptr createFromData(const cudaq::state_data &data) override { @@ -81,36 +47,30 @@ class FakeQuantumState : public cudaq::SimulationState { virtual Tensor getTensor(std::size_t tensorIdx = 0) const override { throw std::runtime_error("Not implemented"); - //return Tensor(); } virtual std::vector getTensors() const override { throw std::runtime_error("Not implemented"); - //return std::vector(); } virtual std::size_t 
getNumTensors() const override { return 1; } virtual std::size_t getNumQubits() const override { throw std::runtime_error("Not implemented"); - //return 0; } virtual std::complex overlap(const SimulationState &other) override { throw std::runtime_error("Not implemented"); - //return 0; } virtual std::complex getAmplitude(const std::vector &basisState) override { throw std::runtime_error("Not implemented"); - //return 0; } virtual std::vector> getAmplitudes(const std::vector> &basisStates) override { throw std::runtime_error("Not implemented"); - //return {0}; } virtual void dump(std::ostream &os) const override { @@ -121,8 +81,7 @@ class FakeQuantumState : public cudaq::SimulationState { return cudaq::SimulationState::precision::fp64; } - virtual void destroyState() override { - } + virtual void destroyState() override {} virtual std::complex operator()(std::size_t tensorIdx, @@ -130,7 +89,7 @@ class FakeQuantumState : public cudaq::SimulationState { throw std::runtime_error("Not implemented"); } - virtual std::size_t getNumElements() const override { + virtual std::size_t getNumElements() const override { throw std::runtime_error("Not implemented"); } @@ -148,12 +107,6 @@ class FakeQuantumState : public cudaq::SimulationState { throw std::runtime_error("Not implemented"); } - virtual ~FakeQuantumState() override { - // for (std::size_t counter = 0; auto &ptr : args) - // deleters[counter++](ptr); - - // args.clear(); - // deleters.clear(); - } + virtual ~FakeQuantumState() override {} }; /// @endcond diff --git a/runtime/test/FakeSimulationState.h b/runtime/test/FakeSimulationState.h index 74a0c0c66ed..49667e481c7 100644 --- a/runtime/test/FakeSimulationState.h +++ b/runtime/test/FakeSimulationState.h @@ -31,17 +31,14 @@ class FakeSimulationState : public cudaq::SimulationState { virtual std::unique_ptr createFromData(const cudaq::state_data &data) override { throw std::runtime_error("Not implemented"); - return std::make_unique(0, nullptr); } virtual Tensor 
getTensor(std::size_t tensorIdx = 0) const override { throw std::runtime_error("Not implemented"); - return Tensor(); } virtual std::vector getTensors() const override { throw std::runtime_error("Not implemented"); - return std::vector(); } virtual std::size_t getNumTensors() const override { return 1; } @@ -52,19 +49,16 @@ class FakeSimulationState : public cudaq::SimulationState { virtual std::complex overlap(const SimulationState &other) override { throw std::runtime_error("Not implemented"); - return 0; } virtual std::complex getAmplitude(const std::vector &basisState) override { throw std::runtime_error("Not implemented"); - return 0; } virtual std::vector> getAmplitudes(const std::vector> &basisStates) override { throw std::runtime_error("Not implemented"); - return {0}; } virtual void dump(std::ostream &os) const override { From f8e35eb5a330ad0852a9cb6025570f5b49e416e6 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 11 Feb 2025 20:48:19 -0800 Subject: [PATCH 28/54] Address some PR comments Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.h | 12 ++--- .../Transforms/ArgumentSynthesis.cpp | 16 +++--- .../cudaq/platform/py_alt_launch_kernel.cpp | 2 +- runtime/common/ArgumentConversion.cpp | 49 ++++++++++--------- runtime/common/ArgumentConversion.h | 15 +++--- runtime/common/BaseRemoteRESTQPU.h | 32 ++++++++++-- runtime/cudaq/cudaq.cpp | 7 +++ runtime/cudaq/utils/registry.h | 1 + runtime/test/test_argument_conversion.cpp | 8 +-- 9 files changed, 88 insertions(+), 54 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index bab623bb88e..89d3268fb11 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -58,12 +58,12 @@ std::unique_ptr createArgumentSynthesisPass(mlir::ArrayRef funcNames, mlir::ArrayRef substitutions); -/// Helper function to build an argument synthesis pass. 
The names of the -/// functions and the substitutions text can be built as an unzipped pair of -/// lists. -std::unique_ptr -createArgumentSynthesisPass(const std::vector &funcNames, - const std::vector &substitutions); +// /// Helper function to build an argument synthesis pass. The names of the +// /// functions and the substitutions text can be built as an unzipped pair of +// /// lists. +// std::unique_ptr +// createArgumentSynthesisPass(const mlir::SmallVector &funcNames, +// const mlir::SmallVector &substitutions); // declarative passes #define GEN_PASS_DECL diff --git a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp index 932c091cb73..359f8839ee5 100644 --- a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp +++ b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp @@ -164,11 +164,11 @@ cudaq::opt::createArgumentSynthesisPass(ArrayRef funcNames, ArgumentSynthesisOptions{pairs}); } -std::unique_ptr cudaq::opt::createArgumentSynthesisPass( - const std::vector &funcNames, - const std::vector &substitutions) { - return cudaq::opt::createArgumentSynthesisPass( - mlir::SmallVector{funcNames.begin(), funcNames.end()}, - mlir::SmallVector{substitutions.begin(), - substitutions.end()}); -} +// std::unique_ptr cudaq::opt::createArgumentSynthesisPass( +// const mlir::SmallVector &funcNames, +// const mlir::SmallVector &substitutions) { +// return cudaq::opt::createArgumentSynthesisPass( +// mlir::SmallVector{funcNames.begin(), funcNames.end()}, +// mlir::SmallVector{substitutions.begin(), +// substitutions.end()}); +// } diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 6b20a7bdfbb..b67c092660e 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -535,7 +535,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, auto isLocalSimulator = 
platform.is_simulator() && !platform.is_emulated(); auto isSimulator = isLocalSimulator || isRemoteSimulator; - cudaq::opt::ArgumentConverter argCon(name, unwrap(module)); + opt::ArgumentConverter argCon(name, unwrap(module)); argCon.gen(runtimeArgs.getArgs()); std::string kernName = cudaq::runtime::cudaqGenPrefixName + name; SmallVector kernels = {kernName}; diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index ebc8c52ae18..bc1a558737e 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -20,6 +20,8 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/Parser/Parser.h" +#include + using namespace mlir; template @@ -119,7 +121,7 @@ static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, /// } static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, func::FuncOp calleeFunc, - std::string &initKernelName) { + StringRef initKernelName) { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToEnd(sourceMod.getBody()); @@ -242,7 +244,7 @@ static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, /// } static void createNumQubitsFunc(OpBuilder &builder, ModuleOp sourceMod, func::FuncOp calleeFunc, - std::string &numQubitsKernelName) { + StringRef numQubitsKernelName) { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToEnd(sourceMod.getBody()); @@ -478,27 +480,29 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto code = cudaq::get_quake_by_name(calleeName, /*throwException=*/false); assert(!code.empty() && "Quake code not found for callee"); auto fromModule = parseSourceString(code, ctx); + + auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); + assert(calleeFunc && "callee func is missing"); static unsigned counter = 0; - std::string initName = calleeName + ".init_" + std::to_string(counter); - std::string initKernelName = cudaq::runtime::cudaqGenPrefixName + initName; - - std::string 
numQubitsName = - calleeName + ".num_qubits_" + std::to_string(counter++); - std::string numQubitsKernelName = - cudaq::runtime::cudaqGenPrefixName + numQubitsName; - - auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); - assert(calleeFunc && "callee is missing"); + auto initName = calleeName + ".init_" + std::to_string(counter); + auto numQubitsName = calleeName + ".num_qubits_" + std::to_string(counter++); + auto initKernelName = cudaq::runtime::cudaqGenPrefixName + initName; + auto numQubitsKernelName = cudaq::runtime::cudaqGenPrefixName + numQubitsName; // Create `callee.init_N` and `callee.num_qubits_N` used for // `quake.get_state` replacement later in ReplaceStateWithKernel pass createInitFunc(builder, sourceMod, calleeFunc, initKernelName); createNumQubitsFunc(builder, sourceMod, calleeFunc, numQubitsKernelName); - // Create substitutions for the `callee.init_N` and `callee.num_qubits_N`. - converter.genCallee(initName, calleeArgs); - converter.genCallee(numQubitsName, calleeArgs); + // Create and register names for new `init` and `num_qubits` kernels so + // ArgumentConverters can keep a string reference to a valid memory. + auto registeredInitName = cudaq::registry::cudaqRegisterAuxKernelName(initName.c_str()); + auto registeredNumQubitsName = cudaq::registry::cudaqRegisterAuxKernelName(numQubitsName.c_str()); + + // Create substitutions for `callee.init_N` and `callee.num_qubits_N`. + converter.genCallee(registeredInitName, calleeArgs); + converter.genCallee(registeredNumQubitsName, calleeArgs); // Create a substitution for the state pointer. 
auto statePtrTy = @@ -682,8 +686,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::IndirectCallableType indCallTy, //===----------------------------------------------------------------------===// cudaq::opt::ArgumentConverter::ArgumentConverter(StringRef kernelName, - ModuleOp sourceModule, - bool isSimulator) + ModuleOp sourceModule) : sourceModule(sourceModule), builder(sourceModule.getContext()), kernelName(kernelName) { substModule = builder.create(builder.getUnknownLoc()); @@ -694,7 +697,7 @@ void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { // We should look up the input type signature here. auto fun = sourceModule.lookupSymbol( - cudaq::runtime::cudaqGenPrefixName + kernelName); + cudaq::runtime::cudaqGenPrefixName + kernelName.str()); FunctionType fromFuncTy = fun.getFunctionType(); for (auto iter : llvm::enumerate(llvm::zip(fromFuncTy.getInputs(), arguments))) { @@ -816,22 +819,22 @@ void cudaq::opt::ArgumentConverter::gen_drop_front( gen(partialArgs); } -std::pair, std::vector> +std::pair, SmallVector> cudaq::opt::ArgumentConverter::collectAllSubstitutions() { - std::vector kernels; - std::vector substs; + SmallVector kernels; + SmallVector substs; std::function collect = [&kernels, &substs, &collect](ArgumentConverter &con) { auto name = con.getKernelName(); std::string kernName = cudaq::runtime::cudaqGenPrefixName + name.str(); - kernels.push_back(kernName); + kernels.emplace_back(kernName); { std::string substBuff; llvm::raw_string_ostream ss(substBuff); ss << con.getSubstitutionModule(); - substs.push_back(substBuff); + substs.emplace_back(substBuff); } for (auto &calleeCon : con.getCalleeConverters()) diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 1cb2b86ac14..25f033efb78 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -15,6 +15,7 @@ #include "mlir/IR/Types.h" #include #include +#include namespace cudaq::opt { @@ -22,8 +23,7 @@ class 
ArgumentConverter { public: /// Build an instance to create argument substitutions for a specified \p /// kernelName in \p sourceModule. - ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule, - bool isSimulator = true); + ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule); /// Generate a substitution ModuleOp for the vector of arguments presented. /// The arguments are those presented to the kernel, kernelName. @@ -52,24 +52,25 @@ class ArgumentConverter { mlir::StringRef getKernelName() { return kernelName; } - void genCallee(std::string &calleeName, std::vector &args) { - auto converter = ArgumentConverter(calleeName, sourceModule); + void genCallee(mlir::StringRef calleeName, std::vector &args) { + // auto converter = ArgumentConverter(calleeName, sourceModule); + // converter.gen(args); + auto converter = calleeConverters.emplace_back(ArgumentConverter(calleeName, sourceModule)); converter.gen(args); - calleeConverters.push_back(converter); } std::vector &getCalleeConverters() { return calleeConverters; } - std::pair, std::vector> + std::pair, mlir::SmallVector> collectAllSubstitutions(); private: mlir::ModuleOp sourceModule; mlir::ModuleOp substModule; mlir::OpBuilder builder; - std::string kernelName; + mlir::StringRef kernelName; mlir::SmallVector substitutions; std::vector calleeConverters; }; diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index de0e768e77d..4b60e4a537d 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -447,14 +447,36 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (!rawArgs.empty() || updatedArgs) { mlir::PassManager pm(&context); if (!rawArgs.empty()) { - // For quantum hardware, we collect substitutions for the - // whole call tree of states, which are treated as calls to - // the kernels and their arguments that produced the state. 
opt::ArgumentConverter argCon(kernelName, moduleOp); argCon.gen(rawArgs); - auto [kernels, substs] = argCon.collectAllSubstitutions(); + + // For quantum hardware, we traverse the tree of ArgumentConverters + // for the call tree of states and collect substitutions for all calls. + mlir::SmallVector kernels; + mlir::SmallVector substs; + + std::function collect = + [&kernels, &substs, &collect](opt::ArgumentConverter &con) { + auto name = con.getKernelName(); + std::string kernName = cudaq::runtime::cudaqGenPrefixName + name.str(); + kernels.emplace_back(kernName); + + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << con.getSubstitutionModule(); + substs.emplace_back(substBuff); + + for (auto &calleeCon : con.getCalleeConverters()) + collect(calleeCon); + }; + + collect(argCon); + + mlir::SmallVector funcNames{kernels.begin(), kernels.end()}; + mlir::SmallVector substitutions{substs.begin(), substs.end()}; + pm.addNestedPass( - cudaq::opt::createArgumentSynthesisPass(kernels, substs)); + cudaq::opt::createArgumentSynthesisPass(funcNames, substitutions)); pm.addPass(opt::createDeleteStates()); pm.addNestedPass( opt::createReplaceStateWithKernel()); diff --git a/runtime/cudaq/cudaq.cpp b/runtime/cudaq/cudaq.cpp index 071f658f43f..07da9f60048 100644 --- a/runtime/cudaq/cudaq.cpp +++ b/runtime/cudaq/cudaq.cpp @@ -19,6 +19,7 @@ #include "distributed/mpi_plugin.h" #include #include +#include #include #include #include @@ -242,6 +243,7 @@ void cudaq::registry::__cudaq_deviceCodeHolderAdd(const char *key, //===----------------------------------------------------------------------===// static std::vector kernelRegistry; +static std::list auxKernelRegistry; static std::map argsCreators; static std::map lambdaNames; @@ -252,6 +254,11 @@ void cudaq::registry::cudaqRegisterKernelName(const char *kernelName) { kernelRegistry.emplace_back(kernelName); } +const char * cudaq::registry::cudaqRegisterAuxKernelName(const char *kernelName) { + std::unique_lock 
lock(globalRegistryMutex); + return auxKernelRegistry.emplace_back(kernelName).c_str(); +} + void cudaq::registry::__cudaq_registerLinkableKernel(void *hostSideFunc, const char *kernelName, void *deviceSideFunc) { diff --git a/runtime/cudaq/utils/registry.h b/runtime/cudaq/utils/registry.h index 2748afe17e7..cb6a7c0c146 100644 --- a/runtime/cudaq/utils/registry.h +++ b/runtime/cudaq/utils/registry.h @@ -13,6 +13,7 @@ namespace cudaq::registry { extern "C" { void __cudaq_deviceCodeHolderAdd(const char *, const char *); void cudaqRegisterKernelName(const char *); +const char* cudaqRegisterAuxKernelName(const char *); void cudaqRegisterArgsCreator(const char *, char *); void cudaqRegisterLambdaName(const char *, const char *); diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index a5e5fa3474f..1ecf85c0404 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -487,7 +487,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { " %2 = quake.alloca !quake.veq[%arg0 : i64]\n" " %3 = quake.extract_ref %2[0] : (!quake.veq) -> !quake.ref\n" " quake.x %3 : (!quake.ref) -> ()\n" - " %measOut = quake.mz %3 name \"\" : (!quake.ref) -> !quake.measure\n" + " %measOut = quake.mz %3 name \"q0\" : (!quake.ref) -> !quake.measure\n" " %4 = quake.discriminate %measOut : (!quake.measure) -> i1\n" " cc.if(%4) {\n" " %6 = quake.alloca !quake.veq[%arg0 : i64]\n" @@ -514,7 +514,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] // CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref // CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () -// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] name "" : (!quake.ref) -> !quake.measure +// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] name "q0" : (!quake.ref) -> !quake.measure // CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_2]] : (!quake.measure) -> i1 // CHECK: 
cc.if(%[[VAL_3]]) { // CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%arg0 : i64] @@ -532,7 +532,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] // CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref // CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () -// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] name "" : (!quake.ref) -> !quake.measure +// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] name "q0" : (!quake.ref) -> !quake.measure // CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_2]] : (!quake.measure) -> i1 // CHECK: cc.if(%[[VAL_3]]) { // CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%arg0 : i64] @@ -553,7 +553,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: %[[VAL_5:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 // CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_3]][0] : (!quake.veq) -> !quake.ref // CHECK: quake.x %[[VAL_6]] : (!quake.ref) -> () -// CHECK: %[[VAL_7:.*]] = quake.mz %[[VAL_6]] name "" : (!quake.ref) -> !quake.measure +// CHECK: %[[VAL_7:.*]] = quake.mz %[[VAL_6]] name "q0" : (!quake.ref) -> !quake.measure // CHECK: %[[VAL_8:.*]] = quake.discriminate %[[VAL_7]] : (!quake.measure) -> i1 // CHECK: cc.if(%[[VAL_8]]) { // CHECK: %[[VAL_11:.*]] = quake.alloca !quake.veq[%arg0 : i64] From e79ad6abe648e89ae3f620678c3fc62c2a02d71e Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 12 Feb 2025 11:52:52 -0800 Subject: [PATCH 29/54] Address more CR comments Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Dialect/CC/CCTypes.td | 6 +++ .../cudaq/Optimizer/Dialect/Quake/QuakeOps.td | 16 +++--- include/cudaq/Optimizer/Transforms/Passes.h | 7 --- include/cudaq/Optimizer/Transforms/Passes.td | 2 +- .../Transforms/ArgumentSynthesis.cpp | 9 ---- .../Transforms/ReplaceStateWithKernel.cpp | 10 ++-- runtime/common/ArgumentConversion.cpp | 49 +++++-------------- runtime/common/ArgumentConversion.h | 15 +++--- runtime/common/BaseRemoteRESTQPU.h | 
46 ++++++++++------- runtime/cudaq/cudaq.cpp | 6 --- runtime/cudaq/utils/registry.h | 1 - runtime/test/test_argument_conversion.cpp | 7 +-- test/Quake/arg_subst-5.txt | 2 +- test/Quake/arg_subst_func.qke | 2 +- test/Quake/replace_state_with_kernel.qke | 6 +-- 15 files changed, 79 insertions(+), 105 deletions(-) diff --git a/include/cudaq/Optimizer/Dialect/CC/CCTypes.td b/include/cudaq/Optimizer/Dialect/CC/CCTypes.td index 18bce4e156a..03b8d9541d9 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCTypes.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCTypes.td @@ -313,4 +313,10 @@ def AnyStateInitLike : TypeConstraint; def AnyStateInitType : Type; +def AnyStatePointerType : Type< + And<[ + cc_PointerType.predicate, + CPred<"$_self.cast().getElementType().isa()"> + ]>, + "state pointer type">; #endif // CUDAQ_DIALECT_CC_TYPES_TD diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td index e5bb1222088..65730b84f29 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td @@ -1418,7 +1418,7 @@ def quake_CreateStateOp : QuakeOp<"create_state", [Pure]> { cc_PointerType:$data, AnySignlessInteger:$length ); - let results = (outs cc_PointerType:$result); + let results = (outs AnyStatePointerType:$result); let assemblyFormat = [{ $data `,` $length `:` functional-type(operands, results) attr-dict }]; @@ -1436,7 +1436,7 @@ def QuakeOp_DeleteStateOp : QuakeOp<"delete_state", [] > { ``` }]; - let arguments = (ins cc_PointerType:$state); + let arguments = (ins AnyStatePointerType:$state); let results = (outs); let assemblyFormat = [{ $state `:` type(operands) attr-dict @@ -1456,7 +1456,7 @@ def quake_GetNumberOfQubitsOp : QuakeOp<"get_number_of_qubits", [Pure] > { ``` }]; - let arguments = (ins cc_PointerType:$state); + let arguments = (ins AnyStatePointerType:$state); let results = (outs AnySignlessInteger:$result); let assemblyFormat = [{ $state `:` 
functional-type(operands, results) attr-dict @@ -1479,17 +1479,17 @@ def QuakeOp_GetStateOp : QuakeOp<"get_state", [Pure] > { the provided names in `ReplaceStateByKernel` pass. ```mlir - %0 = quake.get_state "num_qubits" "init" : !cc.ptr + %0 = quake.get_state @num_qubits @init : !cc.ptr ``` }]; let arguments = (ins - StrAttr:$numQubitsFuncName, - StrAttr:$initFuncName + FlatSymbolRefAttr:$numQubitsFunc, + FlatSymbolRefAttr:$initFunc ); - let results = (outs cc_PointerType:$result); + let results = (outs AnyStatePointerType:$result); let assemblyFormat = [{ - $numQubitsFuncName $initFuncName `:` qualified(type(results)) attr-dict + $numQubitsFunc $initFunc `:` qualified(type(results)) attr-dict }]; } diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 89d3268fb11..4bfddf6101d 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -58,13 +58,6 @@ std::unique_ptr createArgumentSynthesisPass(mlir::ArrayRef funcNames, mlir::ArrayRef substitutions); -// /// Helper function to build an argument synthesis pass. The names of the -// /// functions and the substitutions text can be built as an unzipped pair of -// /// lists. 
-// std::unique_ptr -// createArgumentSynthesisPass(const mlir::SmallVector &funcNames, -// const mlir::SmallVector &substitutions); - // declarative passes #define GEN_PASS_DECL #define GEN_PASS_REGISTRATION diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 7c77b7d5fbd..9afc56a4083 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -842,7 +842,7 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func Before ReplaceStateWithKernel (replace-state-with-kernel): ``` func.func @foo() { - %0 = quake.get_state "callee.num_qubits_0" "callee.init_0": !cc.ptr + %0 = quake.get_state @callee.num_qubits_0 @callee.init_0: !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq diff --git a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp index 359f8839ee5..76a3ac36ca8 100644 --- a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp +++ b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp @@ -163,12 +163,3 @@ cudaq::opt::createArgumentSynthesisPass(ArrayRef funcNames, return std::make_unique( ArgumentSynthesisOptions{pairs}); } - -// std::unique_ptr cudaq::opt::createArgumentSynthesisPass( -// const mlir::SmallVector &funcNames, -// const mlir::SmallVector &substitutions) { -// return cudaq::opt::createArgumentSynthesisPass( -// mlir::SmallVector{funcNames.begin(), funcNames.end()}, -// mlir::SmallVector{substitutions.begin(), -// substitutions.end()}); -// } diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index d102d156da2..872e12c3f32 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -34,7 +34,7 @@ namespace { /// that computes the number 
of qubits for a state. /// /// ``` -/// %0 = quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr +/// %0 = quake.get_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr /// %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 /// ─────────────────────────────────────────── /// ... @@ -51,11 +51,11 @@ class ReplaceGetNumQubitsPattern auto stateOp = numQubits.getOperand(); if (auto getState = stateOp.getDefiningOp()) { - auto numQubitsName = getState.getNumQubitsFuncName(); + auto numQubitsFunc = getState.getNumQubitsFunc(); rewriter.setInsertionPoint(numQubits); rewriter.replaceOpWithNewOp( - numQubits, numQubits.getType(), numQubitsName, mlir::ValueRange{}); + numQubits, numQubits.getType(), numQubitsFunc, mlir::ValueRange{}); return success(); } return numQubits->emitError( @@ -68,7 +68,7 @@ class ReplaceGetNumQubitsPattern /// the state. /// /// ``` -/// %0 = quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr +/// %0 = quake.get_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr /// %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq /// ─────────────────────────────────────────── /// ... 
@@ -88,7 +88,7 @@ class ReplaceInitStatePattern if (auto ptrTy = dyn_cast(stateOp.getType())) { if (isa(ptrTy.getElementType())) { if (auto getState = stateOp.getDefiningOp()) { - auto initName = getState.getInitFuncName(); + auto initName = getState.getInitFunc(); rewriter.setInsertionPoint(initState); rewriter.replaceOpWithNewOp( diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index bc1a558737e..ceccc2dc24e 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -120,8 +120,7 @@ static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, /// return %arg0: !quake.veq /// } static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, - func::FuncOp calleeFunc, - StringRef initKernelName) { + func::FuncOp calleeFunc, StringRef initKernelName) { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToEnd(sourceMod.getBody()); @@ -394,7 +393,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // Initializes the veq passed as a parameter // // Then replace the state with - // `quake.get_state "callee.num_qubits_0" "callee.init_state_0"`: + // `quake.get_state @callee.num_qubits_0 @callee.init_state_0`: // // clang-format off // ``` @@ -423,7 +422,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // clang-format off // ``` // func.func @caller() { - // %0 = quake.get_state "callee.num_qubits_0" "callee.init_state_0" : !cc.ptr + // %0 = quake.get_state @callee.num_qubits_0 @callee.init_state_0 : !cc.ptr // %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 // %2 = quake.alloca !quake.veq[%1 : i64] // %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq @@ -480,15 +479,17 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto code = cudaq::get_quake_by_name(calleeName, /*throwException=*/false); assert(!code.empty() && "Quake code not found for callee"); auto fromModule = 
parseSourceString(code, ctx); - + auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); assert(calleeFunc && "callee func is missing"); static unsigned counter = 0; auto initName = calleeName + ".init_" + std::to_string(counter); - auto numQubitsName = calleeName + ".num_qubits_" + std::to_string(counter++); + auto numQubitsName = + calleeName + ".num_qubits_" + std::to_string(counter++); auto initKernelName = cudaq::runtime::cudaqGenPrefixName + initName; - auto numQubitsKernelName = cudaq::runtime::cudaqGenPrefixName + numQubitsName; + auto numQubitsKernelName = + cudaq::runtime::cudaqGenPrefixName + numQubitsName; // Create `callee.init_N` and `callee.num_qubits_N` used for // `quake.get_state` replacement later in ReplaceStateWithKernel pass @@ -497,9 +498,11 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // Create and register names for new `init` and `num_qubits` kernels so // ArgumentConverters can keep a string reference to a valid memory. - auto registeredInitName = cudaq::registry::cudaqRegisterAuxKernelName(initName.c_str()); - auto registeredNumQubitsName = cudaq::registry::cudaqRegisterAuxKernelName(numQubitsName.c_str()); - + auto &registeredInitName = + cudaq::opt::ArgumentConverter::registerKernelName(initName); + auto &registeredNumQubitsName = + cudaq::opt::ArgumentConverter::registerKernelName(numQubitsName); + // Create substitutions for `callee.init_N` and `callee.num_qubits_N`.
converter.genCallee(registeredInitName, calleeArgs); converter.genCallee(registeredNumQubitsName, calleeArgs); @@ -818,29 +821,3 @@ void cudaq::opt::ArgumentConverter::gen_drop_front( } gen(partialArgs); } - -std::pair, SmallVector> -cudaq::opt::ArgumentConverter::collectAllSubstitutions() { - SmallVector kernels; - SmallVector substs; - - std::function collect = - [&kernels, &substs, &collect](ArgumentConverter &con) { - auto name = con.getKernelName(); - std::string kernName = cudaq::runtime::cudaqGenPrefixName + name.str(); - kernels.emplace_back(kernName); - - { - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << con.getSubstitutionModule(); - substs.emplace_back(substBuff); - } - - for (auto &calleeCon : con.getCalleeConverters()) - collect(calleeCon); - }; - - collect(*this); - return {kernels, substs}; -} diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 25f033efb78..d07a5e5e989 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -13,9 +13,9 @@ #include "cudaq/qis/state.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Types.h" +#include #include #include -#include namespace cudaq::opt { @@ -53,9 +53,7 @@ class ArgumentConverter { mlir::StringRef getKernelName() { return kernelName; } void genCallee(mlir::StringRef calleeName, std::vector &args) { - // auto converter = ArgumentConverter(calleeName, sourceModule); - // converter.gen(args); - auto converter = calleeConverters.emplace_back(ArgumentConverter(calleeName, sourceModule)); + auto &converter = calleeConverters.emplace_back(calleeName, sourceModule); converter.gen(args); } @@ -63,10 +61,15 @@ class ArgumentConverter { return calleeConverters; } - std::pair, mlir::SmallVector> - collectAllSubstitutions(); + static const std::string &registerKernelName(const std::string &kernelName) { + return kernelNameRegistry.emplace_back(kernelName); + } private: + // Note: use std::list to make sure we always
return valid references + // when registering new kernel names. + static std::list kernelNameRegistry; + mlir::ModuleOp sourceModule; mlir::ModuleOp substModule; mlir::OpBuilder builder; diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 4b60e4a537d..5cdd07d1646 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -450,31 +450,41 @@ class BaseRemoteRESTQPU : public cudaq::QPU { opt::ArgumentConverter argCon(kernelName, moduleOp); argCon.gen(rawArgs); - // For quantum hardware, we traverse the tree of ArgumentConverters - // for the call tree of states and collect substitutions for all calls. + // For quantum devices, we've created a tree of ArgumentConverters + // with nodes corresponding to `init` and `num_qubits` functions + // created from a kernel that generated the state argument. + // Traverse the tree and collect substitutions for all those + // functions. + + // Store kernel and substitution strings on the stack. + // We pass string references to the `createArgumentSynthesisPass`. 
mlir::SmallVector kernels; mlir::SmallVector substs; std::function collect = - [&kernels, &substs, &collect](opt::ArgumentConverter &con) { - auto name = con.getKernelName(); - std::string kernName = cudaq::runtime::cudaqGenPrefixName + name.str(); - kernels.emplace_back(kernName); - - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << con.getSubstitutionModule(); - substs.emplace_back(substBuff); - - for (auto &calleeCon : con.getCalleeConverters()) - collect(calleeCon); - }; + [&kernels, &substs, &collect](opt::ArgumentConverter &con) { + { + auto name = con.getKernelName(); + std::string kernName = + cudaq::runtime::cudaqGenPrefixName + name.str(); + kernels.emplace_back(kernName); + } + { + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << con.getSubstitutionModule(); + substs.emplace_back(substBuff); + } + for (auto &calleeCon : con.getCalleeConverters()) + collect(calleeCon); + }; collect(argCon); - mlir::SmallVector funcNames{kernels.begin(), kernels.end()}; - mlir::SmallVector substitutions{substs.begin(), substs.end()}; - + mlir::SmallVector funcNames{kernels.begin(), + kernels.end()}; + mlir::SmallVector substitutions{substs.begin(), + substs.end()}; pm.addNestedPass( cudaq::opt::createArgumentSynthesisPass(funcNames, substitutions)); pm.addPass(opt::createDeleteStates()); diff --git a/runtime/cudaq/cudaq.cpp b/runtime/cudaq/cudaq.cpp index 07da9f60048..5dbdf4ee8cf 100644 --- a/runtime/cudaq/cudaq.cpp +++ b/runtime/cudaq/cudaq.cpp @@ -243,7 +243,6 @@ void cudaq::registry::__cudaq_deviceCodeHolderAdd(const char *key, //===----------------------------------------------------------------------===// static std::vector kernelRegistry; -static std::list auxKernelRegistry; static std::map argsCreators; static std::map lambdaNames; @@ -254,11 +253,6 @@ void cudaq::registry::cudaqRegisterKernelName(const char *kernelName) { kernelRegistry.emplace_back(kernelName); } -const char * 
cudaq::registry::cudaqRegisterAuxKernelName(const char *kernelName) { - std::unique_lock lock(globalRegistryMutex); - return auxKernelRegistry.emplace_back(kernelName).c_str(); -} - void cudaq::registry::__cudaq_registerLinkableKernel(void *hostSideFunc, const char *kernelName, void *deviceSideFunc) { diff --git a/runtime/cudaq/utils/registry.h b/runtime/cudaq/utils/registry.h index cb6a7c0c146..2748afe17e7 100644 --- a/runtime/cudaq/utils/registry.h +++ b/runtime/cudaq/utils/registry.h @@ -13,7 +13,6 @@ namespace cudaq::registry { extern "C" { void __cudaq_deviceCodeHolderAdd(const char *, const char *); void cudaqRegisterKernelName(const char *); -const char* cudaqRegisterAuxKernelName(const char *); void cudaqRegisterArgsCreator(const char *, char *); void cudaqRegisterLambdaName(const char *, const char *); diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 1ecf85c0404..84c8d425001 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -463,7 +463,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.get_state "__nvqpp__mlirgen__init.num_qubits_0" "__nvqpp__mlirgen__init.init_0" : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.get_state @__nvqpp__mlirgen__init.num_qubits_0 @__nvqpp__mlirgen__init.init_0 : !cc.ptr // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: @@ -487,7 +487,8 @@ void test_quantum_state(mlir::MLIRContext *ctx) { " %2 = quake.alloca !quake.veq[%arg0 : i64]\n" " %3 = quake.extract_ref %2[0] : (!quake.veq) -> !quake.ref\n" " quake.x %3 : (!quake.ref) -> ()\n" - " %measOut = quake.mz %3 name \"q0\" : (!quake.ref) -> !quake.measure\n" + " %measOut = quake.mz %3 name \"q0\" : (!quake.ref) -> " + "!quake.measure\n" " %4 = quake.discriminate %measOut : (!quake.measure) -> i1\n" " cc.if(%4) {\n" " 
%6 = quake.alloca !quake.veq[%arg0 : i64]\n" @@ -576,7 +577,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.get_state "__nvqpp__mlirgen__init.num_qubits_1" "__nvqpp__mlirgen__init.init_1" : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.get_state @__nvqpp__mlirgen__init.num_qubits_1 @__nvqpp__mlirgen__init.init_1 : !cc.ptr // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: diff --git a/test/Quake/arg_subst-5.txt b/test/Quake/arg_subst-5.txt index 5b43881daf5..5020e7fe096 100644 --- a/test/Quake/arg_subst-5.txt +++ b/test/Quake/arg_subst-5.txt @@ -7,5 +7,5 @@ // ========================================================================== // cc.arg_subst[0] { - %0 = quake.get_state "num_qubits" "init" : !cc.ptr + %0 = quake.get_state @num_qubits @init : !cc.ptr } diff --git a/test/Quake/arg_subst_func.qke b/test/Quake/arg_subst_func.qke index 0a3fa2d653f..8df6c5e1433 100644 --- a/test/Quake/arg_subst_func.qke +++ b/test/Quake/arg_subst_func.qke @@ -163,7 +163,7 @@ func.func @num_qubits(%arg0: i64) -> i64 { } // CHECK-LABEL: func.func @testy5() { -// CHECK: %[[VAL_2:.*]] = quake.get_state "num_qubits" "init" : !cc.ptr +// CHECK: %[[VAL_2:.*]] = quake.get_state @num_qubits @init : !cc.ptr // CHECK: %[[VAL_3:.*]] = quake.get_number_of_qubits %[[VAL_2]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_5:.*]] = quake.init_state %[[VAL_4]], %[[VAL_2]] : (!quake.veq, !cc.ptr) -> !quake.veq diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index 15bc9cab104..24bdd787216 100644 --- a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -23,7 +23,7 @@ module { } func.func @caller0() { - %0 = quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr + %0 = quake.get_state 
@callee.num_qubits_0 @callee.init_0 : !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq @@ -38,7 +38,7 @@ module { // CHECK: } func.func @caller1(%arg0: i64) { - %0 = quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr + %0 = quake.get_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr %2 = quake.alloca !quake.veq[%arg0 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq return @@ -51,7 +51,7 @@ module { // CHECK: } func.func @caller2() -> i64 { - %0 = quake.get_state "callee.num_qubits_0" "callee.init_0" : !cc.ptr + %0 = quake.get_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 return %1: i64 } From c0d9ae9e51a3ff342f99fab2dc25935be05a2b26 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 12 Feb 2025 22:08:09 -0800 Subject: [PATCH 30/54] Cleanup Signed-off-by: Anna Gringauze --- python/runtime/cudaq/platform/py_alt_launch_kernel.cpp | 2 +- runtime/common/ArgumentConversion.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index b67c092660e..6b20a7bdfbb 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -535,7 +535,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, auto isLocalSimulator = platform.is_simulator() && !platform.is_emulated(); auto isSimulator = isLocalSimulator || isRemoteSimulator; - opt::ArgumentConverter argCon(name, unwrap(module)); + cudaq::opt::ArgumentConverter argCon(name, unwrap(module)); argCon.gen(runtimeArgs.getArgs()); std::string kernName = cudaq::runtime::cudaqGenPrefixName + name; SmallVector kernels = {kernName}; diff --git a/runtime/common/ArgumentConversion.cpp 
b/runtime/common/ArgumentConversion.cpp index ceccc2dc24e..421faa48c3b 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -20,8 +20,6 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/Parser/Parser.h" -#include - using namespace mlir; template From 1ecd8cc0168e2f454efb200da013a37e20ac347c Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 13 Feb 2025 14:09:49 -0800 Subject: [PATCH 31/54] Address CR comments Signed-off-by: Anna Gringauze --- .../cudaq/Optimizer/Dialect/Quake/QuakeOps.td | 19 +-- include/cudaq/Optimizer/Transforms/Passes.td | 42 +++++- runtime/common/ArgumentConversion.cpp | 19 ++- runtime/cudaq/algorithms/get_state.h | 13 +- runtime/test/FakeQuantumState.h | 37 +++-- runtime/test/FakeSimulationState.h | 1 - runtime/test/test_argument_conversion.cpp | 131 +++++++++++++++++- test/Quake/replace_state_with_kernel.qke | 2 +- 8 files changed, 216 insertions(+), 48 deletions(-) diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td index 65730b84f29..6bb5e985092 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td @@ -1469,14 +1469,17 @@ def QuakeOp_GetStateOp : QuakeOp<"get_state", [Pure] > { This operation is created by argument synthesis of state pointer arguments for quantum devices. - It takes two kernel names as ASCIIZ string literals: - - "num_qubits" for determining the size of the allocation to initialize - - "init" for initializing the state the same way as the original kernel - passed to `cudaq::get_state`) as ASCIIZ string literal - - And returns the quantum state of the original kernel passed to - `cudaq::get_state`. The operation is replaced by calls to the kernels with - the provided names in `ReplaceStateByKernel` pass. 
+ It takes two kernel names as symbol references: + - @num_qubits for determining the size of the allocation to initialize + - @init for initializing the state the same way as the original kernel + passed to `cudaq::get_state`. + + This operation will return of the original kernel passed to + `cudaq::get_state`. `cudaq::get_state`. + + The operation may be replaced by calls to the @num_qubits and @init calls, + which will reproduce the specified state in the `ReplaceStateByKernel` + pass. ```mlir %0 = quake.get_state @num_qubits @init : !cc.ptr diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 9afc56a4083..687eee5120d 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -828,14 +828,44 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func let summary = "Replace `quake.init_state` instructions with call to the kernel generating the state"; let description = [{ - Argument synthesis for state pointers for quantum devices substitutes state - argument by a new state created from `__nvqpp_cudaq_state_get` intrinsic, which - in turn accepts the name for the synthesized kernel that generated the state. + This optimization replaces `quake.init_state`, `quake.get_number_of_qubits`, + and `quake.get_state` operations. + + Before this optimization, argument synthesis for state pointers for quantum + devices substituted a new state created from the `quake.get_state` operation + for the state argument. 
+ + The `quake.get_state` operation accepts symbols for the synthesized kernels + `num_qubits` and `init` that argument synthesis generated from the original + kernel call that generated the state, e.g., the `cudaq::get_state` call that + refers to the result of a specific quantum kernel being invoked with a set + of parameters + + For example, for the user code: + ``` + state = cudaq::get_state(callee, args) + caller(state) + ``` + + The argument synthesis generated the following new kernels from the `callee` + and synthesized them to substitute their arguments with `args`: + ``` + func.func @callee_init(qubits: !quake.veq, arguments) -> !quake.veq + func.func @callee_num_qubits(arguments) -> i64 + ``` + + The argument synthesis also substituted the state argument in the `caller` + with: + ``` + quake.get_state @callee_num_qubits @callee_init: !cc.ptr + ``` - This optimization completes the replacement of `quake.init_state` instruction by: + This optimization performs the replacements for the the following operations + that use a state produced by `quake.get_state @num_qubits @init` operation: - - Replace `quake.init_state` by a call that `get_state` call refers to. - - Remove all unneeded instructions. 
+ - Replace `quake.get_number_of_qubits` operation by the @num_qubits call + - Replace `quake.init_state` operation by the @init call + - Clean up unused `quake.get_state` operation For example: diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 421faa48c3b..664d10549f9 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -100,7 +100,8 @@ static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp substMod, llvm::DataLayout &); /// Create callee.init_N that initializes the state -/// Callee: +/// Callee (the kernel captured by state): +// clang-format off /// func.func @__nvqpp__mlirgen__callee(%arg0: i64) { /// %0 = cc.alloca i64 /// cc.store %arg0, %0 : !cc.ptr @@ -117,6 +118,7 @@ static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, /// quake.x %1 : (f64, !quake.ref) -> () /// return %arg0: !quake.veq /// } +// clang-format on static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, func::FuncOp calleeFunc, StringRef initKernelName) { OpBuilder::InsertionGuard guard(builder); @@ -139,9 +141,8 @@ static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, auto *entryBlock = &initFunc.getRegion().front(); newBuilder.setInsertionPointToStart(entryBlock); - auto intType = newBuilder.getI64Type(); - Value zero = newBuilder.create(loc, 0, intType); - Value one = newBuilder.create(loc, 1, intType); + Value zero = newBuilder.create(loc, 0, 64); + Value one = newBuilder.create(loc, 1, 64); Value begin = zero; auto argPos = initFunc.getArguments().size(); @@ -149,10 +150,10 @@ static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, // Detect errors in kernel passed to get_state. 
std::function processInner = [&](Block &block) { for (auto &op : block) { - for (auto &region : op.getRegions()) { + for (auto &region : op.getRegions()) for (auto &b : region) processInner(b); - } + // Don't allow returns in inner scopes if (auto retOp = dyn_cast(&op)) calleeFunc.emitError("Encountered return in inner scope in a kernel " @@ -222,7 +223,10 @@ static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, } /// Create callee.num_qubits_N that calculates the number of qubits to -/// initialize Callee: func.func @callee(%arg0: i64) { +/// initialize the state +/// Callee: (the kernel captured by state): +// clang-format off +/// func.func @callee(%arg0: i64) { /// %0 = cc.alloca i64 /// cc.store %arg0, %0 : !cc.ptr /// %1 = cc.load %0 : !cc.ptr @@ -239,6 +243,7 @@ static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, /// %1 = cc.load %0 : !cc.ptr /// return %1 : i64 /// } +// clang-format on static void createNumQubitsFunc(OpBuilder &builder, ModuleOp sourceMod, func::FuncOp calleeFunc, StringRef numQubitsKernelName) { diff --git a/runtime/cudaq/algorithms/get_state.h b/runtime/cudaq/algorithms/get_state.h index 204de442934..dacb2ef2793 100644 --- a/runtime/cudaq/algorithms/get_state.h +++ b/runtime/cudaq/algorithms/get_state.h @@ -119,17 +119,16 @@ auto get_state(QuantumKernel &&kernel, Args &&...args) { return state(new RemoteSimulationState(std::forward(kernel), std::forward(args)...)); } -#endif +#else #if defined(CUDAQ_QUANTUM_DEVICE) // Store kernel name and arguments for quantum states. 
- if (!cudaq::get_quake_by_name(cudaq::getKernelName(kernel), false).empty()) { + if (!cudaq::get_quake_by_name(cudaq::getKernelName(kernel), false).empty()) return state(new QuantumState(std::forward(kernel), std::forward(args)...)); - } else { - throw std::runtime_error( - "cudaq::state* argument synthesis is not supported for quantum hardware" - "for c-like functions, use class kernels instead"); - } + throw std::runtime_error( + "cudaq::state* argument synthesis is not supported for quantum hardware" + "for c-like functions, use class kernels instead"); +#endif #endif return details::extractState([&]() mutable { cudaq::invokeKernel(std::forward(kernel), diff --git a/runtime/test/FakeQuantumState.h b/runtime/test/FakeQuantumState.h index 2f20babd955..87a177a6c21 100644 --- a/runtime/test/FakeQuantumState.h +++ b/runtime/test/FakeQuantumState.h @@ -10,14 +10,14 @@ #include #include -#include - /// @cond DO_NOT_DOCUMENT /// @brief Fake simulation state to use in tests. -class FakeQuantumState : public cudaq::SimulationState { +class FakeDeviceState : public cudaq::SimulationState { private: std::string kernelName; std::vector args; + std::size_t size = 0; + void *data = 0; public: virtual std::unique_ptr @@ -26,11 +26,11 @@ class FakeQuantumState : public cudaq::SimulationState { throw std::runtime_error("Not implemented"); } - FakeQuantumState() = default; - FakeQuantumState(const std::string &kernelName, - const std::vector args) + FakeDeviceState() = default; + FakeDeviceState(std::size_t size, void *data) : size(size), data(data) {} + FakeDeviceState(const std::string &kernelName, const std::vector args) : kernelName(kernelName), args(args) {} - FakeQuantumState(const FakeQuantumState &other) + FakeDeviceState(const FakeDeviceState &other) : kernelName(other.kernelName), args(other.args) {} virtual std::unique_ptr @@ -38,7 +38,7 @@ class FakeQuantumState : public cudaq::SimulationState { throw std::runtime_error("Not implemented"); } - virtual bool hasData() 
const override { return false; } + virtual bool hasData() const override { return data != nullptr; } virtual std::optional>> getKernelInfo() const override { @@ -53,9 +53,15 @@ class FakeQuantumState : public cudaq::SimulationState { throw std::runtime_error("Not implemented"); } - virtual std::size_t getNumTensors() const override { return 1; } + virtual std::size_t getNumTensors() const override { + if (hasData()) + return 1; + throw std::runtime_error("Not implemented"); + } virtual std::size_t getNumQubits() const override { + if (hasData()) + return std::countr_zero(size); throw std::runtime_error("Not implemented"); } @@ -78,7 +84,9 @@ class FakeQuantumState : public cudaq::SimulationState { } virtual precision getPrecision() const override { - return cudaq::SimulationState::precision::fp64; + if (hasData()) + return cudaq::SimulationState::precision::fp64; + throw std::runtime_error("Not implemented"); } virtual void destroyState() override {} @@ -86,10 +94,17 @@ class FakeQuantumState : public cudaq::SimulationState { virtual std::complex operator()(std::size_t tensorIdx, const std::vector &indices) override { + if (hasData()) { + assert(tensorIdx == 0); + assert(indices.size() == 1); + return *(static_cast *>(data) + indices[0]); + } throw std::runtime_error("Not implemented"); } virtual std::size_t getNumElements() const override { + if (hasData()) + return size; throw std::runtime_error("Not implemented"); } @@ -107,6 +122,6 @@ class FakeQuantumState : public cudaq::SimulationState { throw std::runtime_error("Not implemented"); } - virtual ~FakeQuantumState() override {} + virtual ~FakeDeviceState() override {} }; /// @endcond diff --git a/runtime/test/FakeSimulationState.h b/runtime/test/FakeSimulationState.h index 49667e481c7..53e2b0bf936 100644 --- a/runtime/test/FakeSimulationState.h +++ b/runtime/test/FakeSimulationState.h @@ -22,7 +22,6 @@ class FakeSimulationState : public cudaq::SimulationState { createFromSizeAndPtr(std::size_t size, void *data, 
std::size_t dataType) override { throw std::runtime_error("Not implemented"); - return std::make_unique(size, data); } FakeSimulationState() = default; diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 84c8d425001..795f3947dc7 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -11,16 +11,135 @@ // RUN: test_argument_conversion | FileCheck %s -#include "FakeQuantumState.h" -#include "FakeSimulationState.h" +// #include "FakeQuantumState.h" +// #include "FakeSimulationState.h" #include "common/ArgumentConversion.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" #include "cudaq/Optimizer/InitAllDialects.h" #include "cudaq/qis/pauli_word.h" +#include "cudaq/qis/state.h" #include "mlir/Parser/Parser.h" +#include +#include #include +/// @cond DO_NOT_DOCUMENT +/// @brief Fake simulation state to use in tests. +class FakeDeviceState : public cudaq::SimulationState { +private: + std::string kernelName; + std::vector args; + std::size_t size = 0; + void *data = 0; + +public: + virtual std::unique_ptr + createFromSizeAndPtr(std::size_t size, void *data, + std::size_t dataType) override { + throw std::runtime_error("Not implemented"); + } + + FakeDeviceState() = default; + FakeDeviceState(std::size_t size, void *data) : size(size), data(data) {} + FakeDeviceState(const std::string &kernelName, const std::vector args) + : kernelName(kernelName), args(args) {} + FakeDeviceState(const FakeDeviceState &other) + : kernelName(other.kernelName), args(other.args) {} + + virtual std::unique_ptr + createFromData(const cudaq::state_data &data) override { + throw std::runtime_error("Not implemented"); + } + + virtual bool hasData() const override { return data != nullptr; } + + virtual std::optional>> + getKernelInfo() const override { + return std::make_pair(kernelName, args); + } + + virtual Tensor getTensor(std::size_t 
tensorIdx = 0) const override { + throw std::runtime_error("Not implemented"); + } + + virtual std::vector getTensors() const override { + throw std::runtime_error("Not implemented"); + } + + virtual std::size_t getNumTensors() const override { + if (hasData()) + return 1; + throw std::runtime_error("Not implemented"); + } + + virtual std::size_t getNumQubits() const override { + if (hasData()) + return std::countr_zero(size); + throw std::runtime_error("Not implemented"); + } + + virtual std::complex overlap(const SimulationState &other) override { + throw std::runtime_error("Not implemented"); + } + + virtual std::complex + getAmplitude(const std::vector &basisState) override { + throw std::runtime_error("Not implemented"); + } + + virtual std::vector> + getAmplitudes(const std::vector> &basisStates) override { + throw std::runtime_error("Not implemented"); + } + + virtual void dump(std::ostream &os) const override { + throw std::runtime_error("Not implemented"); + } + + virtual precision getPrecision() const override { + if (hasData()) + return cudaq::SimulationState::precision::fp64; + throw std::runtime_error("Not implemented"); + } + + virtual void destroyState() override {} + + virtual std::complex + operator()(std::size_t tensorIdx, + const std::vector &indices) override { + if (hasData()) { + assert(tensorIdx == 0); + assert(indices.size() == 1); + return *(static_cast *>(data) + indices[0]); + } + throw std::runtime_error("Not implemented"); + } + + virtual std::size_t getNumElements() const override { + if (hasData()) + return size; + throw std::runtime_error("Not implemented"); + } + + virtual bool isDeviceData() const override { return false; } + + virtual bool isArrayLike() const override { return true; } + + virtual void toHost(std::complex *clientAllocatedData, + std::size_t numElements) const override { + throw std::runtime_error("Not implemented"); + } + + virtual void toHost(std::complex *clientAllocatedData, + std::size_t numElements) const 
override { + throw std::runtime_error("Not implemented"); + } + + virtual ~FakeDeviceState() override {} +}; +/// @endcond + extern "C" void __cudaq_deviceCodeHolderAdd(const char *, const char *); void dumpSubstitutionModules(cudaq::opt::ArgumentConverter &ab) { @@ -383,7 +502,7 @@ void test_simulation_state(mlir::MLIRContext *ctx) { { std::vector> data{M_SQRT1_2, M_SQRT1_2, 0., 0., 0., 0., 0., 0.}; - auto x = cudaq::state(new FakeSimulationState(data.size(), data.data())); + auto x = cudaq::state(new FakeDeviceState(data.size(), data.data())); std::vector v = {static_cast(&x)}; doSimpleTest(ctx, "!cc.ptr", v); } @@ -406,7 +525,6 @@ void test_quantum_state(mlir::MLIRContext *ctx) { { auto kernel = "init"; auto kernelCode = - "" "func.func private @__nvqpp__mlirgen__init(%arg0: i64) {\n" " %0 = quake.alloca !quake.veq[%arg0 : i64]\n" " %1 = quake.extract_ref %0[0] : (!quake.veq) -> !quake.ref\n" @@ -417,7 +535,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { std::int64_t n = 2; std::vector a = {static_cast(&n)}; - auto x = cudaq::state(new FakeQuantumState(kernel, a)); + auto x = cudaq::state(new FakeDeviceState(kernel, a)); std::vector v = {static_cast(&x)}; doSimpleTest(ctx, "!cc.ptr", v, kernelCode); } @@ -482,7 +600,6 @@ void test_quantum_state(mlir::MLIRContext *ctx) { { auto kernel = "init"; auto kernelCode = - "" " func.func private @__nvqpp__mlirgen__init(%arg0: i64) {\n" " %2 = quake.alloca !quake.veq[%arg0 : i64]\n" " %3 = quake.extract_ref %2[0] : (!quake.veq) -> !quake.ref\n" @@ -504,7 +621,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { std::int64_t n = 2; std::vector a = {static_cast(&n)}; - auto x = cudaq::state(new FakeQuantumState(kernel, a)); + auto x = cudaq::state(new FakeDeviceState(kernel, a)); std::vector v = {static_cast(&x)}; doSimpleTest(ctx, "!cc.ptr", v, kernelCode); } diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index 24bdd787216..58b474a65b0 100644 --- 
a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -60,4 +60,4 @@ module { // CHECK: %[[VAL_0:.*]] = call @callee.num_qubits_0() : () -> i64 // CHECK: return %[[VAL_0]] : i64 // CHECK: } -} \ No newline at end of file +} From de387fce8aefb617f460a92c843175368bc8940b Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 14 Feb 2025 13:11:48 -0800 Subject: [PATCH 32/54] Address more CR comments Signed-off-by: Anna Gringauze --- .../cudaq/Optimizer/Dialect/Quake/QuakeOps.td | 4 +- include/cudaq/Optimizer/Transforms/Passes.td | 19 +-- .../Transforms/ReplaceStateWithKernel.cpp | 2 +- runtime/common/ArgumentConversion.cpp | 26 ++-- runtime/common/ArgumentConversion.h | 7 +- runtime/common/BaseRemoteRESTQPU.h | 1 + runtime/cudaq/algorithms/get_state.h | 15 ++- .../default/rest/helpers/braket/braket.yml | 2 + .../rest/helpers/infleqtion/infleqtion.yml | 2 + .../cudaq/platform/fermioniq/fermioniq.yml | 2 + runtime/test/FakeQuantumState.h | 127 ------------------ runtime/test/FakeSimulationState.h | 101 -------------- runtime/test/test_argument_conversion.cpp | 8 +- .../execution/qvector_init_from_state.cpp | 47 +------ .../qvector_init_from_state_pauli.cpp | 78 +++++++++++ 15 files changed, 139 insertions(+), 302 deletions(-) delete mode 100644 runtime/test/FakeQuantumState.h delete mode 100644 runtime/test/FakeSimulationState.h create mode 100644 targettests/execution/qvector_init_from_state_pauli.cpp diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td index 6bb5e985092..cfb16bd100c 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td @@ -1475,8 +1475,8 @@ def QuakeOp_GetStateOp : QuakeOp<"get_state", [Pure] > { passed to `cudaq::get_state`. This operation will return of the original kernel passed to - `cudaq::get_state`. `cudaq::get_state`. - + `cudaq::get_state`. `cudaq::get_state`. 
+ The operation may be replaced by calls to the @num_qubits and @init calls, which will reproduce the specified state in the `ReplaceStateByKernel` pass. diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index f42559cb37d..1351b3bdf17 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -855,15 +855,16 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func let summary = "Replace `quake.init_state` instructions with call to the kernel generating the state"; let description = [{ - This optimization replaces `quake.init_state`, `quake.get_number_of_qubits`, - and `quake.get_state` operations. + This optimization replaces `quake.init_state`, `quake.get_number_of_qubits`, + and `quake.get_state` operations invoked on state pointers during argument + synthesis for quantum devices. Before this optimization, argument synthesis for state pointers for quantum - devices substituted a new state created from the `quake.get_state` operation - for the state argument. - + devices substituted a state created from the `quake.get_state` operation + for the state argument. 
+ The `quake.get_state` operation accepts symbols for the synthesized kernels - `num_qubits` and `init` that argument synthesis generated from the original + `@num_qubits` and `@init` that argument synthesis generated from the original kernel call that generated the state, e.g., the `cudaq::get_state` call that refers to the result of a specific quantum kernel being invoked with a set of parameters @@ -877,8 +878,8 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func The argument synthesis generated the following new kernels from the `callee` and synthesized them to substitute their arguments with `args`: ``` - func.func @callee_init(qubits: !quake.veq, arguments) -> !quake.veq func.func @callee_num_qubits(arguments) -> i64 + func.func @callee_init(qubits: !quake.veq, arguments) -> !quake.veq ``` The argument synthesis also substituted the state argument in the `caller` @@ -890,8 +891,8 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func This optimization performs the replacements for the the following operations that use a state produced by `quake.get_state @num_qubits @init` operation: - - Replace `quake.get_number_of_qubits` operation by the @num_qubits call - - Replace `quake.init_state` operation by the @init call + - Replace `quake.get_number_of_qubits` operation by the call to `@num_qubits` + - Replace `quake.init_state` operation by the call to `@init` - Clean up unused `quake.get_state` operation For example: diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index 872e12c3f32..d385f061738 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -30,7 +30,7 @@ using namespace mlir; namespace { // clang-format off -/// Replace `quake.get_number_of_qubits` by a call to a a function +/// Replace `quake.get_number_of_qubits` by a call to a function /// that 
computes the number of qubits for a state. /// /// ``` diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 664d10549f9..6078fb45125 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -334,7 +334,10 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // If the state has amplitude data, we materialize the data as a state // vector and create a new state from it. - // TODO: how to handle density matrices? Should we just inline calls? + // TODO: add an option to use the kernel info if available, i.e. for + // remote simulators + // TODO: add an option of storing the kernel info on simulators if + // preferred i.e. to support synthesis of density matrices. if (simState->hasData()) { // The call below might cause lazy execution of the state kernel. // TODO: For lazy execution scenario on remote simulators, we have the @@ -384,7 +387,8 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, arrSize); } - // For quantum hardware, we aim at replacing states with calls to kernels + // Otherwise (ie quantum hardware, where getting the amplitude data is not + // efficient) we aim at replacing states with calls to kernels (`callees`) // that generated them. This is done in 2 stages: // // 1. 
Replace state by quake.get_state instruction during argument conversion: @@ -392,11 +396,11 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // Create two functions: // - callee.num_qubits_N // Calculates the number of qubits needed for the veq allocation - // - callee.init_state_N + // - callee.init_N // Initializes the veq passed as a parameter // // Then replace the state with - // `quake.get_state @callee.num_qubits_0 @callee.init_state_0`: + // `quake.get_state @callee.num_qubits_0 @callee.init_0`: // // clang-format off // ``` @@ -436,7 +440,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // return %arg0 : i64 // } // - // func.func private @callee.init_state_0(%arg0: i64, %arg1: !quake.veq) { + // func.func private @callee.init_0(%arg0: i64, %arg1: !quake.veq) { // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref // quake.x %1 : (f64, !quake.ref) -> () // return @@ -444,15 +448,16 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // ``` // clang-format on // - // 2. Replace the `quake.get_state` ops with calls to the generated functions - // synthesized with the arguments used to create the state: + // 2. Replace the `quake.get_state` and ops that use its state with calls to + // the generated functions, synthesized with the arguments used to create the + // original state: // // After ReplaceStateWithKernel pass: // // clang-format off // ``` // func.func @caller() { - // %1 = call 2callee.num_qubits_0() : () -> i64 + // %1 = call callee.num_qubits_0() : () -> i64 // %2 = quake.alloca !quake.veq[%1 : i64] // %3 = call @callee.init_0(%2): (!quake.veq) -> !quake.veq // } @@ -506,7 +511,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto &registeredNumQubitsName = cudaq::opt::ArgumentConverter::registerKernelName(numQubitsName); - // Create substitutions for `callee.init_N` and `callee.num_qubits_N`. 
+ // Convert arguments for `callee.init_N` and `callee.num_qubits_N`. converter.genCallee(registeredInitName, calleeArgs); converter.genCallee(registeredNumQubitsName, calleeArgs); @@ -691,6 +696,9 @@ Value genConstant(OpBuilder &builder, cudaq::cc::IndirectCallableType indCallTy, //===----------------------------------------------------------------------===// +std::list cudaq::opt::ArgumentConverter::kernelNameRegistry = + std::list(); + cudaq::opt::ArgumentConverter::ArgumentConverter(StringRef kernelName, ModuleOp sourceModule) : sourceModule(sourceModule), builder(sourceModule.getContext()), diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index d07a5e5e989..6d2b2135c37 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -66,8 +66,11 @@ class ArgumentConverter { } private: - // Note: use std::list to make sure we always return valid references - // when registering new kernel names. + /// Keeps kernel names created during argument conversion in memory. + /// References to those names are used by the argument converters for + /// those kernels. + /// Note: use std::list to make sure we always return valid references + /// when registering new kernel names. 
static std::list kernelNameRegistry; mlir::ModuleOp sourceModule; diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 3a45628a498..0763e11304c 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -450,6 +450,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (!rawArgs.empty() || updatedArgs) { mlir::PassManager pm(&context); if (!rawArgs.empty()) { + cudaq::info("Run Argument Synth.\n"); opt::ArgumentConverter argCon(kernelName, moduleOp); argCon.gen(rawArgs); diff --git a/runtime/cudaq/algorithms/get_state.h b/runtime/cudaq/algorithms/get_state.h index dacb2ef2793..caec195715d 100644 --- a/runtime/cudaq/algorithms/get_state.h +++ b/runtime/cudaq/algorithms/get_state.h @@ -120,14 +120,25 @@ auto get_state(QuantumKernel &&kernel, Args &&...args) { std::forward(args)...)); } #else -#if defined(CUDAQ_QUANTUM_DEVICE) +#if defined(CUDAQ_QUANTUM_DEVICE) && !defined(CUDAQ_LIBRARY_MODE) // Store kernel name and arguments for quantum states. if (!cudaq::get_quake_by_name(cudaq::getKernelName(kernel), false).empty()) return state(new QuantumState(std::forward(kernel), std::forward(args)...)); throw std::runtime_error( "cudaq::state* argument synthesis is not supported for quantum hardware" - "for c-like functions, use class kernels instead"); + " for c-like functions, use class kernels instead"); +#else +#if defined(CUDAQ_QUANTUM_DEVICE) + // Kernel builder is MLIR-based kernel. 
+ if constexpr (has_name::value) + return state(new QuantumState(std::forward(kernel), + std::forward(args)...)); + + throw std::runtime_error( + "cudaq::state* argument synthesis is not supported for quantum hardware" + " for c-like functions in library mode"); +#endif #endif #endif return details::extractState([&]() mutable { diff --git a/runtime/cudaq/platform/default/rest/helpers/braket/braket.yml b/runtime/cudaq/platform/default/rest/helpers/braket/braket.yml index 7e2b573f65c..f409e4fa298 100644 --- a/runtime/cudaq/platform/default/rest/helpers/braket/braket.yml +++ b/runtime/cudaq/platform/default/rest/helpers/braket/braket.yml @@ -16,6 +16,8 @@ config: link-libs: ["-lcudaq-rest-qpu"] # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Define the lowering pipeline platform-lowering-config: "classical-optimization-pipeline,globalize-array-values,func.func(state-prep),unitary-synthesis,canonicalize,apply-op-specialization,aggressive-early-inlining,classical-optimization-pipeline,func.func(lower-to-cfg,canonicalize,multicontrol-decomposition),decomposition{enable-patterns=SToR1,TToR1,R1ToU3,U3ToRotations,CHToCX,CCZToCX,CRzToCX,CRyToCX,CRxToCX,CR1ToCX,RxAdjToRx,RyAdjToRy,RzAdjToRz},quake-to-cc-prep,func.func(expand-control-veqs,combine-quantum-alloc,canonicalize,combine-measurements),symbol-dce" # Tell the rest-qpu that we are generating OpenQASM 2.0. 
diff --git a/runtime/cudaq/platform/default/rest/helpers/infleqtion/infleqtion.yml b/runtime/cudaq/platform/default/rest/helpers/infleqtion/infleqtion.yml index a9fa9484526..5f9c82b022e 100644 --- a/runtime/cudaq/platform/default/rest/helpers/infleqtion/infleqtion.yml +++ b/runtime/cudaq/platform/default/rest/helpers/infleqtion/infleqtion.yml @@ -16,6 +16,8 @@ config: link-libs: ["-lcudaq-rest-qpu"] # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Define the lowering pipeline platform-lowering-config: "classical-optimization-pipeline,globalize-array-values,func.func(state-prep),unitary-synthesis,canonicalize,apply-op-specialization,aggressive-early-inlining,classical-optimization-pipeline,func.func(lower-to-cfg),canonicalize,func.func(multicontrol-decomposition),decomposition{enable-patterns=SToR1,TToR1,CCZToCX,CRyToCX,CRxToCX,R1AdjToR1,RxAdjToRx,RyAdjToRy,RzAdjToRz},quake-to-cc-prep,func.func(memtoreg{quantum=0}),symbol-dce" # Tell the rest-qpu that we are generating OpenQASM 2.0. 
diff --git a/runtime/cudaq/platform/fermioniq/fermioniq.yml b/runtime/cudaq/platform/fermioniq/fermioniq.yml index ec87efd03f4..eed0959caaf 100644 --- a/runtime/cudaq/platform/fermioniq/fermioniq.yml +++ b/runtime/cudaq/platform/fermioniq/fermioniq.yml @@ -13,6 +13,8 @@ config: platform-qpu: fermioniq # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Add the fermioniq-qpu library to the link list link-libs: ["-lcudaq-fermioniq-qpu"] # Library mode is only for simulators, physical backends must turn this off diff --git a/runtime/test/FakeQuantumState.h b/runtime/test/FakeQuantumState.h deleted file mode 100644 index 87a177a6c21..00000000000 --- a/runtime/test/FakeQuantumState.h +++ /dev/null @@ -1,127 +0,0 @@ -/****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -#include "cudaq/qis/state.h" -#include -#include - -/// @cond DO_NOT_DOCUMENT -/// @brief Fake simulation state to use in tests. 
-class FakeDeviceState : public cudaq::SimulationState { -private: - std::string kernelName; - std::vector args; - std::size_t size = 0; - void *data = 0; - -public: - virtual std::unique_ptr - createFromSizeAndPtr(std::size_t size, void *data, - std::size_t dataType) override { - throw std::runtime_error("Not implemented"); - } - - FakeDeviceState() = default; - FakeDeviceState(std::size_t size, void *data) : size(size), data(data) {} - FakeDeviceState(const std::string &kernelName, const std::vector args) - : kernelName(kernelName), args(args) {} - FakeDeviceState(const FakeDeviceState &other) - : kernelName(other.kernelName), args(other.args) {} - - virtual std::unique_ptr - createFromData(const cudaq::state_data &data) override { - throw std::runtime_error("Not implemented"); - } - - virtual bool hasData() const override { return data != nullptr; } - - virtual std::optional>> - getKernelInfo() const override { - return std::make_pair(kernelName, args); - } - - virtual Tensor getTensor(std::size_t tensorIdx = 0) const override { - throw std::runtime_error("Not implemented"); - } - - virtual std::vector getTensors() const override { - throw std::runtime_error("Not implemented"); - } - - virtual std::size_t getNumTensors() const override { - if (hasData()) - return 1; - throw std::runtime_error("Not implemented"); - } - - virtual std::size_t getNumQubits() const override { - if (hasData()) - return std::countr_zero(size); - throw std::runtime_error("Not implemented"); - } - - virtual std::complex overlap(const SimulationState &other) override { - throw std::runtime_error("Not implemented"); - } - - virtual std::complex - getAmplitude(const std::vector &basisState) override { - throw std::runtime_error("Not implemented"); - } - - virtual std::vector> - getAmplitudes(const std::vector> &basisStates) override { - throw std::runtime_error("Not implemented"); - } - - virtual void dump(std::ostream &os) const override { - throw std::runtime_error("Not implemented"); - } 
- - virtual precision getPrecision() const override { - if (hasData()) - return cudaq::SimulationState::precision::fp64; - throw std::runtime_error("Not implemented"); - } - - virtual void destroyState() override {} - - virtual std::complex - operator()(std::size_t tensorIdx, - const std::vector &indices) override { - if (hasData()) { - assert(tensorIdx == 0); - assert(indices.size() == 1); - return *(static_cast *>(data) + indices[0]); - } - throw std::runtime_error("Not implemented"); - } - - virtual std::size_t getNumElements() const override { - if (hasData()) - return size; - throw std::runtime_error("Not implemented"); - } - - virtual bool isDeviceData() const override { return false; } - - virtual bool isArrayLike() const override { return true; } - - virtual void toHost(std::complex *clientAllocatedData, - std::size_t numElements) const override { - throw std::runtime_error("Not implemented"); - } - - virtual void toHost(std::complex *clientAllocatedData, - std::size_t numElements) const override { - throw std::runtime_error("Not implemented"); - } - - virtual ~FakeDeviceState() override {} -}; -/// @endcond diff --git a/runtime/test/FakeSimulationState.h b/runtime/test/FakeSimulationState.h deleted file mode 100644 index 53e2b0bf936..00000000000 --- a/runtime/test/FakeSimulationState.h +++ /dev/null @@ -1,101 +0,0 @@ -/****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -#include "cudaq/qis/state.h" -#include -#include - -/// @cond DO_NOT_DOCUMENT -/// @brief Fake simulation state to use in tests. 
-class FakeSimulationState : public cudaq::SimulationState { -private: - std::size_t size = 0; - void *data = 0; - -public: - virtual std::unique_ptr - createFromSizeAndPtr(std::size_t size, void *data, - std::size_t dataType) override { - throw std::runtime_error("Not implemented"); - } - - FakeSimulationState() = default; - FakeSimulationState(std::size_t size, void *data) : size(size), data(data) {} - - virtual std::unique_ptr - createFromData(const cudaq::state_data &data) override { - throw std::runtime_error("Not implemented"); - } - - virtual Tensor getTensor(std::size_t tensorIdx = 0) const override { - throw std::runtime_error("Not implemented"); - } - - virtual std::vector getTensors() const override { - throw std::runtime_error("Not implemented"); - } - - virtual std::size_t getNumTensors() const override { return 1; } - - virtual std::size_t getNumQubits() const override { - return std::countr_zero(size); - } - - virtual std::complex overlap(const SimulationState &other) override { - throw std::runtime_error("Not implemented"); - } - - virtual std::complex - getAmplitude(const std::vector &basisState) override { - throw std::runtime_error("Not implemented"); - } - - virtual std::vector> - getAmplitudes(const std::vector> &basisStates) override { - throw std::runtime_error("Not implemented"); - } - - virtual void dump(std::ostream &os) const override { - throw std::runtime_error("Not implemented"); - } - - virtual precision getPrecision() const override { - return cudaq::SimulationState::precision::fp64; - } - - virtual void destroyState() override {} - - virtual std::complex - operator()(std::size_t tensorIdx, - const std::vector &indices) override { - assert(tensorIdx == 0); - assert(indices.size() == 1); - return *(static_cast *>(data) + indices[0]); - } - - virtual std::size_t getNumElements() const override { return size; } - - virtual bool isDeviceData() const override { return false; } - - virtual bool isArrayLike() const override { return true; } 
- - virtual void toHost(std::complex *clientAllocatedData, - std::size_t numElements) const override { - throw std::runtime_error( - "SimulationState::toHost complex128 not implemented."); - } - - virtual void toHost(std::complex *clientAllocatedData, - std::size_t numElements) const override { - throw std::runtime_error( - "SimulationState::toHost complex64 not implemented."); - } - - virtual ~FakeSimulationState() {} -}; -/// @endcond diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 795f3947dc7..edc9793a5c8 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -25,7 +25,7 @@ #include /// @cond DO_NOT_DOCUMENT -/// @brief Fake simulation state to use in tests. +/// @brief Fake simulation or quantum device state to use in tests. class FakeDeviceState : public cudaq::SimulationState { private: std::string kernelName; @@ -56,7 +56,9 @@ class FakeDeviceState : public cudaq::SimulationState { virtual std::optional>> getKernelInfo() const override { - return std::make_pair(kernelName, args); + if (!hasData()) + return std::make_pair(kernelName, args); + throw std::runtime_error("Not implemented"); } virtual Tensor getTensor(std::size_t tensorIdx = 0) const override { @@ -771,7 +773,7 @@ void test_combinations(mlir::MLIRContext *ctx) { 0., 0., 0., 0.}; std::vector x = {0.5, 0.6}; - cudaq::state y{new FakeSimulationState(data.size(), data.data())}; + cudaq::state y{new FakeDeviceState(data.size(), data.data())}; std::vector z = { cudaq::pauli_word{"XX"}, cudaq::pauli_word{"XY"}, diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index 482440b4b8f..bd1eee026f7 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -18,6 +18,7 @@ // RUN: nvq++ %cpp_std --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s // RUN: nvq++ 
%cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s // RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s +// RUN: if %braket_avail; then nvq++ %cpp_std --target braket --emulate %s -o %t && %t | FileCheck %s; fi // clang-format on #include @@ -39,33 +40,6 @@ struct test_state_param { } }; -struct test_state_param2 { - void operator()(cudaq::state *state, cudaq::pauli_word w) __qpu__ { - cudaq::qvector q(state); - cudaq::exp_pauli(1.0, q, w); - } -}; - -struct test_state_param3 { - void operator()(cudaq::state *state, - std::vector &words) __qpu__ { - cudaq::qvector q(state); - for (std::size_t i = 0; i < words.size(); ++i) { - cudaq::exp_pauli(1.0, q, words[i]); - } - } -}; - -struct test_state_param4 { - void operator()(cudaq::state *state, std::vector &coefficients, - std::vector &words) __qpu__ { - cudaq::qvector q(state); - for (std::size_t i = 0; i < words.size(); ++i) { - cudaq::exp_pauli(coefficients[i], q, words[i]); - } - } -}; - void printCounts(cudaq::sample_result &result) { std::vector values{}; for (auto &&[bits, counts] : result) { @@ -133,23 +107,6 @@ int main() { // CHECK: 11111 // clang-format on - { - std::cout << "Passing state from another kernel as argument" - " with pauli word arg (kernel mode)" - << std::endl; - auto state = cudaq::get_state(test_init_state{}, 2); - auto counts = - cudaq::sample(test_state_param2{}, &state, cudaq::pauli_word{"XX"}); - printCounts(counts); - } - // clang-format off - // CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) - // CHECK: 00 - // CHECK: 01 - // CHECK: 10 - // CHECK: 11 - // clang-format on - { std::cout << "Passing state from another kernel as argument iteratively " "(kernel mode)" @@ -177,6 +134,4 @@ int main() { // CHECK: 00 // CHECK: 10 // clang-format on - - // TODO: add tests for vectors of pauli words after we can lifts the arrays of pauli words. 
} diff --git a/targettests/execution/qvector_init_from_state_pauli.cpp b/targettests/execution/qvector_init_from_state_pauli.cpp new file mode 100644 index 00000000000..6d83170c17b --- /dev/null +++ b/targettests/execution/qvector_init_from_state_pauli.cpp @@ -0,0 +1,78 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// clang-format off +// Simulators +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s + +// Quantum emulators +// RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target anyon --emulate %s -o %t && %t | FileCheck %s +// 2 different IQM machines for 2 different topologies +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s +// clang-format on + +#include +#include +#include +#include + +struct test_init_state { + void operator()(int n) __qpu__ { + cudaq::qvector q(n); + ry(M_PI / 2.0, q[0]); + } +}; + +struct test_state_param { + void operator()(cudaq::state *state, cudaq::pauli_word w) __qpu__ { + cudaq::qvector q(state); + cudaq::exp_pauli(1.0, q, w); + } +}; + +void printCounts(cudaq::sample_result &result) { + std::vector values{}; + for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { + std::cout << bits << std::endl; + } 
+} + +int main() { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0., 0., 0., 0., 0.}; + std::vector vec1{0., 0., 0., 0., + 0., 0., M_SQRT1_2, M_SQRT1_2}; + auto state = cudaq::state::from_data(vec); + auto state1 = cudaq::state::from_data(vec1); + { + std::cout << "Passing state from another kernel as argument" + " with pauli word arg (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); + auto counts = + cudaq::sample(test_state_param{}, &state, cudaq::pauli_word{"XX"}); + printCounts(counts); + } + // clang-format off + // CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) + // CHECK: 00 + // CHECK: 01 + // CHECK: 10 + // CHECK: 11 + // clang-format on + + // TODO: add tests for vectors of pauli words after we can lifts the arrays of pauli words. +} From 7cf306ad05dd16ee99db88f1e6de79077f071275 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 18 Feb 2025 10:06:55 -0800 Subject: [PATCH 33/54] Address more CR comments Signed-off-by: Anna Gringauze --- runtime/cudaq/CMakeLists.txt | 2 +- runtime/cudaq/algorithms/get_state.h | 10 ++--- .../qpu_state.cpp} | 45 +++++++++---------- .../quantum_state.h => platform/qpu_state.h} | 10 ++--- 4 files changed, 33 insertions(+), 34 deletions(-) rename runtime/cudaq/{qis/quantum_state.cpp => platform/qpu_state.cpp} (67%) rename runtime/cudaq/{qis/quantum_state.h => platform/qpu_state.h} (96%) diff --git a/runtime/cudaq/CMakeLists.txt b/runtime/cudaq/CMakeLists.txt index 7561ca904f6..89f76c68354 100644 --- a/runtime/cudaq/CMakeLists.txt +++ b/runtime/cudaq/CMakeLists.txt @@ -17,10 +17,10 @@ add_library(${LIBRARY_NAME} SHARED cudaq.cpp target_control.cpp algorithms/draw.cpp + platform/qpu_state.cpp platform/quantum_platform.cpp qis/execution_manager_c_api.cpp qis/execution_manager.cpp - qis/quantum_state.cpp qis/remote_state.cpp qis/state.cpp utils/cudaq_utils.cpp diff --git a/runtime/cudaq/algorithms/get_state.h b/runtime/cudaq/algorithms/get_state.h index 
caec195715d..79202f98b48 100644 --- a/runtime/cudaq/algorithms/get_state.h +++ b/runtime/cudaq/algorithms/get_state.h @@ -14,7 +14,7 @@ #include "cudaq/host_config.h" #include "cudaq/platform.h" #include "cudaq/platform/QuantumExecutionQueue.h" -#include "cudaq/qis/quantum_state.h" +#include "cudaq/platform/qpu_state.h" #include "cudaq/qis/remote_state.h" #include "cudaq/qis/state.h" #include @@ -123,8 +123,8 @@ auto get_state(QuantumKernel &&kernel, Args &&...args) { #if defined(CUDAQ_QUANTUM_DEVICE) && !defined(CUDAQ_LIBRARY_MODE) // Store kernel name and arguments for quantum states. if (!cudaq::get_quake_by_name(cudaq::getKernelName(kernel), false).empty()) - return state(new QuantumState(std::forward(kernel), - std::forward(args)...)); + return state(new QPUState(std::forward(kernel), + std::forward(args)...)); throw std::runtime_error( "cudaq::state* argument synthesis is not supported for quantum hardware" " for c-like functions, use class kernels instead"); @@ -132,8 +132,8 @@ auto get_state(QuantumKernel &&kernel, Args &&...args) { #if defined(CUDAQ_QUANTUM_DEVICE) // Kernel builder is MLIR-based kernel. if constexpr (has_name::value) - return state(new QuantumState(std::forward(kernel), - std::forward(args)...)); + return state(new QPUState(std::forward(kernel), + std::forward(args)...)); throw std::runtime_error( "cudaq::state* argument synthesis is not supported for quantum hardware" diff --git a/runtime/cudaq/qis/quantum_state.cpp b/runtime/cudaq/platform/qpu_state.cpp similarity index 67% rename from runtime/cudaq/qis/quantum_state.cpp rename to runtime/cudaq/platform/qpu_state.cpp index faaae5b510a..0561ca29ddb 100644 --- a/runtime/cudaq/qis/quantum_state.cpp +++ b/runtime/cudaq/platform/qpu_state.cpp @@ -1,17 +1,17 @@ /******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * * All rights reserved. 
* * * * This source code and the accompanying materials are made available under * * the terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ -#include "quantum_state.h" +#include "qpu_state.h" #include "common/Logger.h" namespace cudaq { -QuantumState::~QuantumState() { +QPUState::~QPUState() { if (!platformExecutionLog.empty()) { // Flush any info log from the remote execution printf("%s\n", platformExecutionLog.c_str()); @@ -25,89 +25,88 @@ QuantumState::~QuantumState() { deleters.clear(); } -std::size_t QuantumState::getNumQubits() const { +std::size_t QPUState::getNumQubits() const { throw std::runtime_error( "getNumQubits is not implemented for quantum hardware"); } cudaq::SimulationState::Tensor -QuantumState::getTensor(std::size_t tensorIdx) const { +QPUState::getTensor(std::size_t tensorIdx) const { throw std::runtime_error("getTensor is not implemented for quantum hardware"); } /// @brief Return all tensors that represent this state -std::vector QuantumState::getTensors() const { +std::vector QPUState::getTensors() const { throw std::runtime_error( "getTensors is not implemented for quantum hardware"); return {getTensor()}; } /// @brief Return the number of tensors that represent this state. 
-std::size_t QuantumState::getNumTensors() const { +std::size_t QPUState::getNumTensors() const { throw std::runtime_error( "getNumTensors is not implemented for quantum hardware"); } std::complex -QuantumState::operator()(std::size_t tensorIdx, - const std::vector &indices) { +QPUState::operator()(std::size_t tensorIdx, + const std::vector &indices) { throw std::runtime_error( "operator() is not implemented for quantum hardware"); } std::unique_ptr -QuantumState::createFromSizeAndPtr(std::size_t size, void *ptr, std::size_t) { +QPUState::createFromSizeAndPtr(std::size_t size, void *ptr, std::size_t) { throw std::runtime_error( "createFromSizeAndPtr is not implemented for quantum hardware"); } -void QuantumState::dump(std::ostream &os) const { +void QPUState::dump(std::ostream &os) const { throw std::runtime_error("dump is not implemented for quantum hardware"); } -cudaq::SimulationState::precision QuantumState::getPrecision() const { +cudaq::SimulationState::precision QPUState::getPrecision() const { throw std::runtime_error( "getPrecision is not implemented for quantum hardware"); } -void QuantumState::destroyState() { +void QPUState::destroyState() { // There is no state data so nothing to destroy. 
} -bool QuantumState::isDeviceData() const { +bool QPUState::isDeviceData() const { throw std::runtime_error( "isDeviceData is not implemented for quantum hardware"); } -void QuantumState::toHost(std::complex *clientAllocatedData, - std::size_t numElements) const { +void QPUState::toHost(std::complex *clientAllocatedData, + std::size_t numElements) const { throw std::runtime_error("toHost is not implemented for quantum hardware"); } -void QuantumState::toHost(std::complex *clientAllocatedData, - std::size_t numElements) const { +void QPUState::toHost(std::complex *clientAllocatedData, + std::size_t numElements) const { throw std::runtime_error("toHost is not implemented for quantum hardware"); } std::optional>> -QuantumState::getKernelInfo() const { +QPUState::getKernelInfo() const { return std::make_pair(kernelName, args); } std::vector> -QuantumState::getAmplitudes(const std::vector> &basisStates) { +QPUState::getAmplitudes(const std::vector> &basisStates) { throw std::runtime_error( "getAmplitudes is not implemented for quantum hardware"); } std::complex -QuantumState::getAmplitude(const std::vector &basisState) { +QPUState::getAmplitude(const std::vector &basisState) { throw std::runtime_error( "getAmplitudes is not implemented for quantum hardware"); } -std::complex -QuantumState::overlap(const cudaq::SimulationState &other) { +std::complex QPUState::overlap(const cudaq::SimulationState &other) { throw std::runtime_error("overlap is not implemented for quantum hardware"); } } // namespace cudaq diff --git a/runtime/cudaq/qis/quantum_state.h b/runtime/cudaq/platform/qpu_state.h similarity index 96% rename from runtime/cudaq/qis/quantum_state.h rename to runtime/cudaq/platform/qpu_state.h index c9b1b30029b..a13ac6f7b40 100644 --- a/runtime/cudaq/qis/quantum_state.h +++ b/runtime/cudaq/platform/qpu_state.h @@ -17,7 +17,7 @@ namespace cudaq { // The state is represented by a quantum kernel. 
// Quantum state contains all the information we need to replicate a // call to kernel that created the state. -class QuantumState : public cudaq::SimulationState { +class QPUState : public cudaq::SimulationState { protected: std::string kernelName; // Lazily-evaluated state data (just keeping the kernel name and arguments). @@ -65,7 +65,7 @@ class QuantumState : public cudaq::SimulationState { /// @brief Constructor template - QuantumState(QuantumKernel &&kernel, Args &&...args) { + QPUState(QuantumKernel &&kernel, Args &&...args) { if constexpr (has_name::value) { // kernel_builder kernel: need to JIT code to get it registered. static_cast(kernel).jitCode(); @@ -75,10 +75,10 @@ class QuantumState : public cudaq::SimulationState { } (addArgument(args), ...); } - QuantumState() = default; - QuantumState(const QuantumState &other) + QPUState() = default; + QPUState(const QPUState &other) : kernelName(other.kernelName), args(other.args), deleters() {} - virtual ~QuantumState(); + virtual ~QPUState(); /// @brief True if the state has amplitudes or density matrix available. 
virtual bool hasData() const override { return false; } From 140247143053d9140794f8a9b7eb006b155b8d00 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 19 Feb 2025 17:01:52 -0800 Subject: [PATCH 34/54] Store new functions in subst module and update synthesis Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.td | 2 +- .../Transforms/ArgumentSynthesis.cpp | 132 ++++++---- runtime/common/ArgumentConversion.cpp | 46 ++-- runtime/common/ArgumentConversion.h | 7 +- runtime/test/test_argument_conversion.cpp | 246 ++++++++++++++---- test/Quake/arg_subst-5.txt | 14 +- test/Quake/arg_subst_func.qke | 8 - 7 files changed, 318 insertions(+), 137 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 1351b3bdf17..3d22756d404 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -65,7 +65,7 @@ def ApplySpecialization : Pass<"apply-op-specialization", "mlir::ModuleOp"> { ]; } -def ArgumentSynthesis : Pass<"argument-synthesis", "mlir::func::FuncOp"> { +def ArgumentSynthesis : Pass<"argument-synthesis", "mlir::ModuleOp"> { let summary = "Specialize a function by replacing arguments with constants"; let description = [{ This pass takes a list of functions and argument substitutions. 
For each diff --git a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp index 76a3ac36ca8..9159fdee9de 100644 --- a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp +++ b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp @@ -14,6 +14,7 @@ #include "mlir/Parser/Parser.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" +#include namespace cudaq::opt { #define GEN_PASS_DEF_ARGUMENTSYNTHESIS @@ -25,69 +26,97 @@ namespace cudaq::opt { using namespace mlir; namespace { + +class Analysis { +public: + Analysis(MLIRContext *ctx, mlir::Pass::ListOption &funcList) + : ctx(ctx), funcList(funcList) { + parseSubstModules(); + } + + void parseSubstModules() { + for (auto &item : funcList) { + auto pos = item.find(':'); + if (pos == std::string::npos) + continue; + + std::string funcName = item.substr(0, pos); + std::string text = item.substr(pos + 1); + + // If there are no substitutions, continue to the next subst + if (text.empty()) { + LLVM_DEBUG(llvm::dbgs() << funcName << " has no substitutions."); + continue; + } + + // If we're here, we have a FuncOp and we have substitutions that can be + // applied. + // + // 1. Create a Module with the substitutions that we'll be making. + LLVM_DEBUG(llvm::dbgs() + << funcName << " : substitution pattern: '" << text << "'\n"); + auto substModule = [&]() -> OwningOpRef { + if (text.front() == '*') { + // Substitutions are a raw string after the '*' character. + return parseSourceString(text.substr(1), ctx); + } + // Substitutions are in a text file (command-line usage). 
+ return parseSourceFile(text, ctx); + }(); + assert(*substModule && "module must have been created"); + auto &name = funcNames.emplace_back(funcName); + substModules.try_emplace(name, std::move(substModule)); + // substModules[funcName]->dump(); + } + } + + MLIRContext *ctx; + mlir::Pass::ListOption &funcList; + std::list funcNames; + DenseMap> substModules; +}; + class ArgumentSynthesisPass : public cudaq::opt::impl::ArgumentSynthesisBase { public: using ArgumentSynthesisBase::ArgumentSynthesisBase; - void runOnOperation() override { - func::FuncOp func = getOperation(); - StringRef funcName = func.getName(); - std::string text; - if (std::find_if(funcList.begin(), funcList.end(), - [&](const std::string &item) { - auto pos = item.find(':'); - if (pos == std::string::npos) - return false; - std::string itemName = item.substr(0, pos); - bool result = itemName == funcName; - if (result) - text = item.substr(pos + 1); - return result; - }) == funcList.end()) { - // If the function isn't on the list, do nothing. - LLVM_DEBUG(llvm::dbgs() << funcName << " not in list.\n"); - return; + void mergeSymbols(ModuleOp mod, Analysis &analysis) { + for (auto &[funcName, substMod] : analysis.substModules) { + // 2. Go through the Module and merge in all its symbols. + for (auto &op : *substMod) { + if (auto symInterface = dyn_cast(op)) { + auto name = symInterface.getName(); + auto obj = mod.lookupSymbol(name); + if (!obj) + mod.getBody()->push_back(op.clone()); + } + } } + } - // If there are no substitutions, we're done. - if (text.empty()) { - LLVM_DEBUG(llvm::dbgs() << funcName << " has no substitutions."); + void processFunction(func::FuncOp func, Analysis &analysis) { + MLIRContext *ctx = func.getContext(); + auto funcName = func.getName(); + LLVM_DEBUG(llvm::dbgs() << "processing : '" << funcName << "'\n"); + + auto it = analysis.substModules.find(funcName); + if (it == analysis.substModules.end()) { + // If the function isn't on the list, do nothing. 
+ LLVM_DEBUG(llvm::dbgs() << funcName << " has no substitutions.\n"); return; } - - // If we're here, we have a FuncOp and we have substitutions that can be - // applied. - // - // 1. Create a Module with the substitutions that we'll be making. - auto *ctx = func.getContext(); - LLVM_DEBUG(llvm::dbgs() << "substitution pattern: '" << text << "'\n"); - auto substMod = [&]() -> OwningOpRef { - if (text.front() == '*') { - // Substitutions are a raw string after the '*' character. - return parseSourceString(text.substr(1), ctx); - } - // Substitutions are in a text file (command-line usage). - return parseSourceFile(text, ctx); - }(); - assert(*substMod && "module must have been created"); + auto substMod = *(it->second); // 2. Go through the Module and process each substitution. SmallVector processedArgs(func.getFunctionType().getNumInputs()); SmallVector> replacements; BitVector replacedArgs(processedArgs.size()); - for (auto &op : *substMod) { + for (auto &op : substMod) { auto subst = dyn_cast(op); - if (!subst) { - if (auto symInterface = dyn_cast(op)) { - auto name = symInterface.getName(); - auto srcMod = func->getParentOfType(); - auto obj = srcMod.lookupSymbol(name); - if (!obj) - srcMod.getBody()->push_back(op.clone()); - } + if (!subst) continue; - } + auto pos = subst.getPosition(); if (pos >= processedArgs.size()) { func.emitError("Argument " + std::to_string(pos) + " is invalid."); @@ -147,6 +176,15 @@ class ArgumentSynthesisPass // substituted. 
func.eraseArguments(replacedArgs); } + + void runOnOperation() override { + ModuleOp mod = getOperation(); + Analysis analysis(mod.getContext(), funcList); + + mergeSymbols(mod, analysis); + + mod->walk([&](func::FuncOp func) { processFunction(func, analysis); }); + } }; } // namespace diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 6078fb45125..daf84544ee3 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -329,7 +329,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, cudaq::state_helper::getSimulationState(const_cast(v)); auto kernelName = converter.getKernelName(); - auto sourceMod = converter.getSourceModule(); + // auto sourceMod = converter.getSourceModule(); auto substMod = converter.getSubstitutionModule(); // If the state has amplitude data, we materialize the data as a state @@ -491,29 +491,35 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); assert(calleeFunc && "callee func is missing"); - static unsigned counter = 0; - auto initName = calleeName + ".init_" + std::to_string(counter); - auto numQubitsName = - calleeName + ".num_qubits_" + std::to_string(counter++); + // Use the state pointer as a hash to create the new kernel names. + // We can reuse the functions previously created from the same state. 
+ auto hash = std::to_string(reinterpret_cast(v)); + auto initName = calleeName + ".init_" + hash; + auto numQubitsName = calleeName + ".num_qubits_" + hash; + + // Function names in the IR auto initKernelName = cudaq::runtime::cudaqGenPrefixName + initName; auto numQubitsKernelName = cudaq::runtime::cudaqGenPrefixName + numQubitsName; - // Create `callee.init_N` and `callee.num_qubits_N` used for - // `quake.get_state` replacement later in ReplaceStateWithKernel pass - createInitFunc(builder, sourceMod, calleeFunc, initKernelName); - createNumQubitsFunc(builder, sourceMod, calleeFunc, numQubitsKernelName); - - // Create and register names for new `init` and `num_qubits` kernels so - // ArgumentConverters can keep a string reference to a valid memory. - auto ®isteredInitName = - cudaq::opt::ArgumentConverter::registerKernelName(initName); - auto ®isteredNumQubitsName = - cudaq::opt::ArgumentConverter::registerKernelName(numQubitsName); - - // Convert arguments for `callee.init_N` and `callee.num_qubits_N`. - converter.genCallee(registeredInitName, calleeArgs); - converter.genCallee(registeredNumQubitsName, calleeArgs); + if (!cudaq::opt::ArgumentConverter::isRegisteredKernelName(initName) || + !cudaq::opt::ArgumentConverter::isRegisteredKernelName(numQubitsName)) { + // Create `callee.init_N` and `callee.num_qubits_N` used for + // `quake.get_state` replacement later in ReplaceStateWithKernel pass + createInitFunc(builder, substMod, calleeFunc, initKernelName); + createNumQubitsFunc(builder, substMod, calleeFunc, numQubitsKernelName); + + // Create and register names for new `init` and `num_qubits` kernels so + // ArgumentConverters can keep a string reference to a valid memory. + auto ®isteredInitName = + cudaq::opt::ArgumentConverter::registerKernelName(initName); + auto ®isteredNumQubitsName = + cudaq::opt::ArgumentConverter::registerKernelName(numQubitsName); + + // Convert arguments for `callee.init_N` and `callee.num_qubits_N`. 
+ converter.genCallee(registeredInitName, calleeArgs); + converter.genCallee(registeredNumQubitsName, calleeArgs); + } // Create a substitution for the state pointer. auto statePtrTy = diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 6d2b2135c37..677bc53b066 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -53,7 +53,7 @@ class ArgumentConverter { mlir::StringRef getKernelName() { return kernelName; } void genCallee(mlir::StringRef calleeName, std::vector &args) { - auto &converter = calleeConverters.emplace_back(calleeName, sourceModule); + auto &converter = calleeConverters.emplace_back(calleeName, substModule); converter.gen(args); } @@ -61,6 +61,11 @@ class ArgumentConverter { return calleeConverters; } + static bool isRegisteredKernelName(const std::string &kernelName) { + return std::find(kernelNameRegistry.begin(), kernelNameRegistry.end(), + kernelName) != kernelNameRegistry.end(); + } + static const std::string ®isterKernelName(const std::string &kernelName) { return kernelNameRegistry.emplace_back(kernelName); } diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index edc9793a5c8..bd3a7b2107b 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -11,8 +11,6 @@ // RUN: test_argument_conversion | FileCheck %s -// #include "FakeQuantumState.h" -// #include "FakeSimulationState.h" #include "common/ArgumentConversion.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" @@ -172,14 +170,13 @@ func.func @__nvqpp__mlirgen__testy(%0: )#" + typeName + R"#() -> () return })#"; + // Create the Module auto mod = mlir::parseSourceString(code, ctx); llvm::outs() << "Source module:\n" << *mod << '\n'; cudaq::opt::ArgumentConverter ab{"testy", *mod}; // Create the argument conversions ab.gen(args); - // Dump the modified source 
module - llvm::outs() << "Source module (after):\n" << *mod << '\n'; // Dump all conversions dumpSubstitutionModules(ab); } @@ -223,8 +220,6 @@ void doTest(mlir::MLIRContext *ctx, std::vector &typeNames, cudaq::opt::ArgumentConverter ab{"testy", *mod}; // Create the argument conversions ab.gen_drop_front(args, startingArgIdx); - // Dump the modified source module - llvm::outs() << "Source module (after):\n" << *mod << '\n'; // Dump all conversions dumpSubstitutionModules(ab); } @@ -525,21 +520,31 @@ void test_simulation_state(mlir::MLIRContext *ctx) { void test_quantum_state(mlir::MLIRContext *ctx) { { - auto kernel = "init"; - auto kernelCode = + // @cudaq.kernel + // def init(n: int): + // q = cudaq.qvector(n) + // x(q[0]) + // + // def kernel(s: cudaq.State): + // ... + // + // s = cudaq.get_state(init, 2) + // cudaq.sample(kernel, s) + auto init = "init"; + auto initCode = "func.func private @__nvqpp__mlirgen__init(%arg0: i64) {\n" " %0 = quake.alloca !quake.veq[%arg0 : i64]\n" " %1 = quake.extract_ref %0[0] : (!quake.veq) -> !quake.ref\n" " quake.x %1 : (!quake.ref) -> ()\n" " return\n" "}\n"; - __cudaq_deviceCodeHolderAdd(kernel, kernelCode); + __cudaq_deviceCodeHolderAdd(init, initCode); std::int64_t n = 2; std::vector a = {static_cast(&n)}; - auto x = cudaq::state(new FakeDeviceState(kernel, a)); - std::vector v = {static_cast(&x)}; - doSimpleTest(ctx, "!cc.ptr", v, kernelCode); + auto s = cudaq::state(new FakeDeviceState(init, a)); + std::vector v = {static_cast(&s)}; + doSimpleTest(ctx, "!cc.ptr", v, initCode); } // clang-format off @@ -552,15 +557,145 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: } // CHECK: func.func private @callee(!cc.ptr) -// CHECK: Source module (after): -// CHECK: func.func private @__nvqpp__mlirgen__init(%arg0: i64) { +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: testy +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = quake.get_state 
@__nvqpp__mlirgen__init.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__init.init_[[HASH_0]] : !cc.ptr +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init.init_[[HASH_0]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = arith.subi %arg0, %[[VAL_1]] : i64 +// CHECK: %[[VAL_3:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_2]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: %[[VAL_5:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_3]][0] : (!quake.veq) -> !quake.ref +// CHECK: quake.x %[[VAL_6]] : (!quake.ref) -> () +// CHECK: %[[VAL_7:.*]] = arith.subi %[[VAL_5]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_8:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_7]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: return %[[VAL_8]] : !quake.veq +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init.num_qubits_[[HASH_0]](%arg0: i64) -> i64 { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: return %[[VAL_1]] : i64 +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: init.init_[[HASH_0]] +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: init.num_qubits_[[HASH_0]] +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 +// CHECK: } + // clang-format on + + { + // @cudaq.kernel + // def init(n: int): + // q = cudaq.qvector(n) + // x(q[0]) + // + // def state_param(s: cudaq.State) + // q = cudaq.qvector(s) + // x(q[0]) + // + // def kernel(s: cudaq.State): + // ... 
+ // + // s0 = cudaq.get_state(init, 2) + // s1 = cudaq.get_state(state_param, s0) + // cudaq.sample(kernel, s1) + auto init = "init1"; + auto initCode = + "func.func private @__nvqpp__mlirgen__init1(%arg0: i64) {\n" + " %0 = quake.alloca !quake.veq[%arg0 : i64]\n" + " %1 = quake.extract_ref %0[0] : (!quake.veq) -> !quake.ref\n" + " quake.x %1 : (!quake.ref) -> ()\n" + " return\n" + "}\n"; + __cudaq_deviceCodeHolderAdd(init, initCode); + + auto stateParam = "state_param"; + auto stateParamCode = + "func.func private @__nvqpp__mlirgen__state_param(%arg0: " + "!cc.ptr) {\n" + " %0 = quake.get_number_of_qubits %arg0 : (!cc.ptr) -> " + "i64\n" + " %1 = quake.alloca !quake.veq[%0 : i64]\n" + " %2 = quake.init_state %1, %arg0 : (!quake.veq, " + "!cc.ptr) -> !quake.veq\n" + " %3 = quake.extract_ref %2[0] : (!quake.veq) -> !quake.ref\n" + " quake.x %3 : (!quake.ref) -> ()\n" + " return\n" + "}\n"; + + __cudaq_deviceCodeHolderAdd(stateParam, stateParamCode); + + std::int64_t n = 2; + std::vector a = {static_cast(&n)}; + auto s0 = cudaq::state(new FakeDeviceState(init, a)); + std::vector v0 = {static_cast(&s0)}; + auto s1 = cudaq::state(new FakeDeviceState(stateParam, v0)); + std::vector v1 = {static_cast(&s1)}; + + auto code = std::string{initCode} + std::string{stateParamCode}; + doSimpleTest(ctx, "!cc.ptr", v1, code); + } + + // clang-format off +// CHECK: Source module: +// CHECK: func.func private @__nvqpp__mlirgen__init1(%arg0: i64) { // CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] // CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref // CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () // CHECK: return // CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__state_param(%arg0: !cc.ptr) { +// CHECK: %[[VAL_0:.*]] = quake.get_number_of_qubits %arg0 : (!cc.ptr) -> i64 +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq[%[[VAL_0]] : i64] +// CHECK: %[[VAL_2:.*]] = quake.init_state %[[VAL_1]], %arg0 : (!quake.veq, !cc.ptr) -> 
!quake.veq +// CHECK: return +// CHECK: } // CHECK: func.func private @callee(!cc.ptr) -// CHECK: func.func private @__nvqpp__mlirgen__init.init_0(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { + +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: testy +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %0 = quake.get_state @__nvqpp__mlirgen__state_param.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__state_param.init_[[HASH_0]] : !cc.ptr +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__state_param.init_[[HASH_0]](%arg0: !cc.ptr, %arg1: !quake.veq) -> !quake.veq { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = quake.get_number_of_qubits %arg0 : (!cc.ptr) -> i64 +// CHECK: %[[VAL_3:.*]] = arith.subi %[[VAL_2]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_4:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_3]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: %[[VAL_5:.*]] = arith.addi %[[VAL_0]], %[[VAL_2]] : i64 +// CHECK: %[[VAL_6:.*]] = arith.addi %[[VAL_0]], %[[VAL_2]] : i64 +// CHECK: %[[VAL_7:.*]] = quake.init_state %[[VAL_4]], %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq +// CHECK: %[[VAL_8:.*]] = arith.subi %[[VAL_6]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_9:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_8]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: return %[[VAL_9]] : !quake.veq +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__state_param.num_qubits_[[HASH_0]](%arg0: !cc.ptr) -> i64 { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = quake.get_number_of_qubits %arg0 : (!cc.ptr) -> i64 +// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i64 +// CHECK: return %[[VAL_2]] : i64 +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: state_param.init_[[HASH_0]] +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %0 = quake.get_state 
@__nvqpp__mlirgen__init1.num_qubits_[[HASH_1:.*]] @__nvqpp__mlirgen__init1.init_[[HASH_1]] : !cc.ptr +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init1.init_[[HASH_1]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 // CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 // CHECK: %[[VAL_2:.*]] = arith.subi %arg0, %[[VAL_1]] : i64 @@ -573,36 +708,51 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: %[[VAL_8:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_7]] : (!quake.veq, i64, i64) -> !quake.veq // CHECK: return %[[VAL_8]] : !quake.veq // CHECK: } -// CHECK: func.func private @__nvqpp__mlirgen__init.num_qubits_0(%arg0: i64) -> i64 { +// CHECK: func.func private @__nvqpp__mlirgen__init1.num_qubits_[[HASH_1]](%arg0: i64) -> i64 { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 // CHECK: %[[VAL_1:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 // CHECK: return %[[VAL_1]] : i64 // CHECK: } - // CHECK: ======================================== // CHECK: Substitution module: -// CHECK: testy +// CHECK: init1.init_[[HASH_1]] // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.get_state @__nvqpp__mlirgen__init.num_qubits_0 @__nvqpp__mlirgen__init.init_0 : !cc.ptr +// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: -// CHECK: init.init_0 +// CHECK: init1.num_qubits_[[HASH_1]] // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: -// CHECK: init.num_qubits_0 +// CHECK: state_param.num_qubits_[[HASH_0]] // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 +// CHECK: %[[VAL_0:.*]] = quake.get_state @__nvqpp__mlirgen__init1.num_qubits_[[HASH_1]] @__nvqpp__mlirgen__init1.init_[[HASH_1]] : !cc.ptr // CHECK: } + // clang-format on { - auto kernel = "init"; - auto 
kernelCode = - " func.func private @__nvqpp__mlirgen__init(%arg0: i64) {\n" + // @cudaq.kernel + // def init(n: int): + // q0 = cudaq.qvector(n) + // x(q0[0]) + // r = mz(q0[0]) + // if (r): + // q1 = cudaq.qvector(n) + // x(q1[0]) + // y(q0[0]) + // + // def kernel(s: cudaq.State): + // ... + // + // s = cudaq.get_state(init, 2) + // cudaq.sample(kernel, s) + auto init = "init2"; + auto initCode = + " func.func private @__nvqpp__mlirgen__init2(%arg0: i64) {\n" " %2 = quake.alloca !quake.veq[%arg0 : i64]\n" " %3 = quake.extract_ref %2[0] : (!quake.veq) -> !quake.ref\n" " quake.x %3 : (!quake.ref) -> ()\n" @@ -619,18 +769,18 @@ void test_quantum_state(mlir::MLIRContext *ctx) { " return\n" "}\n"; - __cudaq_deviceCodeHolderAdd(kernel, kernelCode); + __cudaq_deviceCodeHolderAdd(init, initCode); std::int64_t n = 2; std::vector a = {static_cast(&n)}; - auto x = cudaq::state(new FakeDeviceState(kernel, a)); - std::vector v = {static_cast(&x)}; - doSimpleTest(ctx, "!cc.ptr", v, kernelCode); + auto s = cudaq::state(new FakeDeviceState(init, a)); + std::vector v = {static_cast(&s)}; + doSimpleTest(ctx, "!cc.ptr", v, initCode); } // clang-format off // CHECK: Source module: -// CHECK: func.func private @__nvqpp__mlirgen__init(%arg0: i64) { +// CHECK: func.func private @__nvqpp__mlirgen__init2(%arg0: i64) { // CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] // CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref // CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () @@ -647,24 +797,13 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: } // CHECK: func.func private @callee(!cc.ptr) -// CHECK: Source module (after): -// CHECK: func.func private @__nvqpp__mlirgen__init(%arg0: i64) { -// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] -// CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref -// CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () -// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] 
name "q0" : (!quake.ref) -> !quake.measure -// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_2]] : (!quake.measure) -> i1 -// CHECK: cc.if(%[[VAL_3]]) { -// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%arg0 : i64] -// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_4]][0] : (!quake.veq) -> !quake.ref -// CHECK: quake.x %[[VAL_5]] : (!quake.ref) -> () -// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_0]][1] : (!quake.veq) -> !quake.ref -// CHECK: quake.y %[[VAL_6]] : (!quake.ref) -> () -// CHECK: } -// CHECK: return +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: testy +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = quake.get_state @__nvqpp__mlirgen__init2.num_qubits_[[HASH_1:.*]] @__nvqpp__mlirgen__init2.init_[[HASH_1]] : !cc.ptr // CHECK: } -// CHECK: func.func private @callee(!cc.ptr) -// CHECK: func.func private @__nvqpp__mlirgen__init.init_1(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { +// CHECK: func.func private @__nvqpp__mlirgen__init2.init_[[HASH_1]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 // CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 // CHECK: %[[VAL_2:.*]] = arith.subi %arg0, %[[VAL_1]] : i64 @@ -686,27 +825,20 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: %[[VAL_10:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_9]] : (!quake.veq, i64, i64) -> !quake.veq // CHECK: return %[[VAL_10]] : !quake.veq // CHECK: } -// CHECK: func.func private @__nvqpp__mlirgen__init.num_qubits_1(%arg0: i64) -> i64 { +// CHECK: func.func private @__nvqpp__mlirgen__init2.num_qubits_[[HASH_1]](%arg0: i64) -> i64 { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 // CHECK: %[[VAL_1:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 // CHECK: return %[[VAL_1]] : i64 // CHECK: } - -// CHECK: ======================================== -// CHECK: Substitution module: -// CHECK: testy -// CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = 
quake.get_state @__nvqpp__mlirgen__init.num_qubits_1 @__nvqpp__mlirgen__init.init_1 : !cc.ptr -// CHECK: } // CHECK: ======================================== // CHECK: Substitution module: -// CHECK: init.init_1 +// CHECK: init2.init_[[HASH_1]] // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: -// CHECK: init.num_qubits_1 +// CHECK: init2.num_qubits_[[HASH_1]] // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: } diff --git a/test/Quake/arg_subst-5.txt b/test/Quake/arg_subst-5.txt index 5020e7fe096..959ec6ba364 100644 --- a/test/Quake/arg_subst-5.txt +++ b/test/Quake/arg_subst-5.txt @@ -6,6 +6,14 @@ // the terms of the Apache License 2.0 which accompanies this distribution. // // ========================================================================== // -cc.arg_subst[0] { - %0 = quake.get_state @num_qubits @init : !cc.ptr -} +module { + cc.arg_subst[0] { + %0 = quake.get_state @num_qubits @init : !cc.ptr + } + func.func @init(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { + return %arg1 : !quake.veq + } + func.func @num_qubits(%arg0: i64) -> i64 { + return %arg0 : i64 + } +} \ No newline at end of file diff --git a/test/Quake/arg_subst_func.qke b/test/Quake/arg_subst_func.qke index 8df6c5e1433..dc9a28d9073 100644 --- a/test/Quake/arg_subst_func.qke +++ b/test/Quake/arg_subst_func.qke @@ -154,14 +154,6 @@ func.func @testy5(%arg0: !cc.ptr) { return } -func.func @init(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { - return %arg1 : !quake.veq -} - -func.func @num_qubits(%arg0: i64) -> i64 { - return %arg0 : i64 -} - // CHECK-LABEL: func.func @testy5() { // CHECK: %[[VAL_2:.*]] = quake.get_state @num_qubits @init : !cc.ptr // CHECK: %[[VAL_3:.*]] = quake.get_number_of_qubits %[[VAL_2]] : (!cc.ptr) -> i64 From 9a528dd2f8762cbe7738e6f78309a3049bc0f05d Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: 
Thu, 20 Feb 2025 12:02:10 -0800 Subject: [PATCH 35/54] Make argument synthesis transitive Signed-off-by: Anna Gringauze --- .../Transforms/ArgumentSynthesis.cpp | 128 ++++++++---------- test/Quake/arg_subst-5.txt | 15 +- test/Quake/arg_subst-6.txt | 4 +- test/Quake/arg_subst-7.txt | 19 +++ test/Quake/arg_subst-8.txt | 11 ++ test/Quake/arg_subst_func.qke | 23 +++- 6 files changed, 111 insertions(+), 89 deletions(-) create mode 100644 test/Quake/arg_subst-7.txt create mode 100644 test/Quake/arg_subst-8.txt diff --git a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp index 9159fdee9de..187012a4799 100644 --- a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp +++ b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp @@ -26,82 +26,21 @@ namespace cudaq::opt { using namespace mlir; namespace { - -class Analysis { -public: - Analysis(MLIRContext *ctx, mlir::Pass::ListOption &funcList) - : ctx(ctx), funcList(funcList) { - parseSubstModules(); - } - - void parseSubstModules() { - for (auto &item : funcList) { - auto pos = item.find(':'); - if (pos == std::string::npos) - continue; - - std::string funcName = item.substr(0, pos); - std::string text = item.substr(pos + 1); - - // If there are no substitutions, continue to the next subst - if (text.empty()) { - LLVM_DEBUG(llvm::dbgs() << funcName << " has no substitutions."); - continue; - } - - // If we're here, we have a FuncOp and we have substitutions that can be - // applied. - // - // 1. Create a Module with the substitutions that we'll be making. - LLVM_DEBUG(llvm::dbgs() - << funcName << " : substitution pattern: '" << text << "'\n"); - auto substModule = [&]() -> OwningOpRef { - if (text.front() == '*') { - // Substitutions are a raw string after the '*' character. - return parseSourceString(text.substr(1), ctx); - } - // Substitutions are in a text file (command-line usage). 
- return parseSourceFile(text, ctx); - }(); - assert(*substModule && "module must have been created"); - auto &name = funcNames.emplace_back(funcName); - substModules.try_emplace(name, std::move(substModule)); - // substModules[funcName]->dump(); - } - } - - MLIRContext *ctx; - mlir::Pass::ListOption &funcList; - std::list funcNames; - DenseMap> substModules; -}; - class ArgumentSynthesisPass : public cudaq::opt::impl::ArgumentSynthesisBase { public: using ArgumentSynthesisBase::ArgumentSynthesisBase; - void mergeSymbols(ModuleOp mod, Analysis &analysis) { - for (auto &[funcName, substMod] : analysis.substModules) { - // 2. Go through the Module and merge in all its symbols. - for (auto &op : *substMod) { - if (auto symInterface = dyn_cast(op)) { - auto name = symInterface.getName(); - auto obj = mod.lookupSymbol(name); - if (!obj) - mod.getBody()->push_back(op.clone()); - } - } - } - } - - void processFunction(func::FuncOp func, Analysis &analysis) { + void + applySubstitutions(func::FuncOp func, + DenseMap> &substModules) { MLIRContext *ctx = func.getContext(); auto funcName = func.getName(); LLVM_DEBUG(llvm::dbgs() << "processing : '" << funcName << "'\n"); - auto it = analysis.substModules.find(funcName); - if (it == analysis.substModules.end()) { + // 1. Find substitution module with argument replacements for the function. + auto it = substModules.find(funcName); + if (it == substModules.end()) { // If the function isn't on the list, do nothing. LLVM_DEBUG(llvm::dbgs() << funcName << " has no substitutions.\n"); return; @@ -116,7 +55,6 @@ class ArgumentSynthesisPass auto subst = dyn_cast(op); if (!subst) continue; - auto pos = subst.getPosition(); if (pos >= processedArgs.size()) { func.emitError("Argument " + std::to_string(pos) + " is invalid."); @@ -179,11 +117,57 @@ class ArgumentSynthesisPass void runOnOperation() override { ModuleOp mod = getOperation(); - Analysis analysis(mod.getContext(), funcList); + MLIRContext *ctx = mod.getContext(); + + // 1. 
Collect all substitution modules. + std::list funcNames; + DenseMap> substModules; + + for (auto &item : funcList) { + auto pos = item.find(':'); + if (pos == std::string::npos) + continue; + + std::string funcName = item.substr(0, pos); + std::string text = item.substr(pos + 1); - mergeSymbols(mod, analysis); + if (text.empty()) { + LLVM_DEBUG(llvm::dbgs() << funcName << " has no substitutions."); + continue; + } + + // Create a Module with the substitutions that we'll be making. + LLVM_DEBUG(llvm::dbgs() + << funcName << " : substitution pattern: '" << text << "'\n"); + auto substModule = [&]() -> OwningOpRef { + if (text.front() == '*') { + // Substitutions are a raw string after the '*' character. + return parseSourceString(text.substr(1), ctx); + } + // Substitutions are in a text file (command-line usage). + return parseSourceFile(text, ctx); + }(); + assert(*substModule && "module must have been created"); + + auto &name = funcNames.emplace_back(funcName); + substModules.try_emplace(name, std::move(substModule)); + } + + // 2. Merge symbols from substitution modules into the source module. + for (auto &[funcName, substMod] : substModules) { + for (auto &op : *substMod) { + if (auto symInterface = dyn_cast(op)) { + auto name = symInterface.getName(); + auto obj = mod.lookupSymbol(name); + if (!obj) + mod.getBody()->push_back(op.clone()); + } + } + } - mod->walk([&](func::FuncOp func) { processFunction(func, analysis); }); + // 3. Apply all substitutions. 
+ mod->walk( + [&](func::FuncOp func) { applySubstitutions(func, substModules); }); } }; } // namespace @@ -200,4 +184,4 @@ cudaq::opt::createArgumentSynthesisPass(ArrayRef funcNames, pairs.emplace_back(name.str() + ":*" + text.str()); return std::make_unique( ArgumentSynthesisOptions{pairs}); -} +} \ No newline at end of file diff --git a/test/Quake/arg_subst-5.txt b/test/Quake/arg_subst-5.txt index 959ec6ba364..21fb6ef0c2e 100644 --- a/test/Quake/arg_subst-5.txt +++ b/test/Quake/arg_subst-5.txt @@ -6,14 +6,9 @@ // the terms of the Apache License 2.0 which accompanies this distribution. // // ========================================================================== // -module { - cc.arg_subst[0] { - %0 = quake.get_state @num_qubits @init : !cc.ptr - } - func.func @init(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { - return %arg1 : !quake.veq - } - func.func @num_qubits(%arg0: i64) -> i64 { - return %arg0 : i64 - } +cc.arg_subst[0] { + %0 = arith.constant 2 : i32 +} +func.func private @callee5(%arg0: i32) -> (i32) { + return %arg0: i32 } \ No newline at end of file diff --git a/test/Quake/arg_subst-6.txt b/test/Quake/arg_subst-6.txt index 7a53d0369de..ed5126e1d9b 100644 --- a/test/Quake/arg_subst-6.txt +++ b/test/Quake/arg_subst-6.txt @@ -7,5 +7,5 @@ // ========================================================================== // cc.arg_subst[0] { - %c2_i64 = arith.constant 2 : i64 -} + %c4_i64 = arith.constant 4 : i32 +} \ No newline at end of file diff --git a/test/Quake/arg_subst-7.txt b/test/Quake/arg_subst-7.txt new file mode 100644 index 00000000000..959ec6ba364 --- /dev/null +++ b/test/Quake/arg_subst-7.txt @@ -0,0 +1,19 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. 
// +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +module { + cc.arg_subst[0] { + %0 = quake.get_state @num_qubits @init : !cc.ptr + } + func.func @init(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { + return %arg1 : !quake.veq + } + func.func @num_qubits(%arg0: i64) -> i64 { + return %arg0 : i64 + } +} \ No newline at end of file diff --git a/test/Quake/arg_subst-8.txt b/test/Quake/arg_subst-8.txt new file mode 100644 index 00000000000..7a53d0369de --- /dev/null +++ b/test/Quake/arg_subst-8.txt @@ -0,0 +1,11 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +cc.arg_subst[0] { + %c2_i64 = arith.constant 2 : i64 +} diff --git a/test/Quake/arg_subst_func.qke b/test/Quake/arg_subst_func.qke index dc9a28d9073..b9a7f955981 100644 --- a/test/Quake/arg_subst_func.qke +++ b/test/Quake/arg_subst_func.qke @@ -6,7 +6,7 @@ // the terms of the Apache License 2.0 which accompanies this distribution. 
// // ========================================================================== // -// RUN: cudaq-opt --argument-synthesis=functions=foo:%S/arg_subst.txt,blink:%S/arg_subst.txt,testy1:%S/arg_subst-1.txt,testy2:%S/arg_subst-2.txt,testy3:%S/arg_subst-3.txt,testy4:%S/arg_subst-4.txt,testy5:%S/arg_subst-5.txt,num_qubits:%S/arg_subst-6.txt,init:%S/arg_subst-6.txt --canonicalize %s | FileCheck %s +// RUN: cudaq-opt --argument-synthesis=functions=foo:%S/arg_subst.txt,blink:%S/arg_subst.txt,testy1:%S/arg_subst-1.txt,testy2:%S/arg_subst-2.txt,testy3:%S/arg_subst-3.txt,testy4:%S/arg_subst-4.txt,testy5:%S/arg_subst-5.txt,callee5:%S/arg_subst-6.txt,testy6:%S/arg_subst-7.txt,num_qubits:%S/arg_subst-8.txt,init:%S/arg_subst-8.txt --canonicalize %s | FileCheck %s func.func private @bar(i32) func.func private @baz(f32) @@ -147,24 +147,37 @@ func.func @testy4(%arg0: !cc.stdvec>) { // CHECK: return // CHECK: } -func.func @testy5(%arg0: !cc.ptr) { + +func.func @testy5(%arg0: i32) -> i32 { + return %arg0: i32 +} + +func.func @testy6(%arg0: !cc.ptr) { %0 = quake.get_number_of_qubits %arg0 : (!cc.ptr) -> i64 %1 = quake.alloca !quake.veq[%0 : i64] %5 = quake.init_state %1, %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq return } -// CHECK-LABEL: func.func @testy5() { +// CHECK-LABEL: func.func @testy5() -> i32 { +// CHECK: %[[VAL_0:.*]] = arith.constant 2 : i32 +// CHECK: return %[[VAL_0]] : i32 +// CHECK: } +// CHECK-LABEL: func.func @testy6() { // CHECK: %[[VAL_2:.*]] = quake.get_state @num_qubits @init : !cc.ptr // CHECK: %[[VAL_3:.*]] = quake.get_number_of_qubits %[[VAL_2]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_5:.*]] = quake.init_state %[[VAL_4]], %[[VAL_2]] : (!quake.veq, !cc.ptr) -> !quake.veq // CHECK: return // CHECK: } -// CHECK: func.func @init(%arg0: !quake.veq) -> !quake.veq { +// CHECK-LABEL: func.func private @callee5() -> i32 { +// CHECK: %[[VAL_0:.*]] = arith.constant 4 : i32 +// CHECK: return %[[VAL_0]] : i32 
+// CHECK: } +// CHECK-LABEL: func.func @init(%arg0: !quake.veq) -> !quake.veq { // CHECK: return %arg0 : !quake.veq // CHECK: } -// CHECK: func.func @num_qubits() -> i64 { +// CHECK-LABEL: func.func @num_qubits() -> i64 { // CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: return %[[VAL_0]] : i64 // CHECK: } From af2fd79e5180b9ea6feba52a76318dfce7060aab Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 20 Feb 2025 12:05:51 -0800 Subject: [PATCH 36/54] Update callers of synthesis Signed-off-by: Anna Gringauze --- python/runtime/cudaq/platform/py_alt_launch_kernel.cpp | 3 +-- runtime/common/BaseRemoteRESTQPU.h | 3 +-- runtime/common/BaseRestRemoteClient.h | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 74e7d676a49..083b31e4dde 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -552,8 +552,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, ss << argCon.getSubstitutionModule(); SmallVector substs = {substBuff}; PassManager pm(context); - pm.addNestedPass( - cudaq::opt::createArgumentSynthesisPass(kernels, substs)); + pm.addPass(opt::createArgumentSynthesisPass(kernels, substs)); pm.addNestedPass(createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index b69caad2766..409153c6181 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -492,8 +492,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { kernels.end()}; mlir::SmallVector substitutions{substs.begin(), substs.end()}; - pm.addNestedPass( - cudaq::opt::createArgumentSynthesisPass(funcNames, substitutions)); + pm.addPass(opt::createArgumentSynthesisPass(funcNames, substitutions)); pm.addPass(opt::createDeleteStates()); 
pm.addNestedPass( opt::createReplaceStateWithKernel()); diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index 5bcb89066af..ab8d3ba79d5 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -191,8 +191,7 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { llvm::raw_string_ostream ss(substBuff); ss << argCon.getSubstitutionModule(); mlir::SmallVector substs = {substBuff}; - pm.addNestedPass( - opt::createArgumentSynthesisPass(kernels, substs)); + pm.addPass(opt::createArgumentSynthesisPass(kernels, substs)); pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); } else if (args) { From 4d6f7ee3eb634fc9aced6d894bb302e74e292ca7 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 20 Feb 2025 12:21:30 -0800 Subject: [PATCH 37/54] Use PointerOf in quake defs Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Dialect/CC/CCTypes.td | 6 ------ include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td | 8 ++++---- lib/Optimizer/Transforms/ArgumentSynthesis.cpp | 2 +- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/include/cudaq/Optimizer/Dialect/CC/CCTypes.td b/include/cudaq/Optimizer/Dialect/CC/CCTypes.td index 03b8d9541d9..18bce4e156a 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCTypes.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCTypes.td @@ -313,10 +313,4 @@ def AnyStateInitLike : TypeConstraint; def AnyStateInitType : Type; -def AnyStatePointerType : Type< - And<[ - cc_PointerType.predicate, - CPred<"$_self.cast().getElementType().isa()"> - ]>, - "state pointer type">; #endif // CUDAQ_DIALECT_CC_TYPES_TD diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td index cfb16bd100c..b70539acee8 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td @@ -1418,7 +1418,7 @@ def quake_CreateStateOp : 
QuakeOp<"create_state", [Pure]> { cc_PointerType:$data, AnySignlessInteger:$length ); - let results = (outs AnyStatePointerType:$result); + let results = (outs PointerOf<[cc_StateType]>:$result); let assemblyFormat = [{ $data `,` $length `:` functional-type(operands, results) attr-dict }]; @@ -1436,7 +1436,7 @@ def QuakeOp_DeleteStateOp : QuakeOp<"delete_state", [] > { ``` }]; - let arguments = (ins AnyStatePointerType:$state); + let arguments = (ins PointerOf<[cc_StateType]>:$state); let results = (outs); let assemblyFormat = [{ $state `:` type(operands) attr-dict @@ -1456,7 +1456,7 @@ def quake_GetNumberOfQubitsOp : QuakeOp<"get_number_of_qubits", [Pure] > { ``` }]; - let arguments = (ins AnyStatePointerType:$state); + let arguments = (ins PointerOf<[cc_StateType]>:$state); let results = (outs AnySignlessInteger:$result); let assemblyFormat = [{ $state `:` functional-type(operands, results) attr-dict @@ -1490,7 +1490,7 @@ def QuakeOp_GetStateOp : QuakeOp<"get_state", [Pure] > { FlatSymbolRefAttr:$numQubitsFunc, FlatSymbolRefAttr:$initFunc ); - let results = (outs AnyStatePointerType:$result); + let results = (outs PointerOf<[cc_StateType]>:$result); let assemblyFormat = [{ $numQubitsFunc $initFunc `:` qualified(type(results)) attr-dict }]; diff --git a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp index 187012a4799..b80bdfeea23 100644 --- a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp +++ b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp @@ -184,4 +184,4 @@ cudaq::opt::createArgumentSynthesisPass(ArrayRef funcNames, pairs.emplace_back(name.str() + ":*" + text.str()); return std::make_unique( ArgumentSynthesisOptions{pairs}); -} \ No newline at end of file +} From e7d95d54e92c931fc7e8bce4472dfbb241f6ce58 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 20 Feb 2025 14:04:56 -0800 Subject: [PATCH 38/54] Addressed more CR comments Signed-off-by: Anna Gringauze --- .../cudaq/Optimizer/Dialect/Quake/QuakeOps.td 
| 4 +- include/cudaq/Optimizer/Transforms/Passes.td | 31 +++++----- .../Transforms/ReplaceStateWithKernel.cpp | 57 +++++++++---------- runtime/common/ArgumentConversion.cpp | 23 ++++---- runtime/test/test_argument_conversion.cpp | 10 ++-- test/Quake/arg_subst-7.txt | 2 +- test/Quake/arg_subst_func.qke | 2 +- test/Quake/replace_state_with_kernel.qke | 6 +- 8 files changed, 65 insertions(+), 70 deletions(-) diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td index b70539acee8..5dbc506fa73 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td @@ -1463,7 +1463,7 @@ def quake_GetNumberOfQubitsOp : QuakeOp<"get_number_of_qubits", [Pure] > { }]; } -def QuakeOp_GetStateOp : QuakeOp<"get_state", [Pure] > { +def QuakeOp_MaterializeStateOp : QuakeOp<"materialize_state", [Pure] > { let summary = "Get state from kernel with the provided name."; let description = [{ This operation is created by argument synthesis of state pointer arguments @@ -1482,7 +1482,7 @@ def QuakeOp_GetStateOp : QuakeOp<"get_state", [Pure] > { pass. ```mlir - %0 = quake.get_state @num_qubits @init : !cc.ptr + %0 = quake.materialize_state @num_qubits @init : !cc.ptr ``` }]; diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 3d22756d404..1a081409b62 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -856,18 +856,18 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func "Replace `quake.init_state` instructions with call to the kernel generating the state"; let description = [{ This optimization replaces `quake.init_state`, `quake.get_number_of_qubits`, - and `quake.get_state` operations invoked on state pointers during argument - synthesis for quantum devices. 
+ and `quake.materialize_state` operations invoked on state pointers during + argument synthesis for quantum devices. Before this optimization, argument synthesis for state pointers for quantum - devices substituted a state created from the `quake.get_state` operation - for the state argument. + devices substituted a state created from the `quake.materialize_state` + operation for the state argument. - The `quake.get_state` operation accepts symbols for the synthesized kernels - `@num_qubits` and `@init` that argument synthesis generated from the original - kernel call that generated the state, e.g., the `cudaq::get_state` call that - refers to the result of a specific quantum kernel being invoked with a set - of parameters + The `quake.materialize_state` operation accepts symbols for the synthesized + kernels `@num_qubits` and `@init` that argument synthesis generated from + the original kernel call that generated the state, e.g., + the `cudaq::get_state` call that refers to the result of a specific quantum + kernel being invoked with a set of parameters For example, for the user code: ``` @@ -885,22 +885,23 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func The argument synthesis also substituted the state argument in the `caller` with: ``` - quake.get_state @callee_num_qubits @callee_init: !cc.ptr + quake.materialize_state @callee_num_qubits @callee_init: !cc.ptr ``` This optimization performs the replacements for the the following operations - that use a state produced by `quake.get_state @num_qubits @init` operation: + that use a state produced by `quake.materialize_state @num_qubits @init` + operation: - - Replace `quake.get_number_of_qubits` operation by the call to `@num_qubits` - - Replace `quake.init_state` operation by the call to `@init` - - Clean up unused `quake.get_state` operation + - Replace `quake.get_number_of_qubits` operation by call to `@num_qubits` + - Replace `quake.init_state` operation by call to `@init` + 
- Clean up unused `quake.materialize_state` operation For example: Before ReplaceStateWithKernel (replace-state-with-kernel): ``` func.func @foo() { - %0 = quake.get_state @callee.num_qubits_0 @callee.init_0: !cc.ptr + %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0: !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index d385f061738..a9cd1dd80e1 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -17,7 +17,6 @@ #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" -#include namespace cudaq::opt { #define GEN_PASS_DEF_REPLACESTATEWITHKERNEL @@ -33,11 +32,10 @@ namespace { /// Replace `quake.get_number_of_qubits` by a call to a function /// that computes the number of qubits for a state. /// -/// ``` -/// %0 = quake.get_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr +/// ```mlir +/// %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr /// %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 /// ─────────────────────────────────────────── -/// ... 
/// %1 = call @callee.num_qubits_0() : () -> i64 /// ``` // clang-format on @@ -50,16 +48,16 @@ class ReplaceGetNumQubitsPattern PatternRewriter &rewriter) const override { auto stateOp = numQubits.getOperand(); - if (auto getState = stateOp.getDefiningOp()) { - auto numQubitsFunc = getState.getNumQubitsFunc(); - - rewriter.setInsertionPoint(numQubits); - rewriter.replaceOpWithNewOp( - numQubits, numQubits.getType(), numQubitsFunc, mlir::ValueRange{}); - return success(); - } - return numQubits->emitError( - "ReplaceStateWithKernel: failed to replace `quake.get_num_qubits`"); + auto materializeState = stateOp.getDefiningOp(); + if (!materializeState) + return numQubits->emitError( + "ReplaceStateWithKernel: failed to replace `quake.get_num_qubits`"); + + auto numQubitsFunc = materializeState.getNumQubitsFunc(); + rewriter.setInsertionPoint(numQubits); + rewriter.replaceOpWithNewOp( + numQubits, numQubits.getType(), numQubitsFunc, mlir::ValueRange{}); + return success(); } }; @@ -67,11 +65,10 @@ class ReplaceGetNumQubitsPattern /// Replace `quake.init_state` by a call to a (modified) kernel that produced /// the state. /// -/// ``` -/// %0 = quake.get_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr +/// ```mlir +/// %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr /// %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq /// ─────────────────────────────────────────── -/// ... 
/// %3 = call @callee.init_0(%2): (!quake.veq) -> !quake.veq /// ``` // clang-format on @@ -87,19 +84,19 @@ class ReplaceInitStatePattern if (auto ptrTy = dyn_cast(stateOp.getType())) { if (isa(ptrTy.getElementType())) { - if (auto getState = stateOp.getDefiningOp()) { - auto initName = getState.getInitFunc(); - - rewriter.setInsertionPoint(initState); - rewriter.replaceOpWithNewOp( - initState, initState.getType(), initName, - mlir::ValueRange{allocaOp}); - - return success(); - } - - return initState->emitError( - "ReplaceStateWithKernel: failed to replace `quake.init_state`"); + auto materializeState = + stateOp.getDefiningOp(); + if (!materializeState) + return initState->emitError( + "ReplaceStateWithKernel: failed to replace `quake.init_state`"); + + auto initName = materializeState.getInitFunc(); + rewriter.setInsertionPoint(initState); + rewriter.replaceOpWithNewOp(initState, + initState.getType(), initName, + mlir::ValueRange{allocaOp}); + + return success(); } } return failure(); diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index daf84544ee3..84f1c13fcfe 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -329,15 +329,10 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, cudaq::state_helper::getSimulationState(const_cast(v)); auto kernelName = converter.getKernelName(); - // auto sourceMod = converter.getSourceModule(); auto substMod = converter.getSubstitutionModule(); // If the state has amplitude data, we materialize the data as a state // vector and create a new state from it. - // TODO: add an option to use the kernel info if available, i.e. for - // remote simulators - // TODO: add an option of storing the kernel info on simulators if - // preferred i.e. to support synthesis of density matrices. if (simState->hasData()) { // The call below might cause lazy execution of the state kernel. 
// TODO: For lazy execution scenario on remote simulators, we have the @@ -391,7 +386,8 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // efficient) we aim at replacing states with calls to kernels (`callees`) // that generated them. This is done in 2 stages: // - // 1. Replace state by quake.get_state instruction during argument conversion: + // 1. Replace state by quake.materialize_state instruction during argument + // conversion: // // Create two functions: // - callee.num_qubits_N @@ -400,7 +396,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // Initializes the veq passed as a parameter // // Then replace the state with - // `quake.get_state @callee.num_qubits_0 @callee.init_0`: + // `quake.materialize_state @callee.num_qubits_0 @callee.init_0`: // // clang-format off // ``` @@ -429,7 +425,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // clang-format off // ``` // func.func @caller() { - // %0 = quake.get_state @callee.num_qubits_0 @callee.init_state_0 : !cc.ptr + // %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_state_0 : !cc.ptr // %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 // %2 = quake.alloca !quake.veq[%1 : i64] // %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq @@ -448,9 +444,9 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // ``` // clang-format on // - // 2. Replace the `quake.get_state` and ops that use its state with calls to - // the generated functions, synthesized with the arguments used to create the - // original state: + // 2. 
Replace the `quake.materialize_state` and ops that use its state with + // calls to the generated functions, synthesized with the arguments used to + // create the original state: // // After ReplaceStateWithKernel pass: // @@ -505,7 +501,8 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, if (!cudaq::opt::ArgumentConverter::isRegisteredKernelName(initName) || !cudaq::opt::ArgumentConverter::isRegisteredKernelName(numQubitsName)) { // Create `callee.init_N` and `callee.num_qubits_N` used for - // `quake.get_state` replacement later in ReplaceStateWithKernel pass + // `quake.materialize_state` replacement later in ReplaceStateWithKernel + // pass createInitFunc(builder, substMod, calleeFunc, initKernelName); createNumQubitsFunc(builder, substMod, calleeFunc, numQubitsKernelName); @@ -524,7 +521,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // Create a substitution for the state pointer. auto statePtrTy = cudaq::cc::PointerType::get(cudaq::cc::StateType::get(ctx)); - return builder.create( + return builder.create( loc, statePtrTy, builder.getStringAttr(numQubitsKernelName), builder.getStringAttr(initKernelName)); } diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index bd3a7b2107b..246802eb355 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -561,7 +561,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.get_state @__nvqpp__mlirgen__init.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__init.init_[[HASH_0]] : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__init.init_[[HASH_0]] : !cc.ptr // CHECK: } // CHECK: func.func private @__nvqpp__mlirgen__init.init_[[HASH_0]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: 
%[[VAL_0:.*]] = arith.constant 0 : i64 @@ -668,7 +668,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %0 = quake.get_state @__nvqpp__mlirgen__state_param.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__state_param.init_[[HASH_0]] : !cc.ptr +// CHECK: %0 = quake.materialize_state @__nvqpp__mlirgen__state_param.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__state_param.init_[[HASH_0]] : !cc.ptr // CHECK: } // CHECK: func.func private @__nvqpp__mlirgen__state_param.init_[[HASH_0]](%arg0: !cc.ptr, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 @@ -693,7 +693,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: state_param.init_[[HASH_0]] // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %0 = quake.get_state @__nvqpp__mlirgen__init1.num_qubits_[[HASH_1:.*]] @__nvqpp__mlirgen__init1.init_[[HASH_1]] : !cc.ptr +// CHECK: %0 = quake.materialize_state @__nvqpp__mlirgen__init1.num_qubits_[[HASH_1:.*]] @__nvqpp__mlirgen__init1.init_[[HASH_1]] : !cc.ptr // CHECK: } // CHECK: func.func private @__nvqpp__mlirgen__init1.init_[[HASH_1]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 @@ -729,7 +729,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: state_param.num_qubits_[[HASH_0]] // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.get_state @__nvqpp__mlirgen__init1.num_qubits_[[HASH_1]] @__nvqpp__mlirgen__init1.init_[[HASH_1]] : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init1.num_qubits_[[HASH_1]] @__nvqpp__mlirgen__init1.init_[[HASH_1]] : !cc.ptr // CHECK: } // clang-format on @@ -801,7 +801,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.get_state 
@__nvqpp__mlirgen__init2.num_qubits_[[HASH_1:.*]] @__nvqpp__mlirgen__init2.init_[[HASH_1]] : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init2.num_qubits_[[HASH_1:.*]] @__nvqpp__mlirgen__init2.init_[[HASH_1]] : !cc.ptr // CHECK: } // CHECK: func.func private @__nvqpp__mlirgen__init2.init_[[HASH_1]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 diff --git a/test/Quake/arg_subst-7.txt b/test/Quake/arg_subst-7.txt index 959ec6ba364..a3ed90891ab 100644 --- a/test/Quake/arg_subst-7.txt +++ b/test/Quake/arg_subst-7.txt @@ -8,7 +8,7 @@ module { cc.arg_subst[0] { - %0 = quake.get_state @num_qubits @init : !cc.ptr + %0 = quake.materialize_state @num_qubits @init : !cc.ptr } func.func @init(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { return %arg1 : !quake.veq diff --git a/test/Quake/arg_subst_func.qke b/test/Quake/arg_subst_func.qke index b9a7f955981..92b2e712fa3 100644 --- a/test/Quake/arg_subst_func.qke +++ b/test/Quake/arg_subst_func.qke @@ -164,7 +164,7 @@ func.func @testy6(%arg0: !cc.ptr) { // CHECK: return %[[VAL_0]] : i32 // CHECK: } // CHECK-LABEL: func.func @testy6() { -// CHECK: %[[VAL_2:.*]] = quake.get_state @num_qubits @init : !cc.ptr +// CHECK: %[[VAL_2:.*]] = quake.materialize_state @num_qubits @init : !cc.ptr // CHECK: %[[VAL_3:.*]] = quake.get_number_of_qubits %[[VAL_2]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_5:.*]] = quake.init_state %[[VAL_4]], %[[VAL_2]] : (!quake.veq, !cc.ptr) -> !quake.veq diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index 58b474a65b0..38b1c81d36d 100644 --- a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -23,7 +23,7 @@ module { } func.func @caller0() { - %0 = quake.get_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr + %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : 
!cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq @@ -38,7 +38,7 @@ module { // CHECK: } func.func @caller1(%arg0: i64) { - %0 = quake.get_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr + %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr %2 = quake.alloca !quake.veq[%arg0 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq return @@ -51,7 +51,7 @@ module { // CHECK: } func.func @caller2() -> i64 { - %0 = quake.get_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr + %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 return %1: i64 } From c4d600fd723e4eacacb71392d3f779ce876c330c Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 21 Feb 2025 09:13:18 -0800 Subject: [PATCH 39/54] Recursive with caching Signed-off-by: Anna Gringauze --- runtime/common/ArgumentConversion.cpp | 133 ++++++++++++++------------ runtime/common/ArgumentConversion.h | 61 ++++++++---- runtime/common/BaseRemoteRESTQPU.h | 4 +- 3 files changed, 120 insertions(+), 78 deletions(-) diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 84f1c13fcfe..66af2ce7b6b 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -102,6 +102,7 @@ static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, /// Create callee.init_N that initializes the state /// Callee (the kernel captured by state): // clang-format off +/// ```mlir /// func.func @__nvqpp__mlirgen__callee(%arg0: i64) { /// %0 = cc.alloca i64 /// cc.store %arg0, %0 : !cc.ptr @@ -118,11 +119,12 @@ static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, /// quake.x %1 : (f64, !quake.ref) -> () /// return %arg0: !quake.veq /// } +/// ``` // clang-format on -static void createInitFunc(OpBuilder 
&builder, ModuleOp sourceMod, +static void createInitFunc(OpBuilder &builder, ModuleOp moduleOp, func::FuncOp calleeFunc, StringRef initKernelName) { OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(sourceMod.getBody()); + builder.setInsertionPointToEnd(moduleOp.getBody()); auto ctx = builder.getContext(); auto loc = builder.getUnknownLoc(); @@ -226,6 +228,7 @@ static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, /// initialize the state /// Callee: (the kernel captured by state): // clang-format off +/// ```mlir /// func.func @callee(%arg0: i64) { /// %0 = cc.alloca i64 /// cc.store %arg0, %0 : !cc.ptr @@ -243,12 +246,13 @@ static void createInitFunc(OpBuilder &builder, ModuleOp sourceMod, /// %1 = cc.load %0 : !cc.ptr /// return %1 : i64 /// } +/// ``` // clang-format on -static void createNumQubitsFunc(OpBuilder &builder, ModuleOp sourceMod, +static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, func::FuncOp calleeFunc, StringRef numQubitsKernelName) { OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(sourceMod.getBody()); + builder.setInsertionPointToEnd(moduleOp.getBody()); auto ctx = builder.getContext(); auto loc = builder.getUnknownLoc(); @@ -384,19 +388,19 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // Otherwise (ie quantum hardware, where getting the amplitude data is not // efficient) we aim at replacing states with calls to kernels (`callees`) - // that generated them. This is done in 2 stages: + // that generated them. This is done in three stages: // - // 1. Replace state by quake.materialize_state instruction during argument - // conversion: + // 1) (done here) Generate @callee.num_qubits_0 @callee.init_0` for the callee + // function and its arguments stored in a state. 
+ + // Create two functions: + // - callee.num_qubits_N + // Calculates the number of qubits needed for the veq allocation + // - callee.init_N + // Initializes the veq passed as a parameter // - // Create two functions: - // - callee.num_qubits_N - // Calculates the number of qubits needed for the veq allocation - // - callee.init_N - // Initializes the veq passed as a parameter - // - // Then replace the state with - // `quake.materialize_state @callee.num_qubits_0 @callee.init_0`: + // 2) (done here) Replace the state with + // `quake.materialize_state @callee.num_qubits_0 @callee.init_0`: // // clang-format off // ``` @@ -425,7 +429,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // clang-format off // ``` // func.func @caller() { - // %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_state_0 : !cc.ptr + // %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_state_0 : !cc.ptr // %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 // %2 = quake.alloca !quake.veq[%1 : i64] // %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq @@ -444,32 +448,32 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // ``` // clang-format on // - // 2.
Replace the `quake.materialize_state` and ops that use its state with - // calls to the generated functions, synthesized with the arguments used to - // create the original state: + // 3) (done in ReplaceStateWithKernel) Replace the `quake.materialize_state` + // and ops that use its state with calls to the generated functions, + // synthesized with the arguments used to create the original state: // // After ReplaceStateWithKernel pass: // // clang-format off - // ``` - // func.func @caller() { - // %1 = call callee.num_qubits_0() : () -> i64 - // %2 = quake.alloca !quake.veq[%1 : i64] - // %3 = call @callee.init_0(%2): (!quake.veq) -> !quake.veq - // } - // - // func.func private @callee.num_qubits_0() -> i64 { - // %cst = arith.constant 2 : i64 - // return %cst : i64 - // } - // - // func.func private @callee.init_0(%arg0: !quake.veq): !quake.veq { - // %cst = arith.constant 1.5707963267948966 : f64 - // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref - // quake.ry (%cst) %1 : (f64, !quake.ref) -> () - // return %arg0 - // } - // ``` + // ``` + // func.func @caller() { + // %1 = call callee.num_qubits_0() : () -> i64 + // %2 = quake.alloca !quake.veq[%1 : i64] + // %3 = call @callee.init_0(%2): (!quake.veq) -> !quake.veq + // } + // + // func.func private @callee.num_qubits_0() -> i64 { + // %cst = arith.constant 2 : i64 + // return %cst : i64 + // } + // + // func.func private @callee.init_0(%arg0: !quake.veq): !quake.veq { + // %cst = arith.constant 1.5707963267948966 : f64 + // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref + // quake.ry (%cst) %1 : (f64, !quake.ref) -> () + // return %arg0 + // } + // ``` // clang-format on if (simState->getKernelInfo().has_value()) { auto [calleeName, calleeArgs] = simState->getKernelInfo().value(); @@ -487,35 +491,31 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); assert(calleeFunc && "callee func is missing"); -
// Use the state pointer as a hash to create the new kernel names. - // We can reuse the functions previously created from the same state. + // Use the state pointer as hash to look up the function name + // that was created using the same hash in StateAggregator. auto hash = std::to_string(reinterpret_cast(v)); auto initName = calleeName + ".init_" + hash; auto numQubitsName = calleeName + ".num_qubits_" + hash; - - // Function names in the IR auto initKernelName = cudaq::runtime::cudaqGenPrefixName + initName; auto numQubitsKernelName = cudaq::runtime::cudaqGenPrefixName + numQubitsName; - if (!cudaq::opt::ArgumentConverter::isRegisteredKernelName(initName) || - !cudaq::opt::ArgumentConverter::isRegisteredKernelName(numQubitsName)) { - // Create `callee.init_N` and `callee.num_qubits_N` used for - // `quake.materialize_state` replacement later in ReplaceStateWithKernel - // pass + // Create `callee.init_N` and `callee.num_qubits_N` used to replace + // `quake.materialize_state` in ReplaceStateWithKernel pass + if (!converter.isRegisteredKernel(initName) || + !converter.isRegisteredKernel(numQubitsName)) { createInitFunc(builder, substMod, calleeFunc, initKernelName); createNumQubitsFunc(builder, substMod, calleeFunc, numQubitsKernelName); - // Create and register names for new `init` and `num_qubits` kernels so - // ArgumentConverters can keep a string reference to a valid memory. - auto ®isteredInitName = - cudaq::opt::ArgumentConverter::registerKernelName(initName); - auto ®isteredNumQubitsName = - cudaq::opt::ArgumentConverter::registerKernelName(numQubitsName); + // Convert arguments for `callee.init_N`. + auto &initConverter = + cudaq::opt::createChildConverter(converter, initName); + initConverter.gen(calleeArgs); - // Convert arguments for `callee.init_N` and `callee.num_qubits_N`. - converter.genCallee(registeredInitName, calleeArgs); - converter.genCallee(registeredNumQubitsName, calleeArgs); + // Convert arguments for `callee.num_qubits_N`. 
+ auto &numQubitsConverter = + cudaq::opt::createChildConverter(converter, numQubitsName); + numQubitsConverter.gen(calleeArgs); } // Create a substitution for the state pointer. @@ -699,13 +699,20 @@ Value genConstant(OpBuilder &builder, cudaq::cc::IndirectCallableType indCallTy, //===----------------------------------------------------------------------===// -std::list cudaq::opt::ArgumentConverter::kernelNameRegistry = - std::list(); +std::list cudaq::opt::ArgumentConverter::emptyRegistry; cudaq::opt::ArgumentConverter::ArgumentConverter(StringRef kernelName, ModuleOp sourceModule) : sourceModule(sourceModule), builder(sourceModule.getContext()), - kernelName(kernelName) { + kernelName(kernelName), kernelRegistry(emptyRegistry) { + substModule = builder.create(builder.getUnknownLoc()); +} + +cudaq::opt::ArgumentConverter::ArgumentConverter( + std::list &kernelRegistry, StringRef kernelName, + ModuleOp sourceModule) + : sourceModule(sourceModule), builder(sourceModule.getContext()), + kernelName(kernelName), kernelRegistry(kernelRegistry) { substModule = builder.create(builder.getUnknownLoc()); } @@ -835,3 +842,11 @@ void cudaq::opt::ArgumentConverter::gen_drop_front( } gen(partialArgs); } + +cudaq::opt::ArgumentConverter & +cudaq::opt::createChildConverter(cudaq::opt::ArgumentConverter &parent, + std::string &calleeName) { + // Store the name in the kernel name cache before referencing it. + auto &name = parent.registerKernel(calleeName); + return parent.createCalleeConverter(name); +} diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 677bc53b066..2a95178ed1b 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -25,6 +25,12 @@ class ArgumentConverter { /// kernelName in \p sourceModule. ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule); + /// Build an instance to create argument substitutions for a specified \p + /// kernelName in \p sourceModule. 
Use \p kernelRegistry to store newly + /// generated functions. + ArgumentConverter(std::list &kernelRegistry, + mlir::StringRef kernelName, mlir::ModuleOp sourceModule); + /// Generate a substitution ModuleOp for the vector of arguments presented. /// The arguments are those presented to the kernel, kernelName. void gen(const std::vector &arguments); @@ -48,42 +54,61 @@ class ArgumentConverter { /// created. mlir::ModuleOp getSubstitutionModule() { return substModule; } - mlir::ModuleOp getSourceModule() { return sourceModule; } - + /// Kernel we are converting the arguments for. mlir::StringRef getKernelName() { return kernelName; } - void genCallee(mlir::StringRef calleeName, std::vector &args) { - auto &converter = calleeConverters.emplace_back(calleeName, substModule); - converter.gen(args); - } - + /// Return child converters for functions created from kernel used in state + /// arguments. std::vector &getCalleeConverters() { return calleeConverters; } - static bool isRegisteredKernelName(const std::string &kernelName) { - return std::find(kernelNameRegistry.begin(), kernelNameRegistry.end(), - kernelName) != kernelNameRegistry.end(); + /// Is kernel name already created? + bool isRegisteredKernel(const std::string &kernelName) { + return std::find(kernelRegistry.begin(), kernelRegistry.end(), + kernelName) != kernelRegistry.end(); } - static const std::string ®isterKernelName(const std::string &kernelName) { - return kernelNameRegistry.emplace_back(kernelName); + /// Store kernel name in memory for newly created kernels. + const std::string ®isterKernel(const std::string &kernelName) { + return kernelRegistry.emplace_back(kernelName); } private: - /// Keeps kernel names created during argument conversion in memory. - /// References to those names are used by the argument converters for - /// those kernels. - /// Note: use std::list to make sure we always return valid references - /// when registering new kernel names. 
- static std::list kernelNameRegistry; + /// Default registry to use when state synthesis is not needed. + static std::list emptyRegistry; + + /// Create a child converter for the new callee created from a + /// state argument. + ArgumentConverter &createCalleeConverter(mlir::StringRef calleeName) { + assert(&kernelRegistry != &emptyRegistry && + "Argument converter is missing a kernel registry"); + return calleeConverters.emplace_back(kernelRegistry, calleeName, + substModule); + } mlir::ModuleOp sourceModule; mlir::ModuleOp substModule; mlir::OpBuilder builder; mlir::StringRef kernelName; mlir::SmallVector substitutions; + + /// Converters for functions created during state argument conversion. std::vector calleeConverters; + + /// Keeps new kernel names created during argument conversion in memory. + /// References to the names are used by the argument converters for + /// their kernels. + /// NOTE: use `std::list` to make sure we always return valid references + /// when registering new kernel names, as the references are taken while + /// the list is growing. 
+ std::list &kernelRegistry; + + friend ArgumentConverter &createChildConverter(ArgumentConverter &parent, + std::string &calleeName); }; +ArgumentConverter &createChildConverter(ArgumentConverter &parent, + std::string &calleeName); + } // namespace cudaq::opt diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 409153c6181..8424aa9999e 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -454,7 +454,8 @@ class BaseRemoteRESTQPU : public cudaq::QPU { mlir::PassManager pm(&context); if (!rawArgs.empty()) { cudaq::info("Run Argument Synth.\n"); - opt::ArgumentConverter argCon(kernelName, moduleOp); + std::list kernelRegistry; + opt::ArgumentConverter argCon(kernelRegistry, kernelName, moduleOp); argCon.gen(rawArgs); // For quantum devices, we've created a tree of ArgumentConverters @@ -488,6 +489,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { }; collect(argCon); + // Collect references for the argument synthesis. 
mlir::SmallVector funcNames{kernels.begin(), kernels.end()}; mlir::SmallVector substitutions{substs.begin(), From e58f5775ac055bea2e6e329cc58ac2f9124279a6 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 21 Feb 2025 15:40:58 -0800 Subject: [PATCH 40/54] StateAggregatorWithArgumentConverter Signed-off-by: Anna Gringauze --- runtime/common/ArgumentConversion.cpp | 287 ++------------- runtime/common/ArgumentConversion.h | 52 --- runtime/common/BaseRemoteRESTQPU.h | 78 ++-- runtime/common/CMakeLists.txt | 1 + runtime/common/StateAggregator.cpp | 422 ++++++++++++++++++++++ runtime/common/StateAggregator.h | 65 ++++ runtime/test/test_argument_conversion.cpp | 62 +++- 7 files changed, 608 insertions(+), 359 deletions(-) create mode 100644 runtime/common/StateAggregator.cpp create mode 100644 runtime/common/StateAggregator.h diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 66af2ce7b6b..1fe8d9747d5 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -20,6 +20,8 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/Parser/Parser.h" +#include + using namespace mlir; template @@ -99,233 +101,6 @@ static Value genConstant(OpBuilder &, cudaq::cc::StructType, void *, static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp substMod, llvm::DataLayout &); -/// Create callee.init_N that initializes the state -/// Callee (the kernel captured by state): -// clang-format off -/// ```mlir -/// func.func @__nvqpp__mlirgen__callee(%arg0: i64) { -/// %0 = cc.alloca i64 -/// cc.store %arg0, %0 : !cc.ptr -/// %1 = cc.load %0 : !cc.ptr -/// %2 = quake.alloca !quake.veq[%1 : i64] -/// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref -/// quake.x %3 : (!quake.ref) -> () -/// return -/// } -/// callee.init_N: -/// func.func private @callee.init_0(%arg0: !quake.veq, %arg0: i64) -> -/// !!quake.veq { -/// %1 = quake.extract_ref %arg0[1] : (!quake.veq<2>) -> 
!quake.ref -/// quake.x %1 : (f64, !quake.ref) -> () -/// return %arg0: !quake.veq -/// } -/// ``` -// clang-format on -static void createInitFunc(OpBuilder &builder, ModuleOp moduleOp, - func::FuncOp calleeFunc, StringRef initKernelName) { - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(moduleOp.getBody()); - - auto ctx = builder.getContext(); - auto loc = builder.getUnknownLoc(); - - auto initFunc = cast(builder.clone(*calleeFunc)); - - auto argTypes = calleeFunc.getArgumentTypes(); - auto retTy = quake::VeqType::getUnsized(ctx); - auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retTy}); - - initFunc.setName(initKernelName); - initFunc.setType(funcTy); - initFunc.setPrivate(); - - OpBuilder newBuilder(ctx); - - auto *entryBlock = &initFunc.getRegion().front(); - newBuilder.setInsertionPointToStart(entryBlock); - Value zero = newBuilder.create(loc, 0, 64); - Value one = newBuilder.create(loc, 1, 64); - Value begin = zero; - - auto argPos = initFunc.getArguments().size(); - - // Detect errors in kernel passed to get_state. - std::function processInner = [&](Block &block) { - for (auto &op : block) { - for (auto ®ion : op.getRegions()) - for (auto &b : region) - processInner(b); - - // Don't allow returns in inner scopes - if (auto retOp = dyn_cast(&op)) - calleeFunc.emitError("Encountered return in inner scope in a kernel " - "passed to get_state"); - } - }; - - for (auto &op : calleeFunc.getRegion().front()) - for (auto ®ion : op.getRegions()) - for (auto &b : region) - processInner(b); - - // Process outer block to initialize the allocation passed as an argument. 
- std::function process = [&](Block &block) { - SmallVector cleanUps; - Operation *replacedReturn = nullptr; - - Value arg; - Value subArg; - Value blockBegin = begin; - Value blockAllocSize = zero; - for (auto &op : block) { - if (auto alloc = dyn_cast(&op)) { - newBuilder.setInsertionPointAfter(alloc); - - if (!arg) { - initFunc.insertArgument(argPos, retTy, {}, loc); - arg = initFunc.getArgument(argPos); - } - - auto allocSize = alloc.getSize(); - auto offset = newBuilder.create(loc, allocSize, one); - subArg = - newBuilder.create(loc, retTy, arg, begin, offset); - alloc.replaceAllUsesWith(subArg); - cleanUps.push_back(alloc); - begin = newBuilder.create(loc, begin, allocSize); - blockAllocSize = - newBuilder.create(loc, blockAllocSize, allocSize); - } - - if (auto retOp = dyn_cast(&op)) { - if (retOp != replacedReturn) { - newBuilder.setInsertionPointAfter(retOp); - - auto offset = - newBuilder.create(loc, blockAllocSize, one); - Value ret = newBuilder.create(loc, retTy, arg, - blockBegin, offset); - - assert(arg && "No veq allocations found"); - replacedReturn = newBuilder.create(loc, ret); - cleanUps.push_back(retOp); - } - } - } - - for (auto &op : cleanUps) { - op->dropAllReferences(); - op->dropAllUses(); - op->erase(); - } - }; - - // Process the function body - process(initFunc.getRegion().front()); -} - -/// Create callee.num_qubits_N that calculates the number of qubits to -/// initialize the state -/// Callee: (the kernel captured by state): -// clang-format off -/// ```mlir -/// func.func @callee(%arg0: i64) { -/// %0 = cc.alloca i64 -/// cc.store %arg0, %0 : !cc.ptr -/// %1 = cc.load %0 : !cc.ptr -/// %2 = quake.alloca !quake.veq[%1 : i64] -/// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref -/// quake.x %3 : (!quake.ref) -> () -/// return -/// } -/// -/// callee.num_qubits_0: -/// func.func private @callee.num_qubits_0(%arg0: i64) -> i64 { -/// %0 = cc.alloca i64 -/// cc.store %arg0, %0 : !cc.ptr -/// %1 = cc.load %0 : !cc.ptr -/// return 
%1 : i64 -/// } -/// ``` -// clang-format on -static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, - func::FuncOp calleeFunc, - StringRef numQubitsKernelName) { - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(moduleOp.getBody()); - - auto ctx = builder.getContext(); - auto loc = builder.getUnknownLoc(); - - auto numQubitsFunc = cast(builder.clone(*calleeFunc)); - - auto argTypes = calleeFunc.getArgumentTypes(); - auto retType = builder.getI64Type(); - auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retType}); - - numQubitsFunc.setName(numQubitsKernelName); - numQubitsFunc.setType(funcTy); - numQubitsFunc.setPrivate(); - - OpBuilder newBuilder(ctx); - - auto *entryBlock = &numQubitsFunc.getRegion().front(); - newBuilder.setInsertionPointToStart(entryBlock); - Value size = newBuilder.create(loc, 0, retType); - - // Process block recursively to calculate and return allocation size - // and remove everything else. - std::function process = [&](Block &block) { - SmallVector used; - Operation *replacedReturn = nullptr; - - for (auto &op : block) { - // Calculate allocation size (existing allocation size plus new one) - if (auto alloc = dyn_cast(&op)) { - auto allocSize = alloc.getSize(); - newBuilder.setInsertionPointAfter(alloc); - size = newBuilder.create(loc, size, allocSize); - } - - // Return allocation size - if (auto retOp = dyn_cast(&op)) { - if (retOp != replacedReturn) { - - newBuilder.setInsertionPointAfter(retOp); - auto newRet = newBuilder.create(loc, size); - replacedReturn = newRet; - used.push_back(newRet); - } - } - } - - // Collect all ops needed for size calculation - SmallVector keep; - while (!used.empty()) { - auto *op = used.pop_back_val(); - keep.push_back(op); - for (auto opnd : op->getOperands()) - if (auto defOp = opnd.getDefiningOp()) - used.push_back(defOp); - } - - // Remove the rest of the ops - SmallVector toErase; - for (auto &op : block) - if (std::find(keep.begin(), keep.end(), 
&op) == keep.end()) - toErase.push_back(&op); - - for (auto &op : toErase) { - op->dropAllReferences(); - op->dropAllUses(); - op->erase(); - } - }; - - // Process the function body - process(numQubitsFunc.getRegion().front()); -} - static Value genConstant(OpBuilder &builder, const cudaq::state *v, llvm::DataLayout &layout, cudaq::opt::ArgumentConverter &converter) { @@ -335,6 +110,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto kernelName = converter.getKernelName(); auto substMod = converter.getSubstitutionModule(); + // If the state has amplitude data, we materialize the data as a state // vector and create a new state from it. if (simState->hasData()) { @@ -385,7 +161,6 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, return builder.create(loc, statePtrTy, buffer, arrSize); } - // Otherwise (ie quantum hardware, where getting the amplitude data is not // efficient) we aim at replacing states with calls to kernels (`callees`) // that generated them. This is done in three stages: @@ -475,6 +250,7 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // } // ``` // clang-format on + if (simState->getKernelInfo().has_value()) { auto [calleeName, calleeArgs] = simState->getKernelInfo().value(); @@ -500,23 +276,23 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto numQubitsKernelName = cudaq::runtime::cudaqGenPrefixName + numQubitsName; - // Create `callee.init_N` and `callee.num_qubits_N` used to replace - // `quake.materialize_state` in ReplaceStateWithKernel pass - if (!converter.isRegisteredKernel(initName) || - !converter.isRegisteredKernel(numQubitsName)) { - createInitFunc(builder, substMod, calleeFunc, initKernelName); - createNumQubitsFunc(builder, substMod, calleeFunc, numQubitsKernelName); - - // Convert arguments for `callee.init_N`. 
- auto &initConverter = - cudaq::opt::createChildConverter(converter, initName); - initConverter.gen(calleeArgs); - - // Convert arguments for `callee.num_qubits_N`. - auto &numQubitsConverter = - cudaq::opt::createChildConverter(converter, numQubitsName); - numQubitsConverter.gen(calleeArgs); - } + // // Create `callee.init_N` and `callee.num_qubits_N` used to replace + // // `quake.materialize_state` in ReplaceStateWithKernel pass + // if (!converter.isRegisteredKernel(initName) || + // !converter.isRegisteredKernel(numQubitsName)) { + // createInitFunc(builder, substMod, calleeFunc, initKernelName); + // createNumQubitsFunc(builder, substMod, calleeFunc, numQubitsKernelName); + + // // Convert arguments for `callee.init_N`. + // auto &initConverter = + // cudaq::opt::createChildConverter(converter, initName); + // initConverter.gen(calleeArgs); + + // // Convert arguments for `callee.num_qubits_N`. + // auto &numQubitsConverter = + // cudaq::opt::createChildConverter(converter, numQubitsName); + // numQubitsConverter.gen(calleeArgs); + // } // Create a substitution for the state pointer. 
auto statePtrTy = @@ -699,20 +475,10 @@ Value genConstant(OpBuilder &builder, cudaq::cc::IndirectCallableType indCallTy, //===----------------------------------------------------------------------===// -std::list cudaq::opt::ArgumentConverter::emptyRegistry; - cudaq::opt::ArgumentConverter::ArgumentConverter(StringRef kernelName, ModuleOp sourceModule) : sourceModule(sourceModule), builder(sourceModule.getContext()), - kernelName(kernelName), kernelRegistry(emptyRegistry) { - substModule = builder.create(builder.getUnknownLoc()); -} - -cudaq::opt::ArgumentConverter::ArgumentConverter( - std::list &kernelRegistry, StringRef kernelName, - ModuleOp sourceModule) - : sourceModule(sourceModule), builder(sourceModule.getContext()), - kernelName(kernelName), kernelRegistry(kernelRegistry) { + kernelName(kernelName) { substModule = builder.create(builder.getUnknownLoc()); } @@ -722,6 +488,7 @@ void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { auto fun = sourceModule.lookupSymbol( cudaq::runtime::cudaqGenPrefixName + kernelName.str()); + FunctionType fromFuncTy = fun.getFunctionType(); for (auto iter : llvm::enumerate(llvm::zip(fromFuncTy.getInputs(), arguments))) { @@ -842,11 +609,3 @@ void cudaq::opt::ArgumentConverter::gen_drop_front( } gen(partialArgs); } - -cudaq::opt::ArgumentConverter & -cudaq::opt::createChildConverter(cudaq::opt::ArgumentConverter &parent, - std::string &calleeName) { - // Store the name in the kernel name cache before referencing it. - auto &name = parent.registerKernel(calleeName); - return parent.createCalleeConverter(name); -} diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 2a95178ed1b..b876955385f 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -25,12 +25,6 @@ class ArgumentConverter { /// kernelName in \p sourceModule. 
ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule); - /// Build an instance to create argument substitutions for a specified \p - /// kernelName in \p sourceModule. Use \p kernelRegistry to store newly - /// generated functions. - ArgumentConverter(std::list &kernelRegistry, - mlir::StringRef kernelName, mlir::ModuleOp sourceModule); - /// Generate a substitution ModuleOp for the vector of arguments presented. /// The arguments are those presented to the kernel, kernelName. void gen(const std::vector &arguments); @@ -57,58 +51,12 @@ class ArgumentConverter { /// Kernel we are converting the arguments for. mlir::StringRef getKernelName() { return kernelName; } - /// Return child converters for functions created from kernel used in state - /// arguments. - std::vector &getCalleeConverters() { - return calleeConverters; - } - - /// Is kernel name already created? - bool isRegisteredKernel(const std::string &kernelName) { - return std::find(kernelRegistry.begin(), kernelRegistry.end(), - kernelName) != kernelRegistry.end(); - } - - /// Store kernel name in memory for newly created kernels. - const std::string ®isterKernel(const std::string &kernelName) { - return kernelRegistry.emplace_back(kernelName); - } - private: - /// Default registry to use when state synthesis is not needed. - static std::list emptyRegistry; - - /// Create a child converter for the new callee created from a - /// state argument. - ArgumentConverter &createCalleeConverter(mlir::StringRef calleeName) { - assert(&kernelRegistry != &emptyRegistry && - "Argument converter is missing a kernel registry"); - return calleeConverters.emplace_back(kernelRegistry, calleeName, - substModule); - } - mlir::ModuleOp sourceModule; mlir::ModuleOp substModule; mlir::OpBuilder builder; mlir::StringRef kernelName; mlir::SmallVector substitutions; - - /// Converters for functions created during state argument conversion. 
- std::vector calleeConverters; - - /// Keeps new kernel names created during argument conversion in memory. - /// References to the names are used by the argument converters for - /// their kernels. - /// NOTE: use `std::list` to make sure we always return valid references - /// when registering new kernel names, as the references are taken while - /// the list is growing. - std::list &kernelRegistry; - - friend ArgumentConverter &createChildConverter(ArgumentConverter &parent, - std::string &calleeName); }; -ArgumentConverter &createChildConverter(ArgumentConverter &parent, - std::string &calleeName); - } // namespace cudaq::opt diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 8424aa9999e..4b12d396126 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -9,6 +9,7 @@ #pragma once #include "common/ArgumentConversion.h" +#include "common/StateAggregator.h" #include "common/Environment.h" #include "common/ExecutionContext.h" #include "common/Executor.h" @@ -454,40 +455,69 @@ class BaseRemoteRESTQPU : public cudaq::QPU { mlir::PassManager pm(&context); if (!rawArgs.empty()) { cudaq::info("Run Argument Synth.\n"); - std::list kernelRegistry; - opt::ArgumentConverter argCon(kernelRegistry, kernelName, moduleOp); - argCon.gen(rawArgs); - - // For quantum devices, we've created a tree of ArgumentConverters + // For quantum devices, create a list of ArgumentConverters // with nodes corresponding to `init` and `num_qubits` functions // created from a kernel that generated the state argument. // Traverse the tree and collect substitutions for all those // functions. + cudaq::opt::StateAggregator aggregator; + aggregator.collect(moduleOp, kernelName, rawArgs); // Store kernel and substitution strings on the stack. // We pass string references to the `createArgumentSynthesisPass`. 
mlir::SmallVector kernels; mlir::SmallVector substs; + for (auto &kInfo : aggregator.getKernelInfo()) { + auto con = kInfo.converter; + con.gen(kInfo.args); + { + auto name = con.getKernelName(); + std::string kernName = + cudaq::runtime::cudaqGenPrefixName + name.str(); + kernels.emplace_back(kernName); + } + { + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << con.getSubstitutionModule(); + substs.emplace_back(substBuff); + } + } - std::function collect = - [&kernels, &substs, &collect](opt::ArgumentConverter &con) { - { - auto name = con.getKernelName(); - std::string kernName = - cudaq::runtime::cudaqGenPrefixName + name.str(); - kernels.emplace_back(kernName); - } - { - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << con.getSubstitutionModule(); - substs.emplace_back(substBuff); - } - - for (auto &calleeCon : con.getCalleeConverters()) - collect(calleeCon); - }; - collect(argCon); + // std::list kernelRegistry; + // opt::ArgumentConverter argCon(kernelRegistry, kernelName, moduleOp); + // argCon.gen(rawArgs); + + // // For quantum devices, we've created a tree of ArgumentConverters + // // with nodes corresponding to `init` and `num_qubits` functions + // // created from a kernel that generated the state argument. + // // Traverse the tree and collect substitutions for all those + // // functions. + + // // Store kernel and substitution strings on the stack. + // // We pass string references to the `createArgumentSynthesisPass`. 
+ // mlir::SmallVector kernels; + // mlir::SmallVector substs; + + // std::function collect = + // [&kernels, &substs, &collect](opt::ArgumentConverter &con) { + // { + // auto name = con.getKernelName(); + // std::string kernName = + // cudaq::runtime::cudaqGenPrefixName + name.str(); + // kernels.emplace_back(kernName); + // } + // { + // std::string substBuff; + // llvm::raw_string_ostream ss(substBuff); + // ss << con.getSubstitutionModule(); + // substs.emplace_back(substBuff); + // } + + // for (auto &calleeCon : con.getCalleeConverters()) + // collect(calleeCon); + // }; + // collect(argCon); // Collect references for the argument synthesis. mlir::SmallVector funcNames{kernels.begin(), diff --git a/runtime/common/CMakeLists.txt b/runtime/common/CMakeLists.txt index 3d6061f4ef0..8567416bf4a 100644 --- a/runtime/common/CMakeLists.txt +++ b/runtime/common/CMakeLists.txt @@ -90,6 +90,7 @@ add_library(cudaq-mlir-runtime JIT.cpp Logger.cpp RuntimeMLIR.cpp + StateAggregator.cpp ) set_property(GLOBAL APPEND PROPERTY CUDAQ_RUNTIME_LIBS cudaq-mlir-runtime) diff --git a/runtime/common/StateAggregator.cpp b/runtime/common/StateAggregator.cpp new file mode 100644 index 00000000000..80f6d30e0d0 --- /dev/null +++ b/runtime/common/StateAggregator.cpp @@ -0,0 +1,422 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + + #include "StateAggregator.h" + #include "cudaq.h" + #include "cudaq/Optimizer/Builder/Intrinsics.h" + #include "cudaq/Optimizer/Builder/Runtime.h" + #include "cudaq/Optimizer/Dialect/CC/CCOps.h" + #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" + #include "cudaq/Todo.h" + #include "cudaq/qis/pauli_word.h" + #include "cudaq/utils/registry.h" + #include "llvm/ADT/TypeSwitch.h" + #include "mlir/Dialect/Arith/IR/Arith.h" + #include "mlir/Dialect/Complex/IR/Complex.h" + #include "mlir/IR/BuiltinAttributes.h" + #include "mlir/Parser/Parser.h" + + #include + + using namespace mlir; + + /// Create callee.init_N that initializes the state + /// Callee (the kernel captured by state): + // clang-format off + /// func.func @callee(%arg0: i64) { + /// %0 = cc.alloca i64 + /// cc.store %arg0, %0 : !cc.ptr + /// %1 = cc.load %0 : !cc.ptr + /// %2 = quake.alloca !quake.veq[%1 : i64] + /// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref + /// quake.x %3 : (!quake.ref) -> () + /// return + /// } + /// callee.init_N: + /// func.func private @callee.init_0(%arg0: !quake.veq, %arg0: i64) -> + /// !!quake.veq { + /// %1 = quake.extract_ref %arg0[1] : (!quake.veq<2>) -> !quake.ref + /// quake.x %1 : (f64, !quake.ref) -> () + /// return %arg0: !quake.veq + /// } + // clang-format on + static void createInitFunc(OpBuilder &builder, ModuleOp moduleOp, + func::FuncOp calleeFunc, StringRef initKernelName) { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(moduleOp.getBody()); + + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); + + auto initFunc = cast(builder.clone(*calleeFunc)); + + auto argTypes = calleeFunc.getArgumentTypes(); + auto retTy = quake::VeqType::getUnsized(ctx); + auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retTy}); + + initFunc.setName(initKernelName); + initFunc.setType(funcTy); + initFunc.setPrivate(); + + 
OpBuilder newBuilder(ctx); + + auto *entryBlock = &initFunc.getRegion().front(); + newBuilder.setInsertionPointToStart(entryBlock); + Value zero = newBuilder.create(loc, 0, 64); + Value one = newBuilder.create(loc, 1, 64); + Value begin = zero; + + auto argPos = initFunc.getArguments().size(); + + // Detect errors in kernel passed to get_state. + std::function processInner = [&](Block &block) { + for (auto &op : block) { + for (auto ®ion : op.getRegions()) + for (auto &b : region) + processInner(b); + + // Don't allow returns in inner scopes + if (auto retOp = dyn_cast(&op)) + calleeFunc.emitError("Encountered return in inner scope in a kernel " + "passed to get_state"); + } + }; + + for (auto &op : calleeFunc.getRegion().front()) + for (auto ®ion : op.getRegions()) + for (auto &b : region) + processInner(b); + + // Process outer block to initialize the allocation passed as an argument. + std::function process = [&](Block &block) { + SmallVector cleanUps; + Operation *replacedReturn = nullptr; + + Value arg; + Value subArg; + Value blockBegin = begin; + Value blockAllocSize = zero; + for (auto &op : block) { + if (auto alloc = dyn_cast(&op)) { + newBuilder.setInsertionPointAfter(alloc); + + if (!arg) { + initFunc.insertArgument(argPos, retTy, {}, loc); + arg = initFunc.getArgument(argPos); + } + + auto allocSize = alloc.getSize(); + auto offset = newBuilder.create(loc, allocSize, one); + subArg = + newBuilder.create(loc, retTy, arg, begin, offset); + alloc.replaceAllUsesWith(subArg); + cleanUps.push_back(alloc); + begin = newBuilder.create(loc, begin, allocSize); + blockAllocSize = + newBuilder.create(loc, blockAllocSize, allocSize); + } + + if (auto retOp = dyn_cast(&op)) { + if (retOp != replacedReturn) { + newBuilder.setInsertionPointAfter(retOp); + + auto offset = + newBuilder.create(loc, blockAllocSize, one); + Value ret = newBuilder.create(loc, retTy, arg, + blockBegin, offset); + + assert(arg && "No veq allocations found"); + replacedReturn = 
newBuilder.create(loc, ret); + cleanUps.push_back(retOp); + } + } + } + + for (auto &op : cleanUps) { + op->dropAllReferences(); + op->dropAllUses(); + op->erase(); + } + }; + + // Process the function body + process(initFunc.getRegion().front()); + } + + /// Create callee.num_qubits_N that calculates the number of qubits to + /// initialize the state + /// Callee: (the kernel captured by state): + // clang-format off + /// func.func @callee(%arg0: i64) { + /// %0 = cc.alloca i64 + /// cc.store %arg0, %0 : !cc.ptr + /// %1 = cc.load %0 : !cc.ptr + /// %2 = quake.alloca !quake.veq[%1 : i64] + /// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref + /// quake.x %3 : (!quake.ref) -> () + /// return + /// } + /// + /// callee.num_qubits_0: + /// func.func private @callee.num_qubits_0(%arg0: i64) -> i64 { + /// %0 = cc.alloca i64 + /// cc.store %arg0, %0 : !cc.ptr + /// %1 = cc.load %0 : !cc.ptr + /// return %1 : i64 + /// } + // clang-format on + static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, + func::FuncOp calleeFunc, + StringRef numQubitsKernelName) { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(moduleOp.getBody()); + + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); + + auto numQubitsFunc = cast(builder.clone(*calleeFunc)); + + auto argTypes = calleeFunc.getArgumentTypes(); + auto retType = builder.getI64Type(); + auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retType}); + + numQubitsFunc.setName(numQubitsKernelName); + numQubitsFunc.setType(funcTy); + numQubitsFunc.setPrivate(); + + OpBuilder newBuilder(ctx); + + auto *entryBlock = &numQubitsFunc.getRegion().front(); + newBuilder.setInsertionPointToStart(entryBlock); + Value size = newBuilder.create(loc, 0, retType); + + // Process block recursively to calculate and return allocation size + // and remove everything else. 
+ std::function process = [&](Block &block) { + SmallVector used; + Operation *replacedReturn = nullptr; + + for (auto &op : block) { + // Calculate allocation size (existing allocation size plus new one) + if (auto alloc = dyn_cast(&op)) { + auto allocSize = alloc.getSize(); + newBuilder.setInsertionPointAfter(alloc); + size = newBuilder.create(loc, size, allocSize); + } + + // Return allocation size + if (auto retOp = dyn_cast(&op)) { + if (retOp != replacedReturn) { + + newBuilder.setInsertionPointAfter(retOp); + auto newRet = newBuilder.create(loc, size); + replacedReturn = newRet; + used.push_back(newRet); + } + } + } + + // Collect all ops needed for size calculation + SmallVector keep; + while (!used.empty()) { + auto *op = used.pop_back_val(); + keep.push_back(op); + for (auto opnd : op->getOperands()) + if (auto defOp = opnd.getDefiningOp()) + used.push_back(defOp); + } + + // Remove the rest of the ops + SmallVector toErase; + for (auto &op : block) + if (std::find(keep.begin(), keep.end(), &op) == keep.end()) + toErase.push_back(&op); + + for (auto &op : toErase) { + op->dropAllReferences(); + op->dropAllUses(); + op->erase(); + } + }; + + // Process the function body + process(numQubitsFunc.getRegion().front()); + } + + void cudaq::opt::StateAggregator::collectKernelInfo(ModuleOp moduleOp, const cudaq::state *v) { + auto simState = + cudaq::state_helper::getSimulationState(const_cast(v)); + + // If the state has amplitude data, we materialize the data as a state + // vector and create a new state from it in the ArgumentConverter. + // TODO: add an option to use the kernel info if available, i.e. for + // remote simulators + // TODO: add an option of storing the kernel info on simulators if + // preferred i.e. to support synthesis of density matrices. 
+ if (simState->hasData()) { + return; + } + + // Otherwise (ie quantum hardware, where getting the amplitude data is not + // efficient) we aim at replacing states with calls to kernels (`callees`) + // that generated them. This is done in three stages: + // + // 1) (done here) Generate @callee.num_qubits_0 @callee.init_0` for the callee + // function and its arguments stored in a state. + + // Create two functions: + // - callee.num_qubits_N + // Calculates the number of qubits needed for the veq allocation + // - callee.init_N + // Initializes the veq passed as a parameter + // + // 2) (done in ArgumentConverter) Replace the state with + // `quake.get_state @callee.num_qubits_0 @callee.init_0`: + // + // clang-format off + // ``` + // func.func @caller(%arg0: !cc.ptr) { + // %1 = quake.get_number_of_qubits %arg0: (!cc.ptr) -> i64 + // %2 = quake.alloca !quake.veq[%1 : i64] + // %3 = quake.init_state %2, %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq + // return + // } + // + // func.func private @callee(%arg0: i64) { + // %0 = quake.alloca !quake.veq[%arg0 : i64] + // %1 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref + // quake.x %1 : (!quake.ref) -> () + // return + // } + // + // Call from the user host code: + // state = cudaq.get_state(callee, 2) + // counts = cudaq.sample(caller, state) + // ``` + // clang-format on + // + // => after argument synthesis: + // + // clang-format off + // ``` + // func.func @caller() { + // %0 = quake.get_state @callee.num_qubits_0 @callee.init_state_0 : !cc.ptr + // %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 + // %2 = quake.alloca !quake.veq[%1 : i64] + // %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq + // return + // } + // + // func.func private @callee.num_qubits_0(%arg0: i64) -> i64 { + // return %arg0 : i64 + // } + // + // func.func private @callee.init_0(%arg0: i64, %arg1: !quake.veq) { + // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref + // quake.x %1 : (f64, 
!quake.ref) -> () + // return + // } + // ``` + // clang-format on + // + // 3) (done in ReplaceStateWithKernel) Replace the `quake.get_state` and ops + // that use its state with calls to the generated functions, synthesized with + // the arguments used to create the original state: + // + // After ReplaceStateWithKernel pass: + // + // clang-format off + // ``` + // func.func @caller() { + // %1 = call callee.num_qubits_0() : () -> i64 + // %2 = quake.alloca !quake.veq[%1 : i64] + // %3 = call @callee.init_0(%2): (!quake.veq) -> !quake.veq + // } + // + // func.func private @callee.num_qubits_0() -> i64 { + // %cst = arith.constant 2 : i64 + // return %cst : i64 + // } + // + // func.func private @callee.init_0(%arg0: !quake.veq): !quake.veq { + // %cst = arith.constant 1.5707963267948966 : f64 + // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref + // quake.ry (%cst) %1 : (f64, !quake.ref) -> () + // return %arg0 + // } + // ``` + // clang-format on + if (simState->getKernelInfo().has_value()) { + auto [calleeName, calleeArgs] = simState->getKernelInfo().value(); + + std::string calleeKernelName = + cudaq::runtime::cudaqGenPrefixName + calleeName; + + auto builder = IRBuilder(moduleOp); + auto ctx = builder.getContext(); + + auto code = cudaq::get_quake_by_name(calleeName, /*throwException=*/false); + assert(!code.empty() && "Quake code not found for callee"); + auto fromModule = parseSourceString(code, ctx); + + auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); + assert(calleeFunc && "callee func is missing"); + + // Use the state pointer as hash to store new function names + // so we can look them up later in ArgumentConverter. 
+ auto hash = std::to_string(reinterpret_cast(v)); + auto initName = calleeName + ".init_" + hash; + auto numQubitsName = calleeName + ".num_qubits_" + hash; + + if (!hasKernelInfo(initName) && !hasKernelInfo(numQubitsName)) { + auto initKernelName = cudaq::runtime::cudaqGenPrefixName + initName; + auto numQubitsKernelName = + cudaq::runtime::cudaqGenPrefixName + numQubitsName; + + // Create `callee.init_N` and `callee.num_qubits_N` functions used to + // replace `quake.get_state` later in ReplaceStateWithKernel pass + createInitFunc(builder, moduleOp, calleeFunc, initKernelName); + createNumQubitsFunc(builder, moduleOp, calleeFunc, numQubitsKernelName); + + // Collect kernel info from the callee arguments recursively + collect(moduleOp, initName, calleeArgs); + collect(moduleOp, numQubitsName, calleeArgs); + } + return; + } + + TODO("cudaq::state* argument synthesis for quantum hardware for c functions"); + } + + //===----------------------------------------------------------------------===// + + + void cudaq::opt::StateAggregator::collect(ModuleOp moduleOp, + const std::string& kernelName, const std::vector &arguments) { + + auto &info = addKernelInfo(moduleOp, kernelName, arguments); + auto substModule = info.converter.getSubstitutionModule(); + auto *ctx = moduleOp.getContext(); + + auto fun = moduleOp.lookupSymbol( + cudaq::runtime::cudaqGenPrefixName + kernelName); + assert(fun && "callee func is missing in state aggregator"); + + FunctionType fromFuncTy = fun.getFunctionType(); + for (auto iter : + llvm::enumerate(llvm::zip(fromFuncTy.getInputs(), arguments))) { + void *argPtr = std::get<1>(iter.value()); + if (!argPtr) + continue; + Type argTy = std::get<0>(iter.value()); + + if (auto ptrTy = dyn_cast(argTy)) + if (ptrTy.getElementType() == cc::StateType::get(ctx)) + collectKernelInfo(substModule, static_cast(argPtr)); + } + } \ No newline at end of file diff --git a/runtime/common/StateAggregator.h b/runtime/common/StateAggregator.h new file mode 100644 
index 00000000000..69dd1ca621f --- /dev/null +++ b/runtime/common/StateAggregator.h @@ -0,0 +1,65 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + + #pragma once + + #include "cudaq/Optimizer/Dialect/CC/CCOps.h" + #include "cudaq/Optimizer/Dialect/CC/CCTypes.h" + #include "cudaq/qis/state.h" + #include "mlir/IR/Builders.h" + #include "mlir/IR/Types.h" + #include + #include + #include + #include "ArgumentConversion.h" + + namespace cudaq::opt { + struct KernelInfo { + ArgumentConverter converter; + const std::vector args; + }; + + class StateAggregator { + public: + /// Create an instance of the state aggregator for a specified \p + /// sourceModule. + StateAggregator(){} + + /// Collect kernel names and arguments for all state arguments. + void collect(mlir::ModuleOp moduleOp, const std::string& kernelName, + const std::vector &arguments); + + /// Get the map of kernel names to their kernel info that + /// were collected by `collect()`. + std::list& getKernelInfo() { + return kernelInfo; + } + + private: + void collectKernelInfo(mlir::ModuleOp moduleOp, const cudaq::state *v); + + bool hasKernelInfo(const std::string &kernelName) { + return std::find(nameRegistry.begin(), nameRegistry.end(), kernelName) != nameRegistry.end(); + } + + KernelInfo& addKernelInfo(mlir::ModuleOp moduleOp, const std::string &kernelName, + const std::vector &args) { + auto &name = nameRegistry.emplace_back(kernelName); + return kernelInfo.emplace_back(std::move(ArgumentConverter(name, moduleOp)), args); + } + + private: + /// Memory to store new kernel names generated during argument conversion. 
+ std::list nameRegistry; + + /// Kernel info for kernels we are converting the arguments for, including + /// new kernels generated from state arguments. + std::list kernelInfo; + }; + + } // namespace cudaq::opt \ No newline at end of file diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 246802eb355..5ab571f46fb 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -12,6 +12,7 @@ // RUN: test_argument_conversion | FileCheck %s #include "common/ArgumentConversion.h" +#include "common/StateAggregator.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" #include "cudaq/Optimizer/InitAllDialects.h" @@ -142,20 +143,12 @@ class FakeDeviceState : public cudaq::SimulationState { extern "C" void __cudaq_deviceCodeHolderAdd(const char *, const char *); -void dumpSubstitutionModules(cudaq::opt::ArgumentConverter &ab) { - std::function dump = - [&dump](cudaq::opt::ArgumentConverter &con) { - // Dump the conversions - llvm::outs() << "========================================\n" - "Substitution module:\n" - << con.getKernelName() << "\n" - << con.getSubstitutionModule() << '\n'; - - for (auto &calleeCon : con.getCalleeConverters()) - dump(calleeCon); - }; - - dump(ab); +void dumpSubstitutionModule(cudaq::opt::ArgumentConverter &con) { + // Dump the conversions + llvm::outs() << "========================================\n" + "Substitution module:\n" + << con.getKernelName() << "\n" + << con.getSubstitutionModule() << '\n'; } void doSimpleTest(mlir::MLIRContext *ctx, const std::string &typeName, @@ -178,7 +171,38 @@ func.func @__nvqpp__mlirgen__testy(%0: )#" + // Create the argument conversions ab.gen(args); // Dump all conversions - dumpSubstitutionModules(ab); + dumpSubstitutionModule(ab); +} + + +void doStateAggregationTest(mlir::MLIRContext *ctx, const std::string &typeName, + std::vector args, + const std::string 
&additionalCode = "") { +std::string code = additionalCode + R"#( +func.func private @callee(%0: )#" + + typeName + R"#() +func.func @__nvqpp__mlirgen__testy(%0: )#" + + typeName + R"#() { +call @callee(%0) : ()#" + + typeName + R"#() -> () +return +})#"; + +// Create the Module +auto mod = mlir::parseSourceString(code, ctx); +llvm::outs() << "Source module:\n" << *mod << '\n'; + + // Create the argument conversions for state arguments + cudaq::opt::StateAggregator sa; + sa.collect(*mod, "testy", args); + + for (auto &kInfo : sa.getKernelInfo()) { + cudaq::opt::ArgumentConverter &cab = kInfo.converter; + // Create the argument conversions for callee kernels from state arguments + cab.gen(kInfo.args); + // Dump all conversions + dumpSubstitutionModule(cab); + } } void doTest(mlir::MLIRContext *ctx, std::vector &typeNames, @@ -221,7 +245,7 @@ void doTest(mlir::MLIRContext *ctx, std::vector &typeNames, // Create the argument conversions ab.gen_drop_front(args, startingArgIdx); // Dump all conversions - dumpSubstitutionModules(ab); + dumpSubstitutionModule(ab); } void test_scalars(mlir::MLIRContext *ctx) { @@ -544,7 +568,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { std::vector a = {static_cast(&n)}; auto s = cudaq::state(new FakeDeviceState(init, a)); std::vector v = {static_cast(&s)}; - doSimpleTest(ctx, "!cc.ptr", v, initCode); + doStateAggregationTest(ctx, "!cc.ptr", v, initCode); } // clang-format off @@ -645,7 +669,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { std::vector v1 = {static_cast(&s1)}; auto code = std::string{initCode} + std::string{stateParamCode}; - doSimpleTest(ctx, "!cc.ptr", v1, code); + doStateAggregationTest(ctx, "!cc.ptr", v1, code); } // clang-format off @@ -775,7 +799,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { std::vector a = {static_cast(&n)}; auto s = cudaq::state(new FakeDeviceState(init, a)); std::vector v = {static_cast(&s)}; - doSimpleTest(ctx, "!cc.ptr", v, initCode); + doStateAggregationTest(ctx, 
"!cc.ptr", v, initCode); } // clang-format off From 6c0dd7d36c64c24dbd1c414ed32f55d0db09e9b0 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 25 Feb 2025 12:05:07 -0800 Subject: [PATCH 41/54] Make ArgumentConverter handle the state call tree Signed-off-by: Anna Gringauze --- .../cudaq/platform/py_alt_launch_kernel.cpp | 37 +- runtime/common/ArgumentConversion.cpp | 273 ++++++++++- runtime/common/ArgumentConversion.h | 70 ++- runtime/common/BaseRemoteRESTQPU.h | 64 +-- runtime/common/BaseRestRemoteClient.h | 35 +- runtime/common/CMakeLists.txt | 1 - runtime/common/StateAggregator.cpp | 422 ------------------ runtime/common/StateAggregator.h | 65 --- runtime/test/test_argument_conversion.cpp | 56 +-- 9 files changed, 394 insertions(+), 629 deletions(-) delete mode 100644 runtime/common/StateAggregator.cpp delete mode 100644 runtime/common/StateAggregator.h diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 083b31e4dde..3f15beac689 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -543,18 +543,39 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, auto isLocalSimulator = platform.is_simulator() && !platform.is_emulated(); auto isSimulator = isLocalSimulator || isRemoteSimulator; - cudaq::opt::ArgumentConverter argCon(name, unwrap(module)); + auto argCon = cudaq::opt::ArgumentConverter(name, unwrap(module)); argCon.gen(runtimeArgs.getArgs()); - std::string kernName = cudaq::runtime::cudaqGenPrefixName + name; - SmallVector kernels = {kernName}; - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << argCon.getSubstitutionModule(); - SmallVector substs = {substBuff}; + + // Store kernel and substitution strings on the stack. + // We pass string references to the `createArgumentSynthesisPass`. 
+ mlir::SmallVector kernels; + mlir::SmallVector substs; + for (auto &[kName, kInfo] : argCon.getKernelInfo()) { + { + std::string kernName = + cudaq::runtime::cudaqGenPrefixName + kName.str(); + kernels.emplace_back(kernName); + } + { + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << kInfo.getSubstitutionModule(); + substs.emplace_back(substBuff); + } + } + + // Collect references for the argument synthesis. + mlir::SmallVector kernelRefs{kernels.begin(), + kernels.end()}; + mlir::SmallVector substRefs{substs.begin(), + substs.end()}; + PassManager pm(context); - pm.addPass(opt::createArgumentSynthesisPass(kernels, substs)); + pm.addPass(opt::createArgumentSynthesisPass(kernelRefs, substRefs)); pm.addNestedPass(createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); + pm.addNestedPass(opt::createReplaceStateWithKernel()); + pm.addPass(mlir::createSymbolDCEPass()); // Run state preparation for quantum devices (or their emulation) only. // Simulators have direct implementation of state initialization diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 1fe8d9747d5..3b0efa4fe70 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -101,15 +101,238 @@ static Value genConstant(OpBuilder &, cudaq::cc::StructType, void *, static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp substMod, llvm::DataLayout &); + + /// Create callee.init_N that initializes the state + /// Callee (the kernel captured by state): + // clang-format off + /// func.func @callee(%arg0: i64) { + /// %0 = cc.alloca i64 + /// cc.store %arg0, %0 : !cc.ptr + /// %1 = cc.load %0 : !cc.ptr + /// %2 = quake.alloca !quake.veq[%1 : i64] + /// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref + /// quake.x %3 : (!quake.ref) -> () + /// return + /// } + /// callee.init_N: + /// func.func private @callee.init_0(%arg0: !quake.veq, %arg0: i64) -> + /// 
!!quake.veq { + /// %1 = quake.extract_ref %arg0[1] : (!quake.veq<2>) -> !quake.ref + /// quake.x %1 : (f64, !quake.ref) -> () + /// return %arg0: !quake.veq + /// } + // clang-format on + static void createInitFunc(OpBuilder &builder, ModuleOp moduleOp, + func::FuncOp calleeFunc, StringRef initKernelName) { +OpBuilder::InsertionGuard guard(builder); +builder.setInsertionPointToEnd(moduleOp.getBody()); + +auto ctx = builder.getContext(); +auto loc = builder.getUnknownLoc(); + +auto initFunc = cast(builder.clone(*calleeFunc)); + +auto argTypes = calleeFunc.getArgumentTypes(); +auto retTy = quake::VeqType::getUnsized(ctx); +auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retTy}); + +initFunc.setName(initKernelName); +initFunc.setType(funcTy); +initFunc.setPrivate(); + +OpBuilder newBuilder(ctx); + +auto *entryBlock = &initFunc.getRegion().front(); +newBuilder.setInsertionPointToStart(entryBlock); +Value zero = newBuilder.create(loc, 0, 64); +Value one = newBuilder.create(loc, 1, 64); +Value begin = zero; + +auto argPos = initFunc.getArguments().size(); + +// Detect errors in kernel passed to get_state. +std::function processInner = [&](Block &block) { +for (auto &op : block) { +for (auto ®ion : op.getRegions()) +for (auto &b : region) +processInner(b); + +// Don't allow returns in inner scopes +if (auto retOp = dyn_cast(&op)) +calleeFunc.emitError("Encountered return in inner scope in a kernel " + "passed to get_state"); +} +}; + +for (auto &op : calleeFunc.getRegion().front()) +for (auto ®ion : op.getRegions()) +for (auto &b : region) +processInner(b); + +// Process outer block to initialize the allocation passed as an argument. 
+std::function process = [&](Block &block) { +SmallVector cleanUps; +Operation *replacedReturn = nullptr; + +Value arg; +Value subArg; +Value blockBegin = begin; +Value blockAllocSize = zero; +for (auto &op : block) { +if (auto alloc = dyn_cast(&op)) { +newBuilder.setInsertionPointAfter(alloc); + +if (!arg) { +initFunc.insertArgument(argPos, retTy, {}, loc); +arg = initFunc.getArgument(argPos); +} + +auto allocSize = alloc.getSize(); +auto offset = newBuilder.create(loc, allocSize, one); +subArg = +newBuilder.create(loc, retTy, arg, begin, offset); +alloc.replaceAllUsesWith(subArg); +cleanUps.push_back(alloc); +begin = newBuilder.create(loc, begin, allocSize); +blockAllocSize = +newBuilder.create(loc, blockAllocSize, allocSize); +} + +if (auto retOp = dyn_cast(&op)) { +if (retOp != replacedReturn) { +newBuilder.setInsertionPointAfter(retOp); + +auto offset = +newBuilder.create(loc, blockAllocSize, one); +Value ret = newBuilder.create(loc, retTy, arg, + blockBegin, offset); + +assert(arg && "No veq allocations found"); +replacedReturn = newBuilder.create(loc, ret); +cleanUps.push_back(retOp); +} +} +} + +for (auto &op : cleanUps) { +op->dropAllReferences(); +op->dropAllUses(); +op->erase(); +} +}; + +// Process the function body +process(initFunc.getRegion().front()); +} + +/// Create callee.num_qubits_N that calculates the number of qubits to +/// initialize the state +/// Callee: (the kernel captured by state): +// clang-format off +/// func.func @callee(%arg0: i64) { +/// %0 = cc.alloca i64 +/// cc.store %arg0, %0 : !cc.ptr +/// %1 = cc.load %0 : !cc.ptr +/// %2 = quake.alloca !quake.veq[%1 : i64] +/// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref +/// quake.x %3 : (!quake.ref) -> () +/// return +/// } +/// +/// callee.num_qubits_0: +/// func.func private @callee.num_qubits_0(%arg0: i64) -> i64 { +/// %0 = cc.alloca i64 +/// cc.store %arg0, %0 : !cc.ptr +/// %1 = cc.load %0 : !cc.ptr +/// return %1 : i64 +/// } +// clang-format on +static void 
createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, + func::FuncOp calleeFunc, + StringRef numQubitsKernelName) { +OpBuilder::InsertionGuard guard(builder); +builder.setInsertionPointToEnd(moduleOp.getBody()); + +auto ctx = builder.getContext(); +auto loc = builder.getUnknownLoc(); + +auto numQubitsFunc = cast(builder.clone(*calleeFunc)); + +auto argTypes = calleeFunc.getArgumentTypes(); +auto retType = builder.getI64Type(); +auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retType}); + +numQubitsFunc.setName(numQubitsKernelName); +numQubitsFunc.setType(funcTy); +numQubitsFunc.setPrivate(); + +OpBuilder newBuilder(ctx); + +auto *entryBlock = &numQubitsFunc.getRegion().front(); +newBuilder.setInsertionPointToStart(entryBlock); +Value size = newBuilder.create(loc, 0, retType); + +// Process block recursively to calculate and return allocation size +// and remove everything else. +std::function process = [&](Block &block) { +SmallVector used; +Operation *replacedReturn = nullptr; + +for (auto &op : block) { +// Calculate allocation size (existing allocation size plus new one) +if (auto alloc = dyn_cast(&op)) { +auto allocSize = alloc.getSize(); +newBuilder.setInsertionPointAfter(alloc); +size = newBuilder.create(loc, size, allocSize); +} + +// Return allocation size +if (auto retOp = dyn_cast(&op)) { +if (retOp != replacedReturn) { + +newBuilder.setInsertionPointAfter(retOp); +auto newRet = newBuilder.create(loc, size); +replacedReturn = newRet; +used.push_back(newRet); +} +} +} + +// Collect all ops needed for size calculation +SmallVector keep; +while (!used.empty()) { +auto *op = used.pop_back_val(); +keep.push_back(op); +for (auto opnd : op->getOperands()) +if (auto defOp = opnd.getDefiningOp()) +used.push_back(defOp); +} + +// Remove the rest of the ops +SmallVector toErase; +for (auto &op : block) +if (std::find(keep.begin(), keep.end(), &op) == keep.end()) +toErase.push_back(&op); + +for (auto &op : toErase) { +op->dropAllReferences(); 
+op->dropAllUses(); +op->erase(); +} +}; + +// Process the function body +process(numQubitsFunc.getRegion().front()); +} + static Value genConstant(OpBuilder &builder, const cudaq::state *v, - llvm::DataLayout &layout, + llvm::DataLayout &layout, StringRef kernelName, ModuleOp substMod, cudaq::opt::ArgumentConverter &converter) { auto simState = cudaq::state_helper::getSimulationState(const_cast(v)); - auto kernelName = converter.getKernelName(); - auto substMod = converter.getSubstitutionModule(); - + //auto kernelName = converter.getKernelName(); + //auto substMod = converter.getSubstitutionModule(); // If the state has amplitude data, we materialize the data as a state // vector and create a new state from it. @@ -276,23 +499,21 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto numQubitsKernelName = cudaq::runtime::cudaqGenPrefixName + numQubitsName; - // // Create `callee.init_N` and `callee.num_qubits_N` used to replace - // // `quake.materialize_state` in ReplaceStateWithKernel pass - // if (!converter.isRegisteredKernel(initName) || - // !converter.isRegisteredKernel(numQubitsName)) { - // createInitFunc(builder, substMod, calleeFunc, initKernelName); - // createNumQubitsFunc(builder, substMod, calleeFunc, numQubitsKernelName); + // Create `callee.init_N` and `callee.num_qubits_N` used to replace + // `quake.materialize_state` in ReplaceStateWithKernel pass + if (!converter.isRegisteredKernel(initName) || + !converter.isRegisteredKernel(numQubitsName)) { + createInitFunc(builder, substMod, calleeFunc, initKernelName); + createNumQubitsFunc(builder, substMod, calleeFunc, numQubitsKernelName); - // // Convert arguments for `callee.init_N`. - // auto &initConverter = - // cudaq::opt::createChildConverter(converter, initName); - // initConverter.gen(calleeArgs); + // Convert arguments for `callee.init_N`. 
+ auto ®isteredInitName = converter.registerKernel(initName); + converter.gen(registeredInitName, calleeArgs); - // // Convert arguments for `callee.num_qubits_N`. - // auto &numQubitsConverter = - // cudaq::opt::createChildConverter(converter, numQubitsName); - // numQubitsConverter.gen(calleeArgs); - // } + // Convert arguments for `callee.num_qubits_N`. + auto ®isteredNumQubitsName = converter.registerKernel(initName); + converter.gen(registeredNumQubitsName, calleeArgs); + } // Create a substitution for the state pointer. auto statePtrTy = @@ -479,15 +700,23 @@ cudaq::opt::ArgumentConverter::ArgumentConverter(StringRef kernelName, ModuleOp sourceModule) : sourceModule(sourceModule), builder(sourceModule.getContext()), kernelName(kernelName) { - substModule = builder.create(builder.getUnknownLoc()); } void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { + gen(kernelName, arguments); +} + +void cudaq::opt::ArgumentConverter::gen(StringRef kernelName, const std::vector &arguments) { auto *ctx = builder.getContext(); // We should look up the input type signature here. 
+ auto &kernelInfo = addKernelInfo(kernelName); + auto substModule = kernelInfo.getSubstitutionModule(); auto fun = sourceModule.lookupSymbol( cudaq::runtime::cudaqGenPrefixName + kernelName.str()); + if (!fun) { + throw std::runtime_error("missing fun in argument conversion: " + kernelName.str()); + } FunctionType fromFuncTy = fun.getFunctionType(); for (auto iter : @@ -556,7 +785,7 @@ void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { .Case([&](cc::PointerType ptrTy) -> cc::ArgumentSubstitutionOp { if (ptrTy.getElementType() == cc::StateType::get(ctx)) return buildSubst(static_cast(argPtr), - dataLayout, *this); + dataLayout, kernelName, substModule, *this); return {}; }) .Case([&](cc::StdvecType ty) { @@ -574,7 +803,7 @@ void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { }) .Default({}); if (subst) - substitutions.emplace_back(std::move(subst)); + kernelInfo.getSubstitutions().emplace_back(std::move(subst)); } } diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index b876955385f..2be7ba579dc 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -19,6 +19,34 @@ namespace cudaq::opt { + +class KernelInfo { + public: + KernelInfo(mlir::OpBuilder builder, mlir::StringRef kernelName) + : kernelName(kernelName) { + substModule = builder.create(builder.getUnknownLoc()); + } + + /// Some substitutions may generate global constant information. Use this + /// interface to access both the substitutions and any global constants + /// created. + mlir::ModuleOp getSubstitutionModule() { + return substModule; + } + + /// Get the list of substitutions for this kernel that were generated + /// by `ArgumentConverter::gen()`. 
+ mlir::SmallVector &getSubstitutions() { + return substitutions; + } + + private: + mlir::ModuleOp substModule; + mlir::StringRef kernelName; + mlir::SmallVector substitutions; + }; + + class ArgumentConverter { public: /// Build an instance to create argument substitutions for a specified \p @@ -29,6 +57,10 @@ class ArgumentConverter { /// The arguments are those presented to the kernel, kernelName. void gen(const std::vector &arguments); + /// Generate a substitution ModuleOp for the vector of arguments presented. + /// The arguments are those presented to the kernel, kernelName. + void gen(mlir::StringRef kernelName, const std::vector &arguments); + /// Generate a substitution ModuleOp but include only the arguments that do /// not appear in the set of \p exclusions. void gen(const std::vector &arguments, @@ -38,25 +70,39 @@ class ArgumentConverter { /// and thereby exclude them from the substitutions. void gen_drop_front(const std::vector &arguments, unsigned numDrop); - /// Get the list of substitutions that were generated by `gen()`. - mlir::SmallVector &getSubstitutions() { - return substitutions; + /// Kernel we are converting the arguments for. + mlir::StringRef getKernelName() { return kernelName; } + + /// Get the map of kernel names to their kernel info that + /// were collected by `collect()`. + mlir::DenseMap& getKernelInfo() { + return kernelInfo; } - /// Some substitutions may generate global constant information. Use this - /// interface to access both the substitutions and any global constants - /// created. - mlir::ModuleOp getSubstitutionModule() { return substModule; } + bool isRegisteredKernel(const std::string &kernelName) { + return std::find(nameRegistry.begin(), nameRegistry.end(), kernelName) != nameRegistry.end(); + } - /// Kernel we are converting the arguments for. 
- mlir::StringRef getKernelName() { return kernelName; } + std::string ®isterKernel(const std::string &kernelName) { + return nameRegistry.emplace_back(kernelName); + } + + KernelInfo& addKernelInfo(mlir::StringRef kernelName) { + auto [it,b] = kernelInfo.try_emplace(kernelName, std::move(KernelInfo(builder, kernelName))); + return it->second; + } + + private: + /// Memory to store new kernel names generated during argument conversion. + std::list nameRegistry; + + /// Kernel info for kernels we are converting the arguments for, including + /// new kernels generated from state arguments. + mlir::DenseMap kernelInfo; -private: mlir::ModuleOp sourceModule; - mlir::ModuleOp substModule; mlir::OpBuilder builder; mlir::StringRef kernelName; - mlir::SmallVector substitutions; }; } // namespace cudaq::opt diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 4b12d396126..1e6654e6132 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -9,7 +9,6 @@ #pragma once #include "common/ArgumentConversion.h" -#include "common/StateAggregator.h" #include "common/Environment.h" #include "common/ExecutionContext.h" #include "common/Executor.h" @@ -458,73 +457,35 @@ class BaseRemoteRESTQPU : public cudaq::QPU { // For quantum devices, create a list of ArgumentConverters // with nodes corresponding to `init` and `num_qubits` functions // created from a kernel that generated the state argument. - // Traverse the tree and collect substitutions for all those + // Traverse the list and collect substitutions for all those // functions. - cudaq::opt::StateAggregator aggregator; - aggregator.collect(moduleOp, kernelName, rawArgs); + auto argCon = cudaq::opt::ArgumentConverter(kernelName, moduleOp); + argCon.gen(rawArgs); // Store kernel and substitution strings on the stack. // We pass string references to the `createArgumentSynthesisPass`. 
mlir::SmallVector kernels; mlir::SmallVector substs; - for (auto &kInfo : aggregator.getKernelInfo()) { - auto con = kInfo.converter; - con.gen(kInfo.args); + for (auto &[kName, kInfo] : argCon.getKernelInfo()) { { - auto name = con.getKernelName(); std::string kernName = - cudaq::runtime::cudaqGenPrefixName + name.str(); + cudaq::runtime::cudaqGenPrefixName + kName.str(); kernels.emplace_back(kernName); } { std::string substBuff; llvm::raw_string_ostream ss(substBuff); - ss << con.getSubstitutionModule(); + ss << kInfo.getSubstitutionModule(); substs.emplace_back(substBuff); } } - // std::list kernelRegistry; - // opt::ArgumentConverter argCon(kernelRegistry, kernelName, moduleOp); - // argCon.gen(rawArgs); - - // // For quantum devices, we've created a tree of ArgumentConverters - // // with nodes corresponding to `init` and `num_qubits` functions - // // created from a kernel that generated the state argument. - // // Traverse the tree and collect substitutions for all those - // // functions. - - // // Store kernel and substitution strings on the stack. - // // We pass string references to the `createArgumentSynthesisPass`. - // mlir::SmallVector kernels; - // mlir::SmallVector substs; - - // std::function collect = - // [&kernels, &substs, &collect](opt::ArgumentConverter &con) { - // { - // auto name = con.getKernelName(); - // std::string kernName = - // cudaq::runtime::cudaqGenPrefixName + name.str(); - // kernels.emplace_back(kernName); - // } - // { - // std::string substBuff; - // llvm::raw_string_ostream ss(substBuff); - // ss << con.getSubstitutionModule(); - // substs.emplace_back(substBuff); - // } - - // for (auto &calleeCon : con.getCalleeConverters()) - // collect(calleeCon); - // }; - // collect(argCon); - // Collect references for the argument synthesis. 
- mlir::SmallVector funcNames{kernels.begin(), + mlir::SmallVector kernelRefs{kernels.begin(), kernels.end()}; - mlir::SmallVector substitutions{substs.begin(), + mlir::SmallVector substRefs{substs.begin(), substs.end()}; - pm.addPass(opt::createArgumentSynthesisPass(funcNames, substitutions)); + pm.addPass(opt::createArgumentSynthesisPass(kernelRefs, substRefs)); pm.addPass(opt::createDeleteStates()); pm.addNestedPass( opt::createReplaceStateWithKernel()); @@ -625,6 +586,8 @@ class BaseRemoteRESTQPU : public cudaq::QPU { } else modules.emplace_back(kernelName, moduleOp); + std::cout << "Modules: " << modules.size() << std::endl; + if (emulate) { // If we are in emulation mode, we need to first get a full QIR // representation of the code. Then we'll map to an LLVM Module, create a @@ -750,7 +713,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { std::vector results; // If seed is 0, then it has not been set. - if (seed > 0) + if (seed == 0) cudaq::set_random_seed(seed); bool hasConditionals = @@ -758,6 +721,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (hasConditionals && isObserve) throw std::runtime_error("error: spin_ops not yet supported with " "kernels containing conditionals"); + if (hasConditionals) { executor->setShots(1); // run one shot at a time @@ -783,6 +747,8 @@ class BaseRemoteRESTQPU : public cudaq::QPU { counts.sequential_data(regName); } } + localJIT.clear(); + return cudaq::sample_result(results); } for (std::size_t i = 0; i < codes.size(); i++) { diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index ab8d3ba79d5..f5f63d132b2 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -185,15 +185,36 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { cudaq::info("Run Argument Synth.\n"); opt::ArgumentConverter argCon(name, moduleOp); argCon.gen_drop_front(*rawArgs, startingArgIdx); - std::string kernName = runtime::cudaqGenPrefixName + 
name; - mlir::SmallVector kernels = {kernName}; - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << argCon.getSubstitutionModule(); - mlir::SmallVector substs = {substBuff}; - pm.addPass(opt::createArgumentSynthesisPass(kernels, substs)); + + // Store kernel and substitution strings on the stack. + // We pass string references to the `createArgumentSynthesisPass`. + mlir::SmallVector kernels; + mlir::SmallVector substs; + for (auto &[kName, kInfo] : argCon.getKernelInfo()) { + { + std::string kernName = + cudaq::runtime::cudaqGenPrefixName + kName.str(); + kernels.emplace_back(kernName); + } + { + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << kInfo.getSubstitutionModule(); + substs.emplace_back(substBuff); + } + } + + // Collect references for the argument synthesis. + mlir::SmallVector kernelRefs{kernels.begin(), + kernels.end()}; + mlir::SmallVector substRefs{substs.begin(), + substs.end()}; + pm.addPass(opt::createArgumentSynthesisPass(kernelRefs, substRefs)); pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); + pm.addNestedPass( + opt::createReplaceStateWithKernel()); + pm.addPass(mlir::createSymbolDCEPass()); } else if (args) { cudaq::info("Run Quake Synth.\n"); pm.addPass(opt::createQuakeSynthesizer(name, args, startingArgIdx)); diff --git a/runtime/common/CMakeLists.txt b/runtime/common/CMakeLists.txt index 8567416bf4a..3d6061f4ef0 100644 --- a/runtime/common/CMakeLists.txt +++ b/runtime/common/CMakeLists.txt @@ -90,7 +90,6 @@ add_library(cudaq-mlir-runtime JIT.cpp Logger.cpp RuntimeMLIR.cpp - StateAggregator.cpp ) set_property(GLOBAL APPEND PROPERTY CUDAQ_RUNTIME_LIBS cudaq-mlir-runtime) diff --git a/runtime/common/StateAggregator.cpp b/runtime/common/StateAggregator.cpp deleted file mode 100644 index 80f6d30e0d0..00000000000 --- a/runtime/common/StateAggregator.cpp +++ /dev/null @@ -1,422 +0,0 @@ 
-/******************************************************************************* - * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - - #include "StateAggregator.h" - #include "cudaq.h" - #include "cudaq/Optimizer/Builder/Intrinsics.h" - #include "cudaq/Optimizer/Builder/Runtime.h" - #include "cudaq/Optimizer/Dialect/CC/CCOps.h" - #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" - #include "cudaq/Todo.h" - #include "cudaq/qis/pauli_word.h" - #include "cudaq/utils/registry.h" - #include "llvm/ADT/TypeSwitch.h" - #include "mlir/Dialect/Arith/IR/Arith.h" - #include "mlir/Dialect/Complex/IR/Complex.h" - #include "mlir/IR/BuiltinAttributes.h" - #include "mlir/Parser/Parser.h" - - #include - - using namespace mlir; - - /// Create callee.init_N that initializes the state - /// Callee (the kernel captured by state): - // clang-format off - /// func.func @callee(%arg0: i64) { - /// %0 = cc.alloca i64 - /// cc.store %arg0, %0 : !cc.ptr - /// %1 = cc.load %0 : !cc.ptr - /// %2 = quake.alloca !quake.veq[%1 : i64] - /// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref - /// quake.x %3 : (!quake.ref) -> () - /// return - /// } - /// callee.init_N: - /// func.func private @callee.init_0(%arg0: !quake.veq, %arg0: i64) -> - /// !!quake.veq { - /// %1 = quake.extract_ref %arg0[1] : (!quake.veq<2>) -> !quake.ref - /// quake.x %1 : (f64, !quake.ref) -> () - /// return %arg0: !quake.veq - /// } - // clang-format on - static void createInitFunc(OpBuilder &builder, ModuleOp moduleOp, - func::FuncOp calleeFunc, StringRef initKernelName) { - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(moduleOp.getBody()); - - auto ctx = builder.getContext(); - auto loc = 
builder.getUnknownLoc(); - - auto initFunc = cast(builder.clone(*calleeFunc)); - - auto argTypes = calleeFunc.getArgumentTypes(); - auto retTy = quake::VeqType::getUnsized(ctx); - auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retTy}); - - initFunc.setName(initKernelName); - initFunc.setType(funcTy); - initFunc.setPrivate(); - - OpBuilder newBuilder(ctx); - - auto *entryBlock = &initFunc.getRegion().front(); - newBuilder.setInsertionPointToStart(entryBlock); - Value zero = newBuilder.create(loc, 0, 64); - Value one = newBuilder.create(loc, 1, 64); - Value begin = zero; - - auto argPos = initFunc.getArguments().size(); - - // Detect errors in kernel passed to get_state. - std::function processInner = [&](Block &block) { - for (auto &op : block) { - for (auto ®ion : op.getRegions()) - for (auto &b : region) - processInner(b); - - // Don't allow returns in inner scopes - if (auto retOp = dyn_cast(&op)) - calleeFunc.emitError("Encountered return in inner scope in a kernel " - "passed to get_state"); - } - }; - - for (auto &op : calleeFunc.getRegion().front()) - for (auto ®ion : op.getRegions()) - for (auto &b : region) - processInner(b); - - // Process outer block to initialize the allocation passed as an argument. 
- std::function process = [&](Block &block) { - SmallVector cleanUps; - Operation *replacedReturn = nullptr; - - Value arg; - Value subArg; - Value blockBegin = begin; - Value blockAllocSize = zero; - for (auto &op : block) { - if (auto alloc = dyn_cast(&op)) { - newBuilder.setInsertionPointAfter(alloc); - - if (!arg) { - initFunc.insertArgument(argPos, retTy, {}, loc); - arg = initFunc.getArgument(argPos); - } - - auto allocSize = alloc.getSize(); - auto offset = newBuilder.create(loc, allocSize, one); - subArg = - newBuilder.create(loc, retTy, arg, begin, offset); - alloc.replaceAllUsesWith(subArg); - cleanUps.push_back(alloc); - begin = newBuilder.create(loc, begin, allocSize); - blockAllocSize = - newBuilder.create(loc, blockAllocSize, allocSize); - } - - if (auto retOp = dyn_cast(&op)) { - if (retOp != replacedReturn) { - newBuilder.setInsertionPointAfter(retOp); - - auto offset = - newBuilder.create(loc, blockAllocSize, one); - Value ret = newBuilder.create(loc, retTy, arg, - blockBegin, offset); - - assert(arg && "No veq allocations found"); - replacedReturn = newBuilder.create(loc, ret); - cleanUps.push_back(retOp); - } - } - } - - for (auto &op : cleanUps) { - op->dropAllReferences(); - op->dropAllUses(); - op->erase(); - } - }; - - // Process the function body - process(initFunc.getRegion().front()); - } - - /// Create callee.num_qubits_N that calculates the number of qubits to - /// initialize the state - /// Callee: (the kernel captured by state): - // clang-format off - /// func.func @callee(%arg0: i64) { - /// %0 = cc.alloca i64 - /// cc.store %arg0, %0 : !cc.ptr - /// %1 = cc.load %0 : !cc.ptr - /// %2 = quake.alloca !quake.veq[%1 : i64] - /// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref - /// quake.x %3 : (!quake.ref) -> () - /// return - /// } - /// - /// callee.num_qubits_0: - /// func.func private @callee.num_qubits_0(%arg0: i64) -> i64 { - /// %0 = cc.alloca i64 - /// cc.store %arg0, %0 : !cc.ptr - /// %1 = cc.load %0 : !cc.ptr - 
/// return %1 : i64 - /// } - // clang-format on - static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, - func::FuncOp calleeFunc, - StringRef numQubitsKernelName) { - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToEnd(moduleOp.getBody()); - - auto ctx = builder.getContext(); - auto loc = builder.getUnknownLoc(); - - auto numQubitsFunc = cast(builder.clone(*calleeFunc)); - - auto argTypes = calleeFunc.getArgumentTypes(); - auto retType = builder.getI64Type(); - auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retType}); - - numQubitsFunc.setName(numQubitsKernelName); - numQubitsFunc.setType(funcTy); - numQubitsFunc.setPrivate(); - - OpBuilder newBuilder(ctx); - - auto *entryBlock = &numQubitsFunc.getRegion().front(); - newBuilder.setInsertionPointToStart(entryBlock); - Value size = newBuilder.create(loc, 0, retType); - - // Process block recursively to calculate and return allocation size - // and remove everything else. - std::function process = [&](Block &block) { - SmallVector used; - Operation *replacedReturn = nullptr; - - for (auto &op : block) { - // Calculate allocation size (existing allocation size plus new one) - if (auto alloc = dyn_cast(&op)) { - auto allocSize = alloc.getSize(); - newBuilder.setInsertionPointAfter(alloc); - size = newBuilder.create(loc, size, allocSize); - } - - // Return allocation size - if (auto retOp = dyn_cast(&op)) { - if (retOp != replacedReturn) { - - newBuilder.setInsertionPointAfter(retOp); - auto newRet = newBuilder.create(loc, size); - replacedReturn = newRet; - used.push_back(newRet); - } - } - } - - // Collect all ops needed for size calculation - SmallVector keep; - while (!used.empty()) { - auto *op = used.pop_back_val(); - keep.push_back(op); - for (auto opnd : op->getOperands()) - if (auto defOp = opnd.getDefiningOp()) - used.push_back(defOp); - } - - // Remove the rest of the ops - SmallVector toErase; - for (auto &op : block) - if (std::find(keep.begin(), 
keep.end(), &op) == keep.end()) - toErase.push_back(&op); - - for (auto &op : toErase) { - op->dropAllReferences(); - op->dropAllUses(); - op->erase(); - } - }; - - // Process the function body - process(numQubitsFunc.getRegion().front()); - } - - void cudaq::opt::StateAggregator::collectKernelInfo(ModuleOp moduleOp, const cudaq::state *v) { - auto simState = - cudaq::state_helper::getSimulationState(const_cast(v)); - - // If the state has amplitude data, we materialize the data as a state - // vector and create a new state from it in the ArgumentConverter. - // TODO: add an option to use the kernel info if available, i.e. for - // remote simulators - // TODO: add an option of storing the kernel info on simulators if - // preferred i.e. to support synthesis of density matrices. - if (simState->hasData()) { - return; - } - - // Otherwise (ie quantum hardware, where getting the amplitude data is not - // efficient) we aim at replacing states with calls to kernels (`callees`) - // that generated them. This is done in three stages: - // - // 1) (done here) Generate @callee.num_qubits_0 @callee.init_0` for the callee - // function and its arguments stored in a state. 
- - // Create two functions: - // - callee.num_qubits_N - // Calculates the number of qubits needed for the veq allocation - // - callee.init_N - // Initializes the veq passed as a parameter - // - // 2) (done in ArgumentConverter) Replace the state with - // `quake.get_state @callee.num_qubits_0 @callee.init_0`: - // - // clang-format off - // ``` - // func.func @caller(%arg0: !cc.ptr) { - // %1 = quake.get_number_of_qubits %arg0: (!cc.ptr) -> i64 - // %2 = quake.alloca !quake.veq[%1 : i64] - // %3 = quake.init_state %2, %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq - // return - // } - // - // func.func private @callee(%arg0: i64) { - // %0 = quake.alloca !quake.veq[%arg0 : i64] - // %1 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref - // quake.x %1 : (!quake.ref) -> () - // return - // } - // - // Call from the user host code: - // state = cudaq.get_state(callee, 2) - // counts = cudaq.sample(caller, state) - // ``` - // clang-format on - // - // => after argument synthesis: - // - // clang-format off - // ``` - // func.func @caller() { - // %0 = quake.get_state @callee.num_qubits_0 @callee.init_state_0 : !cc.ptr - // %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 - // %2 = quake.alloca !quake.veq[%1 : i64] - // %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq - // return - // } - // - // func.func private @callee.num_qubits_0(%arg0: i64) -> i64 { - // return %arg0 : i64 - // } - // - // func.func private @callee.init_0(%arg0: i64, %arg1: !quake.veq) { - // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref - // quake.x %1 : (f64, !quake.ref) -> () - // return - // } - // ``` - // clang-format on - // - // 3) (done in ReplaceStateWithKernel) Replace the `quake.get_state` and ops - // that use its state with calls to the generated functions, synthesized with - // the arguments used to create the original state: - // - // After ReplaceStateWithKernel pass: - // - // clang-format off - // ``` - // func.func @caller() { 
- // %1 = call callee.num_qubits_0() : () -> i64 - // %2 = quake.alloca !quake.veq[%1 : i64] - // %3 = call @callee.init_0(%2): (!quake.veq) -> !quake.veq - // } - // - // func.func private @callee.num_qubits_0() -> i64 { - // %cst = arith.constant 2 : i64 - // return %cst : i64 - // } - // - // func.func private @callee.init_0(%arg0: !quake.veq): !quake.veq { - // %cst = arith.constant 1.5707963267948966 : f64 - // %1 = quake.extract_ref %arg0[0] : (!quake.veq<2>) -> !quake.ref - // quake.ry (%cst) %1 : (f64, !quake.ref) -> () - // return %arg0 - // } - // ``` - // clang-format on - if (simState->getKernelInfo().has_value()) { - auto [calleeName, calleeArgs] = simState->getKernelInfo().value(); - - std::string calleeKernelName = - cudaq::runtime::cudaqGenPrefixName + calleeName; - - auto builder = IRBuilder(moduleOp); - auto ctx = builder.getContext(); - - auto code = cudaq::get_quake_by_name(calleeName, /*throwException=*/false); - assert(!code.empty() && "Quake code not found for callee"); - auto fromModule = parseSourceString(code, ctx); - - auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); - assert(calleeFunc && "callee func is missing"); - - // Use the state pointer as hash to store new function names - // so we can look them up later in ArgumentConverter. 
- auto hash = std::to_string(reinterpret_cast(v)); - auto initName = calleeName + ".init_" + hash; - auto numQubitsName = calleeName + ".num_qubits_" + hash; - - if (!hasKernelInfo(initName) && !hasKernelInfo(numQubitsName)) { - auto initKernelName = cudaq::runtime::cudaqGenPrefixName + initName; - auto numQubitsKernelName = - cudaq::runtime::cudaqGenPrefixName + numQubitsName; - - // Create `callee.init_N` and `callee.num_qubits_N` functions used to - // replace `quake.get_state` later in ReplaceStateWithKernel pass - createInitFunc(builder, moduleOp, calleeFunc, initKernelName); - createNumQubitsFunc(builder, moduleOp, calleeFunc, numQubitsKernelName); - - // Collect kernel info from the callee arguments recursively - collect(moduleOp, initName, calleeArgs); - collect(moduleOp, numQubitsName, calleeArgs); - } - return; - } - - TODO("cudaq::state* argument synthesis for quantum hardware for c functions"); - } - - //===----------------------------------------------------------------------===// - - - void cudaq::opt::StateAggregator::collect(ModuleOp moduleOp, - const std::string& kernelName, const std::vector &arguments) { - - auto &info = addKernelInfo(moduleOp, kernelName, arguments); - auto substModule = info.converter.getSubstitutionModule(); - auto *ctx = moduleOp.getContext(); - - auto fun = moduleOp.lookupSymbol( - cudaq::runtime::cudaqGenPrefixName + kernelName); - assert(fun && "callee func is missing in state aggregator"); - - FunctionType fromFuncTy = fun.getFunctionType(); - for (auto iter : - llvm::enumerate(llvm::zip(fromFuncTy.getInputs(), arguments))) { - void *argPtr = std::get<1>(iter.value()); - if (!argPtr) - continue; - Type argTy = std::get<0>(iter.value()); - - if (auto ptrTy = dyn_cast(argTy)) - if (ptrTy.getElementType() == cc::StateType::get(ctx)) - collectKernelInfo(substModule, static_cast(argPtr)); - } - } \ No newline at end of file diff --git a/runtime/common/StateAggregator.h b/runtime/common/StateAggregator.h deleted file mode 
100644 index 69dd1ca621f..00000000000 --- a/runtime/common/StateAggregator.h +++ /dev/null @@ -1,65 +0,0 @@ -/****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - - #pragma once - - #include "cudaq/Optimizer/Dialect/CC/CCOps.h" - #include "cudaq/Optimizer/Dialect/CC/CCTypes.h" - #include "cudaq/qis/state.h" - #include "mlir/IR/Builders.h" - #include "mlir/IR/Types.h" - #include - #include - #include - #include "ArgumentConversion.h" - - namespace cudaq::opt { - struct KernelInfo { - ArgumentConverter converter; - const std::vector args; - }; - - class StateAggregator { - public: - /// Create an instance of the state aggregator for a specified \p - /// sourceModule. - StateAggregator(){} - - /// Collect kernel names and arguments for all state arguments. - void collect(mlir::ModuleOp moduleOp, const std::string& kernelName, - const std::vector &arguments); - - /// Get the map of kernel names to their kernel info that - /// were collected by `collect()`. - std::list& getKernelInfo() { - return kernelInfo; - } - - private: - void collectKernelInfo(mlir::ModuleOp moduleOp, const cudaq::state *v); - - bool hasKernelInfo(const std::string &kernelName) { - return std::find(nameRegistry.begin(), nameRegistry.end(), kernelName) != nameRegistry.end(); - } - - KernelInfo& addKernelInfo(mlir::ModuleOp moduleOp, const std::string &kernelName, - const std::vector &args) { - auto &name = nameRegistry.emplace_back(kernelName); - return kernelInfo.emplace_back(std::move(ArgumentConverter(name, moduleOp)), args); - } - - private: - /// Memory to store new kernel names generated during argument conversion. 
- std::list nameRegistry; - - /// Kernel info for kernels we are converting the arguments for, including - /// new kernels generated from state arguments. - std::list kernelInfo; - }; - - } // namespace cudaq::opt \ No newline at end of file diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 5ab571f46fb..afe24478d11 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -12,7 +12,6 @@ // RUN: test_argument_conversion | FileCheck %s #include "common/ArgumentConversion.h" -#include "common/StateAggregator.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" #include "cudaq/Optimizer/InitAllDialects.h" @@ -143,12 +142,14 @@ class FakeDeviceState : public cudaq::SimulationState { extern "C" void __cudaq_deviceCodeHolderAdd(const char *, const char *); -void dumpSubstitutionModule(cudaq::opt::ArgumentConverter &con) { - // Dump the conversions - llvm::outs() << "========================================\n" - "Substitution module:\n" - << con.getKernelName() << "\n" - << con.getSubstitutionModule() << '\n'; +void dumpSubstitutionModules(cudaq::opt::ArgumentConverter &con) { + for (auto &[kName, kInfo] : con.getKernelInfo()) { + // Dump the conversions + llvm::outs() << "========================================\n" + "Substitution module:\n" + << kName << "\n" + << kInfo.getSubstitutionModule() << '\n'; + } } void doSimpleTest(mlir::MLIRContext *ctx, const std::string &typeName, @@ -171,38 +172,7 @@ func.func @__nvqpp__mlirgen__testy(%0: )#" + // Create the argument conversions ab.gen(args); // Dump all conversions - dumpSubstitutionModule(ab); -} - - -void doStateAggregationTest(mlir::MLIRContext *ctx, const std::string &typeName, - std::vector args, - const std::string &additionalCode = "") { -std::string code = additionalCode + R"#( -func.func private @callee(%0: )#" + - typeName + R"#() -func.func 
@__nvqpp__mlirgen__testy(%0: )#" + - typeName + R"#() { -call @callee(%0) : ()#" + - typeName + R"#() -> () -return -})#"; - -// Create the Module -auto mod = mlir::parseSourceString(code, ctx); -llvm::outs() << "Source module:\n" << *mod << '\n'; - - // Create the argument conversions for state arguments - cudaq::opt::StateAggregator sa; - sa.collect(*mod, "testy", args); - - for (auto &kInfo : sa.getKernelInfo()) { - cudaq::opt::ArgumentConverter &cab = kInfo.converter; - // Create the argument conversions for callee kernels from state arguments - cab.gen(kInfo.args); - // Dump all conversions - dumpSubstitutionModule(cab); - } + dumpSubstitutionModules(ab); } void doTest(mlir::MLIRContext *ctx, std::vector &typeNames, @@ -245,7 +215,7 @@ void doTest(mlir::MLIRContext *ctx, std::vector &typeNames, // Create the argument conversions ab.gen_drop_front(args, startingArgIdx); // Dump all conversions - dumpSubstitutionModule(ab); + dumpSubstitutionModules(ab); } void test_scalars(mlir::MLIRContext *ctx) { @@ -568,7 +538,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { std::vector a = {static_cast(&n)}; auto s = cudaq::state(new FakeDeviceState(init, a)); std::vector v = {static_cast(&s)}; - doStateAggregationTest(ctx, "!cc.ptr", v, initCode); + doSimpleTest(ctx, "!cc.ptr", v, initCode); } // clang-format off @@ -669,7 +639,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { std::vector v1 = {static_cast(&s1)}; auto code = std::string{initCode} + std::string{stateParamCode}; - doStateAggregationTest(ctx, "!cc.ptr", v1, code); + doSimpleTest(ctx, "!cc.ptr", v1, code); } // clang-format off @@ -799,7 +769,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { std::vector a = {static_cast(&n)}; auto s = cudaq::state(new FakeDeviceState(init, a)); std::vector v = {static_cast(&s)}; - doStateAggregationTest(ctx, "!cc.ptr", v, initCode); + doSimpleTest(ctx, "!cc.ptr", v, initCode); } // clang-format off From 5ef4c3d0ec1c24a58d3ae98fd27689a433b49578 Mon Sep 17 
00:00:00 2001 From: Anna Gringauze Date: Mon, 3 Mar 2025 14:10:16 -0800 Subject: [PATCH 42/54] Make argument converter handle kernels created from states Signed-off-by: Anna Gringauze --- .../Transforms/ArgumentSynthesis.cpp | 203 ++++----- .../cudaq/platform/py_alt_launch_kernel.cpp | 10 +- runtime/common/ArgumentConversion.cpp | 404 +++++++++--------- runtime/common/ArgumentConversion.h | 91 ++-- runtime/common/BaseRemoteRESTQPU.h | 10 +- runtime/common/BaseRestRemoteClient.h | 14 +- runtime/test/test_argument_conversion.cpp | 6 +- 7 files changed, 356 insertions(+), 382 deletions(-) diff --git a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp index b80bdfeea23..51c771eb45e 100644 --- a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp +++ b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp @@ -14,7 +14,6 @@ #include "mlir/Parser/Parser.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" -#include namespace cudaq::opt { #define GEN_PASS_DEF_ARGUMENTSYNTHESIS @@ -31,99 +30,9 @@ class ArgumentSynthesisPass public: using ArgumentSynthesisBase::ArgumentSynthesisBase; - void - applySubstitutions(func::FuncOp func, - DenseMap> &substModules) { - MLIRContext *ctx = func.getContext(); - auto funcName = func.getName(); - LLVM_DEBUG(llvm::dbgs() << "processing : '" << funcName << "'\n"); - - // 1. Find substitution module with argument replacements for the function. - auto it = substModules.find(funcName); - if (it == substModules.end()) { - // If the function isn't on the list, do nothing. - LLVM_DEBUG(llvm::dbgs() << funcName << " has no substitutions.\n"); - return; - } - auto substMod = *(it->second); - - // 2. Go through the Module and process each substitution. 
- SmallVector processedArgs(func.getFunctionType().getNumInputs()); - SmallVector> replacements; - BitVector replacedArgs(processedArgs.size()); - for (auto &op : substMod) { - auto subst = dyn_cast(op); - if (!subst) - continue; - auto pos = subst.getPosition(); - if (pos >= processedArgs.size()) { - func.emitError("Argument " + std::to_string(pos) + " is invalid."); - signalPassFailure(); - return; - } - if (processedArgs[pos]) { - func.emitError("Argument " + std::to_string(pos) + - " was already substituted."); - signalPassFailure(); - return; - } - - // OK, substitute the code for the argument. - Block &entry = func.getRegion().front(); - processedArgs[pos] = true; - if (subst.getBody().front().empty()) { - // No code is present. Erase the argument if it is not used. - const auto numUses = - std::distance(entry.getArgument(pos).getUses().begin(), - entry.getArgument(pos).getUses().end()); - LLVM_DEBUG(llvm::dbgs() << "maybe erasing an unused argument (" - << std::to_string(numUses) << ")\n"); - if (numUses == 0) - replacedArgs.set(pos); - continue; - } - OpBuilder builder{ctx}; - Block *splitBlock = entry.splitBlock(entry.begin()); - builder.setInsertionPointToEnd(&entry); - builder.create(func.getLoc(), &subst.getBody().front()); - Operation *lastOp = &subst.getBody().front().back(); - builder.setInsertionPointToEnd(&subst.getBody().front()); - builder.create(func.getLoc(), splitBlock); - func.getBlocks().splice(Region::iterator{splitBlock}, - subst.getBody().getBlocks()); - if (lastOp && - lastOp->getResult(0).getType() == entry.getArgument(pos).getType()) { - LLVM_DEBUG(llvm::dbgs() - << funcName << " argument " << std::to_string(pos) - << " was substituted.\n"); - replacements.emplace_back(pos, entry.getArgument(pos), - lastOp->getResult(0)); - } - } - - // Note: if we exited before here, any code that was cloned into the - // function is still dead and can be removed by a DCE. - - // 3. Replace the block argument values with the freshly inserted new code. 
- for (auto [pos, fromVal, toVal] : replacements) { - replacedArgs.set(pos); - fromVal.replaceAllUsesWith(toVal); - } - - // 4. Finish specializing func and erase any of func's arguments that were - // substituted. - func.eraseArguments(replacedArgs); - } - void runOnOperation() override { - ModuleOp mod = getOperation(); - MLIRContext *ctx = mod.getContext(); - - // 1. Collect all substitution modules. - std::list funcNames; - DenseMap> substModules; - - for (auto &item : funcList) { + ModuleOp moduleOp = getOperation(); + for (auto item : funcList) { auto pos = item.find(':'); if (pos == std::string::npos) continue; @@ -131,15 +40,27 @@ class ArgumentSynthesisPass std::string funcName = item.substr(0, pos); std::string text = item.substr(pos + 1); + auto *op = moduleOp.lookupSymbol(funcName); + func::FuncOp func = dyn_cast_if_present(op); + + if (!func) { + LLVM_DEBUG(llvm::dbgs() << funcName << " is not in the module."); + continue; + } + + // If there are no substitutions, we're done. if (text.empty()) { LLVM_DEBUG(llvm::dbgs() << funcName << " has no substitutions."); continue; } - // Create a Module with the substitutions that we'll be making. - LLVM_DEBUG(llvm::dbgs() - << funcName << " : substitution pattern: '" << text << "'\n"); - auto substModule = [&]() -> OwningOpRef { + // If we're here, we have a FuncOp and we have substitutions that can be + // applied. + // + // 1. Create a Module with the substitutions that we'll be making. + auto *ctx = func.getContext(); + LLVM_DEBUG(llvm::dbgs() << "substitution pattern: '" << text << "'\n"); + auto substMod = [&]() -> OwningOpRef { if (text.front() == '*') { // Substitutions are a raw string after the '*' character. return parseSourceString(text.substr(1), ctx); @@ -147,27 +68,83 @@ class ArgumentSynthesisPass // Substitutions are in a text file (command-line usage). 
return parseSourceFile(text, ctx); }(); - assert(*substModule && "module must have been created"); + assert(*substMod && "module must have been created"); - auto &name = funcNames.emplace_back(funcName); - substModules.try_emplace(name, std::move(substModule)); - } - - // 2. Merge symbols from substitution modules into the source module. - for (auto &[funcName, substMod] : substModules) { + // 2. Go through the Module and process each substitution. + SmallVector processedArgs(func.getFunctionType().getNumInputs()); + SmallVector> replacements; + BitVector replacedArgs(processedArgs.size()); for (auto &op : *substMod) { - if (auto symInterface = dyn_cast(op)) { - auto name = symInterface.getName(); - auto obj = mod.lookupSymbol(name); - if (!obj) - mod.getBody()->push_back(op.clone()); + auto subst = dyn_cast(op); + if (!subst) { + if (auto symInterface = dyn_cast(op)) { + auto name = symInterface.getName(); + auto obj = moduleOp.lookupSymbol(name); + if (!obj) + moduleOp.getBody()->push_back(op.clone()); + } + continue; + } + auto pos = subst.getPosition(); + if (pos >= processedArgs.size()) { + func.emitError("Argument " + std::to_string(pos) + " is invalid."); + signalPassFailure(); + return; + } + if (processedArgs[pos]) { + func.emitError("Argument " + std::to_string(pos) + + " was already substituted."); + signalPassFailure(); + return; + } + + // OK, substitute the code for the argument. + Block &entry = func.getRegion().front(); + processedArgs[pos] = true; + if (subst.getBody().front().empty()) { + // No code is present. Erase the argument if it is not used. 
+ const auto numUses = + std::distance(entry.getArgument(pos).getUses().begin(), + entry.getArgument(pos).getUses().end()); + LLVM_DEBUG(llvm::dbgs() << "maybe erasing an unused argument (" + << std::to_string(numUses) << ")\n"); + if (numUses == 0) + replacedArgs.set(pos); + continue; + } + OpBuilder builder{ctx}; + Block *splitBlock = entry.splitBlock(entry.begin()); + builder.setInsertionPointToEnd(&entry); + builder.create(func.getLoc(), &subst.getBody().front()); + Operation *lastOp = &subst.getBody().front().back(); + builder.setInsertionPointToEnd(&subst.getBody().front()); + builder.create(func.getLoc(), splitBlock); + func.getBlocks().splice(Region::iterator{splitBlock}, + subst.getBody().getBlocks()); + if (lastOp && lastOp->getResult(0).getType() == + entry.getArgument(pos).getType()) { + LLVM_DEBUG(llvm::dbgs() + << funcName << " argument " << std::to_string(pos) + << " was substituted.\n"); + replacements.emplace_back(pos, entry.getArgument(pos), + lastOp->getResult(0)); } } - } - // 3. Apply all substitutions. - mod->walk( - [&](func::FuncOp func) { applySubstitutions(func, substModules); }); + // Note: if we exited before here, any code that was cloned into the + // function is still dead and can be removed by a DCE. + + // 3. Replace the block argument values with the freshly inserted new + // code. + for (auto [pos, fromVal, toVal] : replacements) { + replacedArgs.set(pos); + fromVal.replaceAllUsesWith(toVal); + } + + // 4. Finish specializing func and erase any of func's arguments that were + // substituted. 
+ func.eraseArguments(replacedArgs); + } } }; } // namespace diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 3f15beac689..c2294035ad5 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -550,10 +550,10 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, // We pass string references to the `createArgumentSynthesisPass`. mlir::SmallVector kernels; mlir::SmallVector substs; - for (auto &[kName, kInfo] : argCon.getKernelInfo()) { + for (auto &kInfo : argCon.getKernelSubstitutions()) { { std::string kernName = - cudaq::runtime::cudaqGenPrefixName + kName.str(); + cudaq::runtime::cudaqGenPrefixName + kInfo.getKernelName().str(); kernels.emplace_back(kernName); } { @@ -565,10 +565,8 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, } // Collect references for the argument synthesis. - mlir::SmallVector kernelRefs{kernels.begin(), - kernels.end()}; - mlir::SmallVector substRefs{substs.begin(), - substs.end()}; + mlir::SmallVector kernelRefs{kernels.begin(), kernels.end()}; + mlir::SmallVector substRefs{substs.begin(), substs.end()}; PassManager pm(context); pm.addPass(opt::createArgumentSynthesisPass(kernelRefs, substRefs)); diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 3b0efa4fe70..e563a90f99f 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -20,8 +20,6 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/Parser/Parser.h" -#include - using namespace mlir; template @@ -101,128 +99,127 @@ static Value genConstant(OpBuilder &, cudaq::cc::StructType, void *, static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp substMod, llvm::DataLayout &); +/// Create callee.init_N that initializes the state +/// Callee (the kernel captured by state): +// 
clang-format off +/// func.func @callee(%arg0: i64) { +/// %0 = cc.alloca i64 +/// cc.store %arg0, %0 : !cc.ptr +/// %1 = cc.load %0 : !cc.ptr +/// %2 = quake.alloca !quake.veq[%1 : i64] +/// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref +/// quake.x %3 : (!quake.ref) -> () +/// return +/// } +/// callee.init_N: +/// func.func private @callee.init_0(%arg0: !quake.veq, %arg0: i64) -> +/// !!quake.veq { +/// %1 = quake.extract_ref %arg0[1] : (!quake.veq<2>) -> !quake.ref +/// quake.x %1 : (f64, !quake.ref) -> () +/// return %arg0: !quake.veq +/// } +// clang-format on +static void createInitFunc(OpBuilder &builder, ModuleOp moduleOp, + func::FuncOp calleeFunc, StringRef initKernelName) { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(moduleOp.getBody()); - /// Create callee.init_N that initializes the state - /// Callee (the kernel captured by state): - // clang-format off - /// func.func @callee(%arg0: i64) { - /// %0 = cc.alloca i64 - /// cc.store %arg0, %0 : !cc.ptr - /// %1 = cc.load %0 : !cc.ptr - /// %2 = quake.alloca !quake.veq[%1 : i64] - /// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref - /// quake.x %3 : (!quake.ref) -> () - /// return - /// } - /// callee.init_N: - /// func.func private @callee.init_0(%arg0: !quake.veq, %arg0: i64) -> - /// !!quake.veq { - /// %1 = quake.extract_ref %arg0[1] : (!quake.veq<2>) -> !quake.ref - /// quake.x %1 : (f64, !quake.ref) -> () - /// return %arg0: !quake.veq - /// } - // clang-format on - static void createInitFunc(OpBuilder &builder, ModuleOp moduleOp, - func::FuncOp calleeFunc, StringRef initKernelName) { -OpBuilder::InsertionGuard guard(builder); -builder.setInsertionPointToEnd(moduleOp.getBody()); - -auto ctx = builder.getContext(); -auto loc = builder.getUnknownLoc(); - -auto initFunc = cast(builder.clone(*calleeFunc)); - -auto argTypes = calleeFunc.getArgumentTypes(); -auto retTy = quake::VeqType::getUnsized(ctx); -auto funcTy = FunctionType::get(ctx, 
argTypes, TypeRange{retTy}); - -initFunc.setName(initKernelName); -initFunc.setType(funcTy); -initFunc.setPrivate(); - -OpBuilder newBuilder(ctx); - -auto *entryBlock = &initFunc.getRegion().front(); -newBuilder.setInsertionPointToStart(entryBlock); -Value zero = newBuilder.create(loc, 0, 64); -Value one = newBuilder.create(loc, 1, 64); -Value begin = zero; - -auto argPos = initFunc.getArguments().size(); - -// Detect errors in kernel passed to get_state. -std::function processInner = [&](Block &block) { -for (auto &op : block) { -for (auto ®ion : op.getRegions()) -for (auto &b : region) -processInner(b); - -// Don't allow returns in inner scopes -if (auto retOp = dyn_cast(&op)) -calleeFunc.emitError("Encountered return in inner scope in a kernel " - "passed to get_state"); -} -}; - -for (auto &op : calleeFunc.getRegion().front()) -for (auto ®ion : op.getRegions()) -for (auto &b : region) -processInner(b); - -// Process outer block to initialize the allocation passed as an argument. -std::function process = [&](Block &block) { -SmallVector cleanUps; -Operation *replacedReturn = nullptr; - -Value arg; -Value subArg; -Value blockBegin = begin; -Value blockAllocSize = zero; -for (auto &op : block) { -if (auto alloc = dyn_cast(&op)) { -newBuilder.setInsertionPointAfter(alloc); - -if (!arg) { -initFunc.insertArgument(argPos, retTy, {}, loc); -arg = initFunc.getArgument(argPos); -} + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); -auto allocSize = alloc.getSize(); -auto offset = newBuilder.create(loc, allocSize, one); -subArg = -newBuilder.create(loc, retTy, arg, begin, offset); -alloc.replaceAllUsesWith(subArg); -cleanUps.push_back(alloc); -begin = newBuilder.create(loc, begin, allocSize); -blockAllocSize = -newBuilder.create(loc, blockAllocSize, allocSize); -} + auto initFunc = cast(builder.clone(*calleeFunc)); -if (auto retOp = dyn_cast(&op)) { -if (retOp != replacedReturn) { -newBuilder.setInsertionPointAfter(retOp); + auto argTypes = 
calleeFunc.getArgumentTypes(); + auto retTy = quake::VeqType::getUnsized(ctx); + auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retTy}); -auto offset = -newBuilder.create(loc, blockAllocSize, one); -Value ret = newBuilder.create(loc, retTy, arg, - blockBegin, offset); + initFunc.setName(initKernelName); + initFunc.setType(funcTy); + initFunc.setPrivate(); -assert(arg && "No veq allocations found"); -replacedReturn = newBuilder.create(loc, ret); -cleanUps.push_back(retOp); -} -} -} + OpBuilder newBuilder(ctx); -for (auto &op : cleanUps) { -op->dropAllReferences(); -op->dropAllUses(); -op->erase(); -} -}; + auto *entryBlock = &initFunc.getRegion().front(); + newBuilder.setInsertionPointToStart(entryBlock); + Value zero = newBuilder.create(loc, 0, 64); + Value one = newBuilder.create(loc, 1, 64); + Value begin = zero; + + auto argPos = initFunc.getArguments().size(); + + // Detect errors in kernel passed to get_state. + std::function processInner = [&](Block &block) { + for (auto &op : block) { + for (auto ®ion : op.getRegions()) + for (auto &b : region) + processInner(b); + + // Don't allow returns in inner scopes + if (auto retOp = dyn_cast(&op)) + calleeFunc.emitError("Encountered return in inner scope in a kernel " + "passed to get_state"); + } + }; + + for (auto &op : calleeFunc.getRegion().front()) + for (auto ®ion : op.getRegions()) + for (auto &b : region) + processInner(b); + + // Process outer block to initialize the allocation passed as an argument. 
+ std::function process = [&](Block &block) { + SmallVector cleanUps; + Operation *replacedReturn = nullptr; + + Value arg; + Value subArg; + Value blockBegin = begin; + Value blockAllocSize = zero; + for (auto &op : block) { + if (auto alloc = dyn_cast(&op)) { + newBuilder.setInsertionPointAfter(alloc); + + if (!arg) { + initFunc.insertArgument(argPos, retTy, {}, loc); + arg = initFunc.getArgument(argPos); + } + + auto allocSize = alloc.getSize(); + auto offset = newBuilder.create(loc, allocSize, one); + subArg = + newBuilder.create(loc, retTy, arg, begin, offset); + alloc.replaceAllUsesWith(subArg); + cleanUps.push_back(alloc); + begin = newBuilder.create(loc, begin, allocSize); + blockAllocSize = + newBuilder.create(loc, blockAllocSize, allocSize); + } + + if (auto retOp = dyn_cast(&op)) { + if (retOp != replacedReturn) { + newBuilder.setInsertionPointAfter(retOp); + + auto offset = + newBuilder.create(loc, blockAllocSize, one); + Value ret = newBuilder.create(loc, retTy, arg, + blockBegin, offset); + + assert(arg && "No veq allocations found"); + replacedReturn = newBuilder.create(loc, ret); + cleanUps.push_back(retOp); + } + } + } + + for (auto &op : cleanUps) { + op->dropAllReferences(); + op->dropAllUses(); + op->erase(); + } + }; -// Process the function body -process(initFunc.getRegion().front()); + // Process the function body + process(initFunc.getRegion().front()); } /// Create callee.num_qubits_N that calculates the number of qubits to @@ -248,92 +245,90 @@ process(initFunc.getRegion().front()); /// } // clang-format on static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, - func::FuncOp calleeFunc, - StringRef numQubitsKernelName) { -OpBuilder::InsertionGuard guard(builder); -builder.setInsertionPointToEnd(moduleOp.getBody()); - -auto ctx = builder.getContext(); -auto loc = builder.getUnknownLoc(); - -auto numQubitsFunc = cast(builder.clone(*calleeFunc)); - -auto argTypes = calleeFunc.getArgumentTypes(); -auto retType = 
builder.getI64Type(); -auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retType}); - -numQubitsFunc.setName(numQubitsKernelName); -numQubitsFunc.setType(funcTy); -numQubitsFunc.setPrivate(); - -OpBuilder newBuilder(ctx); - -auto *entryBlock = &numQubitsFunc.getRegion().front(); -newBuilder.setInsertionPointToStart(entryBlock); -Value size = newBuilder.create(loc, 0, retType); - -// Process block recursively to calculate and return allocation size -// and remove everything else. -std::function process = [&](Block &block) { -SmallVector used; -Operation *replacedReturn = nullptr; - -for (auto &op : block) { -// Calculate allocation size (existing allocation size plus new one) -if (auto alloc = dyn_cast(&op)) { -auto allocSize = alloc.getSize(); -newBuilder.setInsertionPointAfter(alloc); -size = newBuilder.create(loc, size, allocSize); -} + func::FuncOp calleeFunc, + StringRef numQubitsKernelName) { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(moduleOp.getBody()); -// Return allocation size -if (auto retOp = dyn_cast(&op)) { -if (retOp != replacedReturn) { + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); -newBuilder.setInsertionPointAfter(retOp); -auto newRet = newBuilder.create(loc, size); -replacedReturn = newRet; -used.push_back(newRet); -} -} -} + auto numQubitsFunc = cast(builder.clone(*calleeFunc)); -// Collect all ops needed for size calculation -SmallVector keep; -while (!used.empty()) { -auto *op = used.pop_back_val(); -keep.push_back(op); -for (auto opnd : op->getOperands()) -if (auto defOp = opnd.getDefiningOp()) -used.push_back(defOp); -} + auto argTypes = calleeFunc.getArgumentTypes(); + auto retType = builder.getI64Type(); + auto funcTy = FunctionType::get(ctx, argTypes, TypeRange{retType}); -// Remove the rest of the ops -SmallVector toErase; -for (auto &op : block) -if (std::find(keep.begin(), keep.end(), &op) == keep.end()) -toErase.push_back(&op); + 
numQubitsFunc.setName(numQubitsKernelName); + numQubitsFunc.setType(funcTy); + numQubitsFunc.setPrivate(); -for (auto &op : toErase) { -op->dropAllReferences(); -op->dropAllUses(); -op->erase(); -} -}; + OpBuilder newBuilder(ctx); + + auto *entryBlock = &numQubitsFunc.getRegion().front(); + newBuilder.setInsertionPointToStart(entryBlock); + Value size = newBuilder.create(loc, 0, retType); + + // Process block recursively to calculate and return allocation size + // and remove everything else. + std::function process = [&](Block &block) { + SmallVector used; + Operation *replacedReturn = nullptr; + + for (auto &op : block) { + // Calculate allocation size (existing allocation size plus new one) + if (auto alloc = dyn_cast(&op)) { + auto allocSize = alloc.getSize(); + newBuilder.setInsertionPointAfter(alloc); + size = newBuilder.create(loc, size, allocSize); + } -// Process the function body -process(numQubitsFunc.getRegion().front()); + // Return allocation size + if (auto retOp = dyn_cast(&op)) { + if (retOp != replacedReturn) { + + newBuilder.setInsertionPointAfter(retOp); + auto newRet = newBuilder.create(loc, size); + replacedReturn = newRet; + used.push_back(newRet); + } + } + } + + // Collect all ops needed for size calculation + SmallVector keep; + while (!used.empty()) { + auto *op = used.pop_back_val(); + keep.push_back(op); + for (auto opnd : op->getOperands()) + if (auto defOp = opnd.getDefiningOp()) + used.push_back(defOp); + } + + // Remove the rest of the ops + SmallVector toErase; + for (auto &op : block) + if (std::find(keep.begin(), keep.end(), &op) == keep.end()) + toErase.push_back(&op); + + for (auto &op : toErase) { + op->dropAllReferences(); + op->dropAllUses(); + op->erase(); + } + }; + + // Process the function body + process(numQubitsFunc.getRegion().front()); } static Value genConstant(OpBuilder &builder, const cudaq::state *v, - llvm::DataLayout &layout, StringRef kernelName, ModuleOp substMod, + llvm::DataLayout &layout, StringRef 
kernelName, + ModuleOp substMod, cudaq::opt::ArgumentConverter &converter) { auto simState = cudaq::state_helper::getSimulationState(const_cast(v)); - //auto kernelName = converter.getKernelName(); - //auto substMod = converter.getSubstitutionModule(); - // If the state has amplitude data, we materialize the data as a state // vector and create a new state from it. if (simState->hasData()) { @@ -508,11 +503,11 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, // Convert arguments for `callee.init_N`. auto ®isteredInitName = converter.registerKernel(initName); - converter.gen(registeredInitName, calleeArgs); + converter.gen(registeredInitName, substMod, calleeArgs); // Convert arguments for `callee.num_qubits_N`. - auto ®isteredNumQubitsName = converter.registerKernel(initName); - converter.gen(registeredNumQubitsName, calleeArgs); + auto ®isteredNumQubitsName = converter.registerKernel(numQubitsName); + converter.gen(registeredNumQubitsName, substMod, calleeArgs); } // Create a substitution for the state pointer. @@ -698,24 +693,27 @@ Value genConstant(OpBuilder &builder, cudaq::cc::IndirectCallableType indCallTy, cudaq::opt::ArgumentConverter::ArgumentConverter(StringRef kernelName, ModuleOp sourceModule) - : sourceModule(sourceModule), builder(sourceModule.getContext()), - kernelName(kernelName) { -} + : sourceModule(sourceModule), kernelName(kernelName) {} void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { - gen(kernelName, arguments); + gen(kernelName, sourceModule, arguments); } -void cudaq::opt::ArgumentConverter::gen(StringRef kernelName, const std::vector &arguments) { - auto *ctx = builder.getContext(); - // We should look up the input type signature here. 
- auto &kernelInfo = addKernelInfo(kernelName); - auto substModule = kernelInfo.getSubstitutionModule(); +void cudaq::opt::ArgumentConverter::gen(StringRef kernelName, + ModuleOp sourceModule, + const std::vector &arguments) { + auto *ctx = sourceModule.getContext(); + OpBuilder builder(ctx); + ModuleOp substModule = + builder.create(builder.getUnknownLoc()); + auto &kernelInfo = addKernelInfo(kernelName, substModule); + // We should look up the input type signature here. auto fun = sourceModule.lookupSymbol( cudaq::runtime::cudaqGenPrefixName + kernelName.str()); if (!fun) { - throw std::runtime_error("missing fun in argument conversion: " + kernelName.str()); + throw std::runtime_error("missing fun in argument conversion: " + + kernelName.str()); } FunctionType fromFuncTy = fun.getFunctionType(); diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 2be7ba579dc..9252ee1b8a2 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -19,34 +19,34 @@ namespace cudaq::opt { - -class KernelInfo { - public: - KernelInfo(mlir::OpBuilder builder, mlir::StringRef kernelName) - : kernelName(kernelName) { - substModule = builder.create(builder.getUnknownLoc()); - } - - /// Some substitutions may generate global constant information. Use this - /// interface to access both the substitutions and any global constants - /// created. - mlir::ModuleOp getSubstitutionModule() { - return substModule; - } - - /// Get the list of substitutions for this kernel that were generated - /// by `ArgumentConverter::gen()`. 
- mlir::SmallVector &getSubstitutions() { - return substitutions; - } - - private: - mlir::ModuleOp substModule; - mlir::StringRef kernelName; - mlir::SmallVector substitutions; - }; - - +class ArgumentConverter; + +class KernelSubstitutionInfo { +public: + KernelSubstitutionInfo(mlir::StringRef kernelName, mlir::ModuleOp substModule) + : kernelName(kernelName), substModule(substModule) {} + + /// Some substitutions may generate global constant information. Use this + /// interface to access both the substitutions and any global constants + /// created. + mlir::ModuleOp getSubstitutionModule() { return substModule; } + + /// Get the list of substitutions for this kernel that were generated + /// by `ArgumentConverter::gen()`. + mlir::SmallVector &getSubstitutions() { + return substitutions; + } + + mlir::StringRef getKernelName() { return kernelName; } + +private: + mlir::StringRef kernelName; + mlir::ModuleOp substModule; + mlir::SmallVector substitutions; + + friend ArgumentConverter; +}; + class ArgumentConverter { public: /// Build an instance to create argument substitutions for a specified \p @@ -59,7 +59,8 @@ class ArgumentConverter { /// Generate a substitution ModuleOp for the vector of arguments presented. /// The arguments are those presented to the kernel, kernelName. - void gen(mlir::StringRef kernelName, const std::vector &arguments); + void gen(mlir::StringRef kernelName, mlir::ModuleOp sourceModule, + const std::vector &arguments); /// Generate a substitution ModuleOp but include only the arguments that do /// not appear in the set of \p exclusions. @@ -70,38 +71,38 @@ class ArgumentConverter { /// and thereby exclude them from the substitutions. void gen_drop_front(const std::vector &arguments, unsigned numDrop); - /// Kernel we are converting the arguments for. - mlir::StringRef getKernelName() { return kernelName; } - - /// Get the map of kernel names to their kernel info that - /// were collected by `collect()`. 
- mlir::DenseMap& getKernelInfo() { - return kernelInfo; + /// Get the kernel info that were collected by `gen()`. + std::list &getKernelSubstitutions() { + return kernelSubstitutions; } bool isRegisteredKernel(const std::string &kernelName) { - return std::find(nameRegistry.begin(), nameRegistry.end(), kernelName) != nameRegistry.end(); + return std::find(nameRegistry.begin(), nameRegistry.end(), kernelName) != + nameRegistry.end(); } std::string ®isterKernel(const std::string &kernelName) { return nameRegistry.emplace_back(kernelName); } - KernelInfo& addKernelInfo(mlir::StringRef kernelName) { - auto [it,b] = kernelInfo.try_emplace(kernelName, std::move(KernelInfo(builder, kernelName))); - return it->second; +private: + KernelSubstitutionInfo &addKernelInfo(mlir::StringRef kernelName, + mlir::ModuleOp substModule) { + return kernelSubstitutions.emplace_back(kernelName, substModule); } - private: /// Memory to store new kernel names generated during argument conversion. + /// Use list here to keep references to those elements valid. std::list nameRegistry; - /// Kernel info for kernels we are converting the arguments for, including - /// new kernels generated from state arguments. - mlir::DenseMap kernelInfo; + /// Memory to store new kernel info generated during argument conversion. + /// Use list here to keep elements sorted in order of creation. + std::list kernelSubstitutions; + /// Original module before substitutions. mlir::ModuleOp sourceModule; - mlir::OpBuilder builder; + + /// Kernel we are substituting the arguments for. mlir::StringRef kernelName; }; diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 6937c43233e..94ebdbaab2d 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -466,10 +466,10 @@ class BaseRemoteRESTQPU : public cudaq::QPU { // We pass string references to the `createArgumentSynthesisPass`. 
mlir::SmallVector kernels; mlir::SmallVector substs; - for (auto &[kName, kInfo] : argCon.getKernelInfo()) { + for (auto &kInfo : argCon.getKernelSubstitutions()) { { - std::string kernName = - cudaq::runtime::cudaqGenPrefixName + kName.str(); + std::string kernName = cudaq::runtime::cudaqGenPrefixName + + kInfo.getKernelName().str(); kernels.emplace_back(kernName); } { @@ -482,9 +482,9 @@ class BaseRemoteRESTQPU : public cudaq::QPU { // Collect references for the argument synthesis. mlir::SmallVector kernelRefs{kernels.begin(), - kernels.end()}; + kernels.end()}; mlir::SmallVector substRefs{substs.begin(), - substs.end()}; + substs.end()}; pm.addPass(opt::createArgumentSynthesisPass(kernelRefs, substRefs)); pm.addPass(opt::createDeleteStates()); pm.addNestedPass( diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index f5f63d132b2..fdb34719420 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -190,10 +190,10 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { // We pass string references to the `createArgumentSynthesisPass`. mlir::SmallVector kernels; mlir::SmallVector substs; - for (auto &[kName, kInfo] : argCon.getKernelInfo()) { + for (auto &kInfo : argCon.getKernelSubstitutions()) { { - std::string kernName = - cudaq::runtime::cudaqGenPrefixName + kName.str(); + std::string kernName = cudaq::runtime::cudaqGenPrefixName + + kInfo.getKernelName().str(); kernels.emplace_back(kernName); } { @@ -203,17 +203,17 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { substs.emplace_back(substBuff); } } - + // Collect references for the argument synthesis. 
mlir::SmallVector kernelRefs{kernels.begin(), - kernels.end()}; + kernels.end()}; mlir::SmallVector substRefs{substs.begin(), - substs.end()}; + substs.end()}; pm.addPass(opt::createArgumentSynthesisPass(kernelRefs, substRefs)); pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); pm.addNestedPass( - opt::createReplaceStateWithKernel()); + opt::createReplaceStateWithKernel()); pm.addPass(mlir::createSymbolDCEPass()); } else if (args) { cudaq::info("Run Quake Synth.\n"); diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index afe24478d11..75e7eaf96b8 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -143,12 +143,12 @@ class FakeDeviceState : public cudaq::SimulationState { extern "C" void __cudaq_deviceCodeHolderAdd(const char *, const char *); void dumpSubstitutionModules(cudaq::opt::ArgumentConverter &con) { - for (auto &[kName, kInfo] : con.getKernelInfo()) { + for (auto &kInfo : con.getKernelSubstitutions()) { // Dump the conversions llvm::outs() << "========================================\n" "Substitution module:\n" - << kName << "\n" - << kInfo.getSubstitutionModule() << '\n'; + << kInfo.getKernelName() << "\n" + << kInfo.getSubstitutionModule() << '\n'; } } From 6fefc2715efebad2b25be8628860f7e36d17b5e2 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 10 Mar 2025 14:40:39 -0700 Subject: [PATCH 43/54] Fix null alloc size and add tests Signed-off-by: Anna Gringauze --- runtime/common/ArgumentConversion.cpp | 7 ++ runtime/test/test_argument_conversion.cpp | 92 +++++++++++++++++++---- 2 files changed, 85 insertions(+), 14 deletions(-) diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index e563a90f99f..80440e3d57a 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -185,6 +185,10 @@ static void createInitFunc(OpBuilder &builder, 
ModuleOp moduleOp, } auto allocSize = alloc.getSize(); + if (!allocSize) + allocSize = newBuilder.create( + loc, quake::getAllocationSize(alloc.getType()), 64); + auto offset = newBuilder.create(loc, allocSize, one); subArg = newBuilder.create(loc, retTy, arg, begin, offset); @@ -279,6 +283,9 @@ static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, // Calculate allocation size (existing allocation size plus new one) if (auto alloc = dyn_cast(&op)) { auto allocSize = alloc.getSize(); + if (!allocSize) + allocSize = newBuilder.create( + loc, quake::getAllocationSize(alloc.getType()), 64); newBuilder.setInsertionPointAfter(alloc); size = newBuilder.create(loc, size, allocSize); } diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 75e7eaf96b8..825e08152ed 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -515,18 +515,82 @@ void test_simulation_state(mlir::MLIRContext *ctx) { void test_quantum_state(mlir::MLIRContext *ctx) { { // @cudaq.kernel - // def init(n: int): + // def init(): + // q = cudaq.qvector(2) + // + // def kernel(s: cudaq.State): + // ... 
+ // + // s = cudaq.get_state(init) + // cudaq.sample(kernel, s) + auto init = "init"; + auto initCode = "func.func private @__nvqpp__mlirgen__init() {\n" + " %0 = quake.alloca !quake.veq<2>\n" + " return\n" + "}\n"; + __cudaq_deviceCodeHolderAdd(init, initCode); + + std::int64_t n = 2; + std::vector a = {static_cast(&n)}; + auto s = cudaq::state(new FakeDeviceState(init, a)); + std::vector v = {static_cast(&s)}; + doSimpleTest(ctx, "!cc.ptr", v, initCode); + } + + // clang-format off +// CHECK: Source module: +// CHECK: func.func private @__nvqpp__mlirgen__init() { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> +// CHECK: return +// CHECK: } +// CHECK: func.func private @callee(!cc.ptr) + +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: testy +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__init.init_[[HASH_0]] : !cc.ptr +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init.init_[[HASH_0]](%arg0: !quake.veq) -> !quake.veq { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i64 +// CHECK: %[[VAL_3:.*]] = arith.subi %[[VAL_2]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_4:.*]] = quake.subveq %arg0, %[[VAL_0]], %[[VAL_3]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: %[[VAL_5:.*]] = arith.addi %[[VAL_0]], %[[VAL_2]] : i64 +// CHECK: %[[VAL_6:.*]] = arith.addi %[[VAL_0]], %[[VAL_2]] : i64 +// CHECK: %[[VAL_7:.*]] = arith.subi %[[VAL_6]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_8:.*]] = quake.subveq %arg0, %[[VAL_0]], %[[VAL_7]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: return %[[VAL_8]] : !quake.veq +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init.num_qubits_[[HASH_0]]() -> i64 { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i64 +// CHECK: 
%[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i64 +// CHECK: return %[[VAL_2]] : i64 +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: init.init_[[HASH_0]] +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: init.num_qubits_[[HASH_0]] + // clang-format on + + { + // @cudaq.kernel + // def init0(n: int): // q = cudaq.qvector(n) // x(q[0]) // // def kernel(s: cudaq.State): // ... // - // s = cudaq.get_state(init, 2) + // s = cudaq.get_state(init0, 2) // cudaq.sample(kernel, s) - auto init = "init"; + auto init = "init0"; auto initCode = - "func.func private @__nvqpp__mlirgen__init(%arg0: i64) {\n" + "func.func private @__nvqpp__mlirgen__init0(%arg0: i64) {\n" " %0 = quake.alloca !quake.veq[%arg0 : i64]\n" " %1 = quake.extract_ref %0[0] : (!quake.veq) -> !quake.ref\n" " quake.x %1 : (!quake.ref) -> ()\n" @@ -543,7 +607,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // clang-format off // CHECK: Source module: -// CHECK: func.func private @__nvqpp__mlirgen__init(%arg0: i64) { +// CHECK: func.func private @__nvqpp__mlirgen__init0(%arg0: i64) { // CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] // CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref // CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () @@ -555,9 +619,9 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__init.init_[[HASH_0]] : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init0.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__init0.init_[[HASH_0]] : !cc.ptr // CHECK: } -// CHECK: func.func private @__nvqpp__mlirgen__init.init_[[HASH_0]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { +// CHECK: func.func private 
@__nvqpp__mlirgen__init0.init_[[HASH_0]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 // CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 // CHECK: %[[VAL_2:.*]] = arith.subi %arg0, %[[VAL_1]] : i64 @@ -570,20 +634,20 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: %[[VAL_8:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_7]] : (!quake.veq, i64, i64) -> !quake.veq // CHECK: return %[[VAL_8]] : !quake.veq // CHECK: } -// CHECK: func.func private @__nvqpp__mlirgen__init.num_qubits_[[HASH_0]](%arg0: i64) -> i64 { +// CHECK: func.func private @__nvqpp__mlirgen__init0.num_qubits_[[HASH_0]](%arg0: i64) -> i64 { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 // CHECK: %[[VAL_1:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 // CHECK: return %[[VAL_1]] : i64 // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: -// CHECK: init.init_[[HASH_0]] +// CHECK: init0.init_[[HASH_0]] // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: -// CHECK: init.num_qubits_[[HASH_0]] +// CHECK: init0.num_qubits_[[HASH_0]] // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: } @@ -591,7 +655,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { { // @cudaq.kernel - // def init(n: int): + // def init1(n: int): // q = cudaq.qvector(n) // x(q[0]) // @@ -602,7 +666,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // def kernel(s: cudaq.State): // ... 
// - // s0 = cudaq.get_state(init, 2) + // s0 = cudaq.get_state(init1, 2) // s1 = cudaq.get_state(state_param, s0) // cudaq.sample(kernel, s1) auto init = "init1"; @@ -730,7 +794,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { { // @cudaq.kernel - // def init(n: int): + // def init2(n: int): // q0 = cudaq.qvector(n) // x(q0[0]) // r = mz(q0[0]) @@ -742,7 +806,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // def kernel(s: cudaq.State): // ... // - // s = cudaq.get_state(init, 2) + // s = cudaq.get_state(init2, 2) // cudaq.sample(kernel, s) auto init = "init2"; auto initCode = From 55e25dd33ae98019549123d581ad2a2fffc6955e Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 12 Mar 2025 16:25:36 -0700 Subject: [PATCH 44/54] Keep storing ops when generating numQubits func Signed-off-by: Anna Gringauze --- runtime/common/ArgumentConversion.cpp | 33 ++++--- runtime/test/test_argument_conversion.cpp | 111 ++++++++++++++-------- 2 files changed, 90 insertions(+), 54 deletions(-) diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 80440e3d57a..c06970c862a 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -100,17 +100,16 @@ static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp substMod, llvm::DataLayout &); /// Create callee.init_N that initializes the state -/// Callee (the kernel captured by state): +/// // clang-format off +/// Callee (the kernel captured by state): /// func.func @callee(%arg0: i64) { -/// %0 = cc.alloca i64 -/// cc.store %arg0, %0 : !cc.ptr -/// %1 = cc.load %0 : !cc.ptr -/// %2 = quake.alloca !quake.veq[%1 : i64] +/// %2 = quake.alloca !quake.veq[%arg0 : i64] /// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref /// quake.x %3 : (!quake.ref) -> () /// return /// } +/// /// callee.init_N: /// func.func private @callee.init_0(%arg0: !quake.veq, %arg0: i64) -> /// !!quake.veq { @@ -228,13 +227,11 @@ static void
createInitFunc(OpBuilder &builder, ModuleOp moduleOp, /// Create callee.num_qubits_N that calculates the number of qubits to /// initialize the state -/// Callee: (the kernel captured by state): +/// // clang-format off +/// Callee: (the kernel captured by state): /// func.func @callee(%arg0: i64) { -/// %0 = cc.alloca i64 -/// cc.store %arg0, %0 : !cc.ptr -/// %1 = cc.load %0 : !cc.ptr -/// %2 = quake.alloca !quake.veq[%1 : i64] +/// %2 = quake.alloca !quake.veq[%arg0 : i64] /// %3 = quake.extract_ref %2[1] : (!quake.veq) -> !quake.ref /// quake.x %3 : (!quake.ref) -> () /// return @@ -242,10 +239,7 @@ static void createInitFunc(OpBuilder &builder, ModuleOp moduleOp, /// /// callee.num_qubits_0: /// func.func private @callee.num_qubits_0(%arg0: i64) -> i64 { -/// %0 = cc.alloca i64 -/// cc.store %arg0, %0 : !cc.ptr -/// %1 = cc.load %0 : !cc.ptr -/// return %1 : i64 +/// return %arg0 : i64 /// } // clang-format on static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, @@ -306,10 +300,21 @@ static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, SmallVector keep; while (!used.empty()) { auto *op = used.pop_back_val(); + if (std::find(keep.begin(), keep.end(), op) != keep.end()) + continue; + keep.push_back(op); + + // Collect ops creating operands used in ops we already collected for (auto opnd : op->getOperands()) if (auto defOp = opnd.getDefiningOp()) used.push_back(defOp); + + // Collect ops that store into memory used in ops we already collected. 
+ for (auto user : op->getUsers()) + if (auto iface = dyn_cast(user)) + if (iface.hasEffect()) + used.push_back(user); } // Remove the rest of the ops diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 825e08152ed..e66a9d37a8f 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -513,6 +513,7 @@ void test_simulation_state(mlir::MLIRContext *ctx) { } void test_quantum_state(mlir::MLIRContext *ctx) { + { // @cudaq.kernel // def init(): @@ -530,19 +531,13 @@ void test_quantum_state(mlir::MLIRContext *ctx) { "}\n"; __cudaq_deviceCodeHolderAdd(init, initCode); - std::int64_t n = 2; - std::vector a = {static_cast(&n)}; - auto s = cudaq::state(new FakeDeviceState(init, a)); + auto s = cudaq::state(new FakeDeviceState(init, {})); std::vector v = {static_cast(&s)}; doSimpleTest(ctx, "!cc.ptr", v, initCode); } // clang-format off // CHECK: Source module: -// CHECK: func.func private @__nvqpp__mlirgen__init() { -// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> -// CHECK: return -// CHECK: } // CHECK: func.func private @callee(!cc.ptr) // CHECK: ======================================== @@ -607,12 +602,6 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // clang-format off // CHECK: Source module: -// CHECK: func.func private @__nvqpp__mlirgen__init0(%arg0: i64) { -// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] -// CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref -// CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () -// CHECK: return -// CHECK: } // CHECK: func.func private @callee(!cc.ptr) // CHECK: ======================================== @@ -708,18 +697,6 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // clang-format off // CHECK: Source module: -// CHECK: func.func private @__nvqpp__mlirgen__init1(%arg0: i64) { -// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] -// CHECK: %[[VAL_1:.*]] = 
quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref -// CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () -// CHECK: return -// CHECK: } -// CHECK: func.func private @__nvqpp__mlirgen__state_param(%arg0: !cc.ptr) { -// CHECK: %[[VAL_0:.*]] = quake.get_number_of_qubits %arg0 : (!cc.ptr) -> i64 -// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq[%[[VAL_0]] : i64] -// CHECK: %[[VAL_2:.*]] = quake.init_state %[[VAL_1]], %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq -// CHECK: return -// CHECK: } // CHECK: func.func private @callee(!cc.ptr) // CHECK: ======================================== @@ -838,21 +815,6 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // clang-format off // CHECK: Source module: -// CHECK: func.func private @__nvqpp__mlirgen__init2(%arg0: i64) { -// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq[%arg0 : i64] -// CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq) -> !quake.ref -// CHECK: quake.x %[[VAL_1]] : (!quake.ref) -> () -// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] name "q0" : (!quake.ref) -> !quake.measure -// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_2]] : (!quake.measure) -> i1 -// CHECK: cc.if(%[[VAL_3]]) { -// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%arg0 : i64] -// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_4]][0] : (!quake.veq) -> !quake.ref -// CHECK: quake.x %[[VAL_5]] : (!quake.ref) -> () -// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_0]][1] : (!quake.veq) -> !quake.ref -// CHECK: quake.y %[[VAL_6]] : (!quake.ref) -> () -// CHECK: } -// CHECK: return -// CHECK: } // CHECK: func.func private @callee(!cc.ptr) // CHECK: ======================================== @@ -901,6 +863,75 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: } // clang-format on + + { + // (No memtoreg pass before argument conversion) + // @cudaq.kernel + // def init3(n: int): + // q0 = cudaq.qvector(n) + // + // def kernel(s: cudaq.State): + // ... 
+ // + // s = cudaq.get_state(init3, 2) + // cudaq.sample(kernel, s) + auto init = "init3"; + auto initCode = " func.func @__nvqpp__mlirgen__init3(%arg0: i64) {\n" + " %0 = cc.alloca i64\n" + " cc.store %arg0, %0 : !cc.ptr\n" + " %1 = cc.load %0 : !cc.ptr\n" + " %2 = quake.alloca !quake.veq[%1 : i64]\n" + " return\n" + "}\n"; + + __cudaq_deviceCodeHolderAdd(init, initCode); + + std::int64_t n = 2; + std::vector a = {static_cast(&n)}; + auto s = cudaq::state(new FakeDeviceState(init, a)); + std::vector v = {static_cast(&s)}; + doSimpleTest(ctx, "!cc.ptr", v, initCode); + } + + // clang-format off +// CHECK: Source module: +// CHECK: func.func private @callee(!cc.ptr) + +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: testy +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init3.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__init3.init_[[HASH_0]] : !cc.ptr +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init3.init_[[HASH_0]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = cc.alloca i64 +// CHECK: cc.store %arg0, %[[VAL_2]] : !cc.ptr +// CHECK: %[[VAL_3:.*]] = cc.load %[[VAL_2]] : !cc.ptr +// CHECK: %[[VAL_4:.*]] = arith.subi %[[VAL_3]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_5:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_4]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: %[[VAL_6:.*]] = arith.addi %[[VAL_0]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_7:.*]] = arith.addi %[[VAL_0]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_8:.*]] = arith.subi %[[VAL_7]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_9:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_8]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: return %[[VAL_9]] : !quake.veq +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init3.num_qubits_[[HASH_0]](%arg0: i64) -> i64 { +// CHECK: 
%[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = cc.alloca i64 +// CHECK: cc.store %arg0, %[[VAL_1]] : !cc.ptr +// CHECK: %[[VAL_2:.*]] = cc.load %[[VAL_1]] : !cc.ptr +// CHECK: %[[VAL_3:.*]] = arith.addi %[[VAL_0]], %[[VAL_2]] : i64 +// CHECK: return %[[VAL_3]] : i64 +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: init3.init_[[HASH_0]] +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: init3.num_qubits_[[HASH_0]] + // clang-format on } void test_combinations(mlir::MLIRContext *ctx) { From 745fc5d9cc7fb15815a39c65a9ddbb1850774e21 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 12 Mar 2025 16:31:47 -0700 Subject: [PATCH 45/54] Cleanup Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.td | 2 +- runtime/common/ArgumentConversion.cpp | 3 +-- runtime/common/BaseRemoteRESTQPU.h | 7 +------ 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 715c50623e6..243e70bfaf0 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -874,7 +874,7 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func The `quake.materialize_state` operation accepts symbols for the synthesized kernels `@num_qubits` and `@init` that argument synthesis generated from - the original kernel call that generated the state, e.g., + the original kernel call that generated the state, e.g., the `cudaq::get_state` call that refers to the result of a specific quantum kernel being invoked with a set of parameters diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index c06970c862a..bdeeafdcc96 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -723,10 +723,9 @@ void 
cudaq::opt::ArgumentConverter::gen(StringRef kernelName, // We should look up the input type signature here. auto fun = sourceModule.lookupSymbol( cudaq::runtime::cudaqGenPrefixName + kernelName.str()); - if (!fun) { + if (!fun) throw std::runtime_error("missing fun in argument conversion: " + kernelName.str()); - } FunctionType fromFuncTy = fun.getFunctionType(); for (auto iter : diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 94ebdbaab2d..64f68cb6021 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -586,8 +586,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { } else modules.emplace_back(kernelName, moduleOp); - std::cout << "Modules: " << modules.size() << std::endl; - if (emulate) { // If we are in emulation mode, we need to first get a full QIR // representation of the code. Then we'll map to an LLVM Module, create a @@ -714,7 +712,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { std::vector results; // If seed is 0, then it has not been set. 
- if (seed == 0) + if (seed > 0) cudaq::set_random_seed(seed); bool hasConditionals = @@ -722,7 +720,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (hasConditionals && isObserve) throw std::runtime_error("error: spin_ops not yet supported with " "kernels containing conditionals"); - if (hasConditionals) { executor->setShots(1); // run one shot at a time @@ -748,8 +745,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { counts.sequential_data(regName); } } - localJIT.clear(); - return cudaq::sample_result(results); } for (std::size_t i = 0; i < codes.size(); i++) { From b8c8c8a0d2794136f031e8d11774dadc82760b05 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 12 Mar 2025 16:56:28 -0700 Subject: [PATCH 46/54] Cleanup Signed-off-by: Anna Gringauze --- test/Quake/arg_subst-5.txt | 2 +- test/Quake/arg_subst-6.txt | 2 +- test/Quake/arg_subst-7.txt | 2 +- test/Quake/arg_subst-8.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/Quake/arg_subst-5.txt b/test/Quake/arg_subst-5.txt index b1383d071d3..2d6ce5c6cf1 100644 --- a/test/Quake/arg_subst-5.txt +++ b/test/Quake/arg_subst-5.txt @@ -1,5 +1,5 @@ // ========================================================================== // -// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. // // All rights reserved. // // // // This source code and the accompanying materials are made available under // diff --git a/test/Quake/arg_subst-6.txt b/test/Quake/arg_subst-6.txt index 4871d034829..4227fbd386b 100644 --- a/test/Quake/arg_subst-6.txt +++ b/test/Quake/arg_subst-6.txt @@ -1,5 +1,5 @@ // ========================================================================== // -// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. // // All rights reserved. 
// // // // This source code and the accompanying materials are made available under // diff --git a/test/Quake/arg_subst-7.txt b/test/Quake/arg_subst-7.txt index a3ed90891ab..58ca8a163e6 100644 --- a/test/Quake/arg_subst-7.txt +++ b/test/Quake/arg_subst-7.txt @@ -1,5 +1,5 @@ // ========================================================================== // -// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. // // All rights reserved. // // // // This source code and the accompanying materials are made available under // diff --git a/test/Quake/arg_subst-8.txt b/test/Quake/arg_subst-8.txt index 7a53d0369de..26583075911 100644 --- a/test/Quake/arg_subst-8.txt +++ b/test/Quake/arg_subst-8.txt @@ -1,5 +1,5 @@ // ========================================================================== // -// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. // // All rights reserved. 
// // // // This source code and the accompanying materials are made available under // From edf02247ac698eaa19b9190e46e7a1a0dced39a1 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 14 Mar 2025 09:35:29 -0700 Subject: [PATCH 47/54] Cleanup Signed-off-by: Anna Gringauze --- python/runtime/cudaq/platform/py_alt_launch_kernel.cpp | 2 +- runtime/common/BaseRemoteRESTQPU.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 5d53c7098cd..00c2bbcc272 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -543,7 +543,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, auto isLocalSimulator = platform.is_simulator() && !platform.is_emulated(); auto isSimulator = isLocalSimulator || isRemoteSimulator; - auto argCon = cudaq::opt::ArgumentConverter(name, unwrap(module)); + cudaq::opt::ArgumentConverter argCon(name, unwrap(module)); argCon.gen(runtimeArgs.getArgs()); // Store kernel and substitution strings on the stack. diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 64f68cb6021..cec3644d19d 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -459,7 +459,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { // created from a kernel that generated the state argument. // Traverse the list and collect substitutions for all those // functions. - auto argCon = cudaq::opt::ArgumentConverter(kernelName, moduleOp); + cudaq::opt::ArgumentConverter argCon(kernelName, moduleOp); argCon.gen(rawArgs); // Store kernel and substitution strings on the stack. 
From 6b5161a74dd1fcddd74e3258b1ec79f5efcc90f4 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 14 Mar 2025 11:43:25 -0700 Subject: [PATCH 48/54] Fix failing test Signed-off-by: Anna Gringauze --- runtime/common/ArgumentConversion.cpp | 3 ++- runtime/common/BaseRemoteRESTQPU.h | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index bdeeafdcc96..4c89a185194 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -313,7 +313,8 @@ static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, // Collect ops that store into memory used in ops we already collected. for (auto user : op->getUsers()) if (auto iface = dyn_cast(user)) - if (iface.hasEffect()) + if (iface.hasEffect() && + !iface.hasEffect()) used.push_back(user); } diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index cec3644d19d..56449854431 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -454,11 +454,9 @@ class BaseRemoteRESTQPU : public cudaq::QPU { mlir::PassManager pm(&context); if (!rawArgs.empty()) { cudaq::info("Run Argument Synth.\n"); - // For quantum devices, create a list of ArgumentConverters - // with nodes corresponding to `init` and `num_qubits` functions - // created from a kernel that generated the state argument. - // Traverse the list and collect substitutions for all those - // functions. + // For quantum devices, we generate a collection of `init` and + // `num_qubits` functions and their substitutions created + // from a kernel and arguments that generated a state argument. 
cudaq::opt::ArgumentConverter argCon(kernelName, moduleOp); argCon.gen(rawArgs); From 4df4390479fe43ffe3ce74c0beb425eaaec7c4ca Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 14 Mar 2025 15:41:27 -0700 Subject: [PATCH 49/54] Fix failing doc build Signed-off-by: Anna Gringauze --- docs/sphinx/api/languages/cpp_api.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sphinx/api/languages/cpp_api.rst b/docs/sphinx/api/languages/cpp_api.rst index 0c050f4faf7..69332c4dd2a 100644 --- a/docs/sphinx/api/languages/cpp_api.rst +++ b/docs/sphinx/api/languages/cpp_api.rst @@ -91,6 +91,8 @@ Common .. doxygenclass:: cudaq::RemoteSimulationState +.. doxygenclass:: cudaq::QPUState + .. doxygenclass:: cudaq::registry::RegisteredType :members: From a5d63c5969b33333d3b29f283b2ebc469bb108f8 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 18 Mar 2025 11:23:16 -0700 Subject: [PATCH 50/54] Address CR comments Signed-off-by: Anna Gringauze --- .../cudaq/Optimizer/Dialect/Quake/QuakeOps.td | 4 +- include/cudaq/Optimizer/Transforms/Passes.td | 4 +- .../Transforms/ReplaceStateWithKernel.cpp | 28 +-- .../cudaq/platform/py_alt_launch_kernel.cpp | 20 +- runtime/common/ArgumentConversion.cpp | 14 +- runtime/common/ArgumentConversion.h | 30 +-- runtime/common/BaseRemoteRESTQPU.h | 20 +- runtime/common/BaseRestRemoteClient.h | 24 +-- runtime/cudaq/algorithms/get_state.h | 8 +- runtime/cudaq/cudaq.cpp | 1 - runtime/cudaq/platform/qpu_state.cpp | 7 - runtime/cudaq/platform/qpu_state.h | 16 +- runtime/test/test_argument_conversion.cpp | 94 +++++---- targettests/execution/test_trotter.cpp | 183 ++++++++++++++++++ test/Quake/arg_subst-7.txt | 2 +- test/Quake/arg_subst_func.qke | 2 +- test/Quake/replace_state_with_kernel.qke | 6 +- 17 files changed, 328 insertions(+), 135 deletions(-) create mode 100644 targettests/execution/test_trotter.cpp diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td index 
1bc97abebc6..8046d5ca4ee 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td @@ -1659,7 +1659,7 @@ def QuakeOp_MaterializeStateOp : QuakeOp<"materialize_state", [Pure] > { pass. ```mlir - %0 = quake.materialize_state @num_qubits @init : !cc.ptr + %0 = quake.materialize_state @num_qubits, @init : !cc.ptr ``` }]; @@ -1669,7 +1669,7 @@ def QuakeOp_MaterializeStateOp : QuakeOp<"materialize_state", [Pure] > { ); let results = (outs PointerOf<[cc_StateType]>:$result); let assemblyFormat = [{ - $numQubitsFunc $initFunc `:` qualified(type(results)) attr-dict + $numQubitsFunc `,` $initFunc `:` qualified(type(results)) attr-dict }]; } diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 76bb242e840..63f4f3b3c0f 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -898,7 +898,7 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func ``` This optimization performs the replacements for the the following operations - that use a state produced by `quake.materialize_state @num_qubits @init` + that use a state produced by `quake.materialize_state @num_qubits, @init` operation: - Replace `quake.get_number_of_qubits` operation by call to `@num_qubits` @@ -910,7 +910,7 @@ def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::Func Before ReplaceStateWithKernel (replace-state-with-kernel): ``` func.func @foo() { - %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0: !cc.ptr + %0 = quake.materialize_state @callee.num_qubits_0, @callee.init_0: !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index 
a9cd1dd80e1..8ff63140b6f 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -33,7 +33,7 @@ namespace { /// that computes the number of qubits for a state. /// /// ```mlir -/// %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr +/// %0 = quake.materialize_state @callee.num_qubits_0, @callee.init_0 : !cc.ptr /// %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 /// ─────────────────────────────────────────── /// %1 = call @callee.num_qubits_0() : () -> i64 @@ -47,11 +47,14 @@ class ReplaceGetNumQubitsPattern LogicalResult matchAndRewrite(quake::GetNumberOfQubitsOp numQubits, PatternRewriter &rewriter) const override { - auto stateOp = numQubits.getOperand(); + auto stateOp = numQubits.getState(); auto materializeState = stateOp.getDefiningOp(); - if (!materializeState) - return numQubits->emitError( - "ReplaceStateWithKernel: failed to replace `quake.get_num_qubits`"); + if (!materializeState) { + LLVM_DEBUG(llvm::dbgs() << "ReplaceStateWithKernel: failed to replace " + "`quake.get_num_qubits`: " + << stateOp << '\n'); + return failure(); + } auto numQubitsFunc = materializeState.getNumQubitsFunc(); rewriter.setInsertionPoint(numQubits); @@ -66,7 +69,7 @@ class ReplaceGetNumQubitsPattern /// the state. 
/// /// ```mlir -/// %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr +/// %0 = quake.materialize_state @callee.num_qubits_0, @callee.init_0 : !cc.ptr /// %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq /// ─────────────────────────────────────────── /// %3 = call @callee.init_0(%2): (!quake.veq) -> !quake.veq @@ -79,16 +82,19 @@ class ReplaceInitStatePattern LogicalResult matchAndRewrite(quake::InitializeStateOp initState, PatternRewriter &rewriter) const override { - auto allocaOp = initState.getOperand(0); - auto stateOp = initState.getOperand(1); + auto allocaOp = initState.getTargets(); + auto stateOp = initState.getState(); if (auto ptrTy = dyn_cast(stateOp.getType())) { if (isa(ptrTy.getElementType())) { auto materializeState = stateOp.getDefiningOp(); - if (!materializeState) - return initState->emitError( - "ReplaceStateWithKernel: failed to replace `quake.init_state`"); + if (!materializeState) { + LLVM_DEBUG(llvm::dbgs() << "ReplaceStateWithKernel: failed to " + "replace `quake.init_state`: " + << stateOp << '\n'); + return failure(); + } auto initName = materializeState.getInitFunc(); rewriter.setInsertionPoint(initState); diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 00c2bbcc272..687886cdffb 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -550,18 +550,14 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, // We pass string references to the `createArgumentSynthesisPass`. 
mlir::SmallVector kernels; mlir::SmallVector substs; - for (auto &kInfo : argCon.getKernelSubstitutions()) { - { - std::string kernName = - cudaq::runtime::cudaqGenPrefixName + kInfo.getKernelName().str(); - kernels.emplace_back(kernName); - } - { - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << kInfo.getSubstitutionModule(); - substs.emplace_back(substBuff); - } + for (auto *kInfo : argCon.getKernelSubstitutions()) { + std::string kernName = + cudaq::runtime::cudaqGenPrefixName + kInfo->getKernelName().str(); + kernels.emplace_back(kernName); + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << kInfo->getSubstitutionModule(); + substs.emplace_back(substBuff); } // Collect references for the argument synthesis. diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 4c89a185194..8c1af552461 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -124,9 +124,8 @@ static void createInitFunc(OpBuilder &builder, ModuleOp moduleOp, builder.setInsertionPointToEnd(moduleOp.getBody()); auto ctx = builder.getContext(); - auto loc = builder.getUnknownLoc(); - auto initFunc = cast(builder.clone(*calleeFunc)); + auto loc = initFunc.getLoc(); auto argTypes = calleeFunc.getArgumentTypes(); auto retTy = quake::VeqType::getUnsized(ctx); @@ -249,9 +248,8 @@ static void createNumQubitsFunc(OpBuilder &builder, ModuleOp moduleOp, builder.setInsertionPointToEnd(moduleOp.getBody()); auto ctx = builder.getContext(); - auto loc = builder.getUnknownLoc(); - auto numQubitsFunc = cast(builder.clone(*calleeFunc)); + auto loc = numQubitsFunc.getLoc(); auto argTypes = calleeFunc.getArgumentTypes(); auto retType = builder.getI64Type(); @@ -515,11 +513,11 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, createNumQubitsFunc(builder, substMod, calleeFunc, numQubitsKernelName); // Convert arguments for `callee.init_N`. 
- auto &registeredInitName = converter.registerKernel(initName); + auto registeredInitName = converter.registerKernel(initName); converter.gen(registeredInitName, substMod, calleeArgs); // Convert arguments for `callee.num_qubits_N`. - auto &registeredNumQubitsName = converter.registerKernel(numQubitsName); + auto registeredNumQubitsName = converter.registerKernel(numQubitsName); converter.gen(registeredNumQubitsName, substMod, calleeArgs); } @@ -719,7 +717,7 @@ void cudaq::opt::ArgumentConverter::gen(StringRef kernelName, OpBuilder builder(ctx); ModuleOp substModule = builder.create(builder.getUnknownLoc()); - auto &kernelInfo = addKernelInfo(kernelName, substModule); + auto *kernelInfo = addKernelInfo(kernelName, substModule); // We should look up the input type signature here. auto fun = sourceModule.lookupSymbol( @@ -813,7 +811,7 @@ void cudaq::opt::ArgumentConverter::gen(StringRef kernelName, }) .Default({}); if (subst) - kernelInfo.getSubstitutions().emplace_back(std::move(subst)); + kernelInfo->getSubstitutions().emplace_back(std::move(subst)); } } diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 9252ee1b8a2..6d4d23958fc 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -53,6 +53,12 @@ class ArgumentConverter { /// kernelName in \p sourceModule. ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule); + ~ArgumentConverter() { + for (auto *kInfo : kernelSubstitutions) { + delete kInfo; + } + } + /// Generate a substitution ModuleOp for the vector of arguments presented. /// The arguments are those presented to the kernel, kernelName. void gen(const std::vector &arguments); @@ -72,32 +78,32 @@ class ArgumentConverter { void gen_drop_front(const std::vector &arguments, unsigned numDrop); /// Get the kernel info that were collected by `gen()`.
- std::list &getKernelSubstitutions() { + mlir::SmallVector &getKernelSubstitutions() { return kernelSubstitutions; } - bool isRegisteredKernel(const std::string &kernelName) { - return std::find(nameRegistry.begin(), nameRegistry.end(), kernelName) != - nameRegistry.end(); + bool isRegisteredKernel(mlir::StringRef kernelName) { + return std::find(nameRegistry.begin(), nameRegistry.end(), + kernelName.str()) != nameRegistry.end(); } - std::string &registerKernel(const std::string &kernelName) { - return nameRegistry.emplace_back(kernelName); + mlir::StringRef registerKernel(mlir::StringRef kernelName) { + return nameRegistry.emplace_back( + mlir::StringAttr::get(sourceModule.getContext(), kernelName)); } private: - KernelSubstitutionInfo &addKernelInfo(mlir::StringRef kernelName, + KernelSubstitutionInfo *addKernelInfo(mlir::StringRef kernelName, mlir::ModuleOp substModule) { - return kernelSubstitutions.emplace_back(kernelName, substModule); + return kernelSubstitutions.emplace_back( + new KernelSubstitutionInfo(kernelName, substModule)); } /// Memory to store new kernel names generated during argument conversion. - /// Use list here to keep references to those elements valid. - std::list nameRegistry; + mlir::SmallVector nameRegistry; /// Memory to store new kernel info generated during argument conversion. - /// Use list here to keep elements sorted in order of creation. - std::list kernelSubstitutions; + mlir::SmallVector kernelSubstitutions; /// Original module before substitutions. mlir::ModuleOp sourceModule; diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 56449854431..60070249615 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -464,18 +464,14 @@ class BaseRemoteRESTQPU : public cudaq::QPU { // We pass string references to the `createArgumentSynthesisPass`.
mlir::SmallVector kernels; mlir::SmallVector substs; - for (auto &kInfo : argCon.getKernelSubstitutions()) { - { - std::string kernName = cudaq::runtime::cudaqGenPrefixName + - kInfo.getKernelName().str(); - kernels.emplace_back(kernName); - } - { - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << kInfo.getSubstitutionModule(); - substs.emplace_back(substBuff); - } + for (auto *kInfo : argCon.getKernelSubstitutions()) { + std::string kernName = + cudaq::runtime::cudaqGenPrefixName + kInfo->getKernelName().str(); + kernels.emplace_back(kernName); + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << kInfo->getSubstitutionModule(); + substs.emplace_back(substBuff); } // Collect references for the argument synthesis. diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index fdb34719420..445463b0c63 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -190,18 +190,14 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { // We pass string references to the `createArgumentSynthesisPass`. mlir::SmallVector kernels; mlir::SmallVector substs; - for (auto &kInfo : argCon.getKernelSubstitutions()) { - { - std::string kernName = cudaq::runtime::cudaqGenPrefixName + - kInfo.getKernelName().str(); - kernels.emplace_back(kernName); - } - { - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << kInfo.getSubstitutionModule(); - substs.emplace_back(substBuff); - } + for (auto *kInfo : argCon.getKernelSubstitutions()) { + std::string kernName = cudaq::runtime::cudaqGenPrefixName + + kInfo->getKernelName().str(); + kernels.emplace_back(kernName); + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << kInfo->getSubstitutionModule(); + substs.emplace_back(substBuff); } // Collect references for the argument synthesis. 
@@ -349,6 +345,10 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { if (!castedState1 || !castedState2) throw std::runtime_error( "Invalid execution context: input states are not compatible"); + if (!castedState1->getKernelInfo().has_value()) + throw std::runtime_error("Missing first input state in state-overlap"); + if (!castedState2->getKernelInfo().has_value()) + throw std::runtime_error("Missing second input state in state-overlap"); auto [kernelName1, args1] = castedState1->getKernelInfo().value(); auto [kernelName2, args2] = castedState2->getKernelInfo().value(); cudaq::IRPayLoad stateIrPayload1, stateIrPayload2; diff --git a/runtime/cudaq/algorithms/get_state.h b/runtime/cudaq/algorithms/get_state.h index 79202f98b48..093ae36dcff 100644 --- a/runtime/cudaq/algorithms/get_state.h +++ b/runtime/cudaq/algorithms/get_state.h @@ -119,8 +119,7 @@ auto get_state(QuantumKernel &&kernel, Args &&...args) { return state(new RemoteSimulationState(std::forward(kernel), std::forward(args)...)); } -#else -#if defined(CUDAQ_QUANTUM_DEVICE) && !defined(CUDAQ_LIBRARY_MODE) +#elif defined(CUDAQ_QUANTUM_DEVICE) && !defined(CUDAQ_LIBRARY_MODE) // Store kernel name and arguments for quantum states. if (!cudaq::get_quake_by_name(cudaq::getKernelName(kernel), false).empty()) return state(new QPUState(std::forward(kernel), @@ -128,8 +127,7 @@ auto get_state(QuantumKernel &&kernel, Args &&...args) { throw std::runtime_error( "cudaq::state* argument synthesis is not supported for quantum hardware" " for c-like functions, use class kernels instead"); -#else -#if defined(CUDAQ_QUANTUM_DEVICE) +#elif defined(CUDAQ_QUANTUM_DEVICE) // Kernel builder is MLIR-based kernel. 
if constexpr (has_name::value) return state(new QPUState(std::forward(kernel), @@ -138,8 +136,6 @@ auto get_state(QuantumKernel &&kernel, Args &&...args) { throw std::runtime_error( "cudaq::state* argument synthesis is not supported for quantum hardware" " for c-like functions in library mode"); -#endif -#endif #endif return details::extractState([&]() mutable { cudaq::invokeKernel(std::forward(kernel), diff --git a/runtime/cudaq/cudaq.cpp b/runtime/cudaq/cudaq.cpp index 5dbdf4ee8cf..071f658f43f 100644 --- a/runtime/cudaq/cudaq.cpp +++ b/runtime/cudaq/cudaq.cpp @@ -19,7 +19,6 @@ #include "distributed/mpi_plugin.h" #include #include -#include #include #include #include diff --git a/runtime/cudaq/platform/qpu_state.cpp b/runtime/cudaq/platform/qpu_state.cpp index 0561ca29ddb..24ce4c412c9 100644 --- a/runtime/cudaq/platform/qpu_state.cpp +++ b/runtime/cudaq/platform/qpu_state.cpp @@ -7,17 +7,10 @@ ******************************************************************************/ #include "qpu_state.h" -#include "common/Logger.h" namespace cudaq { QPUState::~QPUState() { - if (!platformExecutionLog.empty()) { - // Flush any info log from the remote execution - printf("%s\n", platformExecutionLog.c_str()); - platformExecutionLog.clear(); - } - for (std::size_t counter = 0; auto &ptr : args) deleters[counter++](ptr); diff --git a/runtime/cudaq/platform/qpu_state.h b/runtime/cudaq/platform/qpu_state.h index a13ac6f7b40..a04120b3728 100644 --- a/runtime/cudaq/platform/qpu_state.h +++ b/runtime/cudaq/platform/qpu_state.h @@ -13,21 +13,13 @@ #include "cudaq/utils/cudaq_utils.h" namespace cudaq { -/// Implementation of `SimulationState` for quantum device backends. -// The state is represented by a quantum kernel. -// Quantum state contains all the information we need to replicate a -// call to kernel that created the state. +/// @brief Implementation of `SimulationState` for quantum device backends. +/// The state is represented by a quantum kernel. 
+/// Quantum state contains all the information we need to replicate a +/// call to kernel that created the state. class QPUState : public cudaq::SimulationState { protected: std::string kernelName; - // Lazily-evaluated state data (just keeping the kernel name and arguments). - // e.g., to be evaluated at amplitude accessor APIs (const APIs, hence needs - // to be mutable) or overlap calculation with another remote state (combining - // the IR of both states for remote evaluation) - mutable std::unique_ptr state; - // Cache log messages from the remote execution. - // Mutable to support lazy execution during `const` API calls. - mutable std::string platformExecutionLog; using ArgDeleter = std::function; /// @brief Vector of arguments // Note: we create a copy of all arguments except pointers. diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index e66a9d37a8f..fb913384c7d 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -143,13 +143,12 @@ class FakeDeviceState : public cudaq::SimulationState { extern "C" void __cudaq_deviceCodeHolderAdd(const char *, const char *); void dumpSubstitutionModules(cudaq::opt::ArgumentConverter &con) { - for (auto &kInfo : con.getKernelSubstitutions()) { - // Dump the conversions + // Dump the conversions + for (auto *kInfo : con.getKernelSubstitutions()) llvm::outs() << "========================================\n" "Substitution module:\n" - << kInfo.getKernelName() << "\n" - << kInfo.getSubstitutionModule() << '\n'; - } + << kInfo->getKernelName() << "\n" + << kInfo->getSubstitutionModule() << '\n'; } void doSimpleTest(mlir::MLIRContext *ctx, const std::string &typeName, @@ -544,7 +543,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init.num_qubits_[[HASH_0:.*]] 
@__nvqpp__mlirgen__init.init_[[HASH_0]] : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init.num_qubits_[[HASH_0:.*]], @__nvqpp__mlirgen__init.init_[[HASH_0]] : !cc.ptr // CHECK: } // CHECK: func.func private @__nvqpp__mlirgen__init.init_[[HASH_0]](%arg0: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 @@ -608,7 +607,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init0.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__init0.init_[[HASH_0]] : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init0.num_qubits_[[HASH_0:.*]], @__nvqpp__mlirgen__init0.init_[[HASH_0]] : !cc.ptr // CHECK: } // CHECK: func.func private @__nvqpp__mlirgen__init0.init_[[HASH_0]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 @@ -657,15 +656,14 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // // s0 = cudaq.get_state(init1, 2) // s1 = cudaq.get_state(state_param, s0) - // cudaq.sample(kernel, s1) + // s2 = cudaq.get_state(state_param, s1) + // s3 = cudaq.get_state(state_param, s2) + // cudaq.sample(kernel, s3) auto init = "init1"; - auto initCode = - "func.func private @__nvqpp__mlirgen__init1(%arg0: i64) {\n" - " %0 = quake.alloca !quake.veq[%arg0 : i64]\n" - " %1 = quake.extract_ref %0[0] : (!quake.veq) -> !quake.ref\n" - " quake.x %1 : (!quake.ref) -> ()\n" - " return\n" - "}\n"; + auto initCode = "func.func private @__nvqpp__mlirgen__init1(%arg0: i64) {\n" + " %0 = quake.alloca !quake.veq[%arg0 : i64]\n" + " return\n" + "}\n"; __cudaq_deviceCodeHolderAdd(init, initCode); auto stateParam = "state_param"; @@ -690,9 +688,11 @@ void test_quantum_state(mlir::MLIRContext *ctx) { std::vector v0 = {static_cast(&s0)}; auto s1 = cudaq::state(new FakeDeviceState(stateParam, v0)); std::vector v1 = 
{static_cast(&s1)}; + auto s2 = cudaq::state(new FakeDeviceState(stateParam, v1)); + std::vector v2 = {static_cast(&s2)}; auto code = std::string{initCode} + std::string{stateParamCode}; - doSimpleTest(ctx, "!cc.ptr", v1, code); + doSimpleTest(ctx, "!cc.ptr", v2, code); } // clang-format off @@ -703,7 +703,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %0 = quake.materialize_state @__nvqpp__mlirgen__state_param.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__state_param.init_[[HASH_0]] : !cc.ptr +// CHECK: %0 = quake.materialize_state @__nvqpp__mlirgen__state_param.num_qubits_[[HASH_0:.*]], @__nvqpp__mlirgen__state_param.init_[[HASH_0]] : !cc.ptr // CHECK: } // CHECK: func.func private @__nvqpp__mlirgen__state_param.init_[[HASH_0]](%arg0: !cc.ptr, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 @@ -728,45 +728,73 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: state_param.init_[[HASH_0]] // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %0 = quake.materialize_state @__nvqpp__mlirgen__init1.num_qubits_[[HASH_1:.*]] @__nvqpp__mlirgen__init1.init_[[HASH_1]] : !cc.ptr +// CHECK: %0 = quake.materialize_state @__nvqpp__mlirgen__state_param.num_qubits_[[HASH_1:.*]], @__nvqpp__mlirgen__state_param.init_[[HASH_1]] : !cc.ptr // CHECK: } -// CHECK: func.func private @__nvqpp__mlirgen__init1.init_[[HASH_1]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { +// CHECK: func.func private @__nvqpp__mlirgen__state_param.init_[[HASH_1]](%arg0: !cc.ptr, %arg1: !quake.veq) -> !quake.veq { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = quake.get_number_of_qubits %arg0 : (!cc.ptr) -> i64 +// CHECK: %[[VAL_3:.*]] = arith.subi %[[VAL_2]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_4:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_3]] : (!quake.veq, i64, 
i64) -> !quake.veq +// CHECK: %[[VAL_5:.*]] = arith.addi %[[VAL_0]], %[[VAL_2]] : i64 +// CHECK: %[[VAL_6:.*]] = arith.addi %[[VAL_0]], %[[VAL_2]] : i64 +// CHECK: %[[VAL_7:.*]] = quake.init_state %[[VAL_4]], %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq +// CHECK: %[[VAL_8:.*]] = arith.subi %[[VAL_6]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_9:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_8]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: return %[[VAL_9]] : !quake.veq +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__state_param.num_qubits_[[HASH_1]](%arg0: !cc.ptr) -> i64 { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = quake.get_number_of_qubits %arg0 : (!cc.ptr) -> i64 +// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i64 +// CHECK: return %[[VAL_2]] : i64 +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: +// CHECK: state_param.init_[[HASH_1]] +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %0 = quake.materialize_state @__nvqpp__mlirgen__init1.num_qubits_[[HASH_2:.*]], @__nvqpp__mlirgen__init1.init_[[HASH_2]] : !cc.ptr +// CHECK: } +// CHECK: func.func private @__nvqpp__mlirgen__init1.init_[[HASH_2]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 // CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 // CHECK: %[[VAL_2:.*]] = arith.subi %arg0, %[[VAL_1]] : i64 -// CHECK: %[[VAL_3:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_2]] : (!quake.veq, i64, i64) -> !quake.veq -// CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: %[[VAL_4:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_2]] : (!quake.veq, i64, i64) -> !quake.veq // CHECK: %[[VAL_5:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 -// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_3]][0] : (!quake.veq) -> !quake.ref -// CHECK: quake.x %[[VAL_6]] : (!quake.ref) -> () -// CHECK: %[[VAL_7:.*]] = arith.subi %[[VAL_5]], %[[VAL_1]] : i64 -// CHECK: %[[VAL_8:.*]] = 
quake.subveq %arg1, %[[VAL_0]], %[[VAL_7]] : (!quake.veq, i64, i64) -> !quake.veq -// CHECK: return %[[VAL_8]] : !quake.veq +// CHECK: %[[VAL_6:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 +// CHECK: %[[VAL_8:.*]] = arith.subi %[[VAL_6]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_9:.*]] = quake.subveq %arg1, %[[VAL_0]], %[[VAL_8]] : (!quake.veq, i64, i64) -> !quake.veq +// CHECK: return %[[VAL_9]] : !quake.veq // CHECK: } -// CHECK: func.func private @__nvqpp__mlirgen__init1.num_qubits_[[HASH_1]](%arg0: i64) -> i64 { +// CHECK: func.func private @__nvqpp__mlirgen__init1.num_qubits_[[HASH_2]](%arg0: i64) -> i64 { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 // CHECK: %[[VAL_1:.*]] = arith.addi %[[VAL_0]], %arg0 : i64 // CHECK: return %[[VAL_1]] : i64 // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: -// CHECK: init1.init_[[HASH_1]] +// CHECK: init1.init_[[HASH_2]] // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: -// CHECK: init1.num_qubits_[[HASH_1]] +// CHECK: init1.num_qubits_[[HASH_2]] // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = arith.constant 2 : i64 // CHECK: } // CHECK: ======================================== // CHECK: Substitution module: +// CHECK: state_param.num_qubits_[[HASH_1]] +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %0 = quake.materialize_state @__nvqpp__mlirgen__init1.num_qubits_[[HASH_2]], @__nvqpp__mlirgen__init1.init_[[HASH_2]] : !cc.ptr +// CHECK: } +// CHECK: ======================================== +// CHECK: Substitution module: // CHECK: state_param.num_qubits_[[HASH_0]] // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init1.num_qubits_[[HASH_1]] @__nvqpp__mlirgen__init1.init_[[HASH_1]] : !cc.ptr +// CHECK: %0 = quake.materialize_state @__nvqpp__mlirgen__state_param.num_qubits_[[HASH_1]], 
@__nvqpp__mlirgen__state_param.init_[[HASH_1]] : !cc.ptr // CHECK: } - // clang-format on { @@ -821,7 +849,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init2.num_qubits_[[HASH_1:.*]] @__nvqpp__mlirgen__init2.init_[[HASH_1]] : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init2.num_qubits_[[HASH_1:.*]], @__nvqpp__mlirgen__init2.init_[[HASH_1]] : !cc.ptr // CHECK: } // CHECK: func.func private @__nvqpp__mlirgen__init2.init_[[HASH_1]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 @@ -901,7 +929,7 @@ void test_quantum_state(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK: testy // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init3.num_qubits_[[HASH_0:.*]] @__nvqpp__mlirgen__init3.init_[[HASH_0]] : !cc.ptr +// CHECK: %[[VAL_0:.*]] = quake.materialize_state @__nvqpp__mlirgen__init3.num_qubits_[[HASH_0:.*]], @__nvqpp__mlirgen__init3.init_[[HASH_0]] : !cc.ptr // CHECK: } // CHECK: func.func private @__nvqpp__mlirgen__init3.init_[[HASH_0]](%arg0: i64, %arg1: !quake.veq) -> !quake.veq { // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 diff --git a/targettests/execution/test_trotter.cpp b/targettests/execution/test_trotter.cpp new file mode 100644 index 00000000000..4dd06bb3040 --- /dev/null +++ b/targettests/execution/test_trotter.cpp @@ -0,0 +1,183 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// clang-format off +// TODO-FIX-KERNEL-EXEC +// Simulators +// RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s + +// Quantum emulators +// RUN: if %braket_avail; then nvq++ %cpp_std -target braket -emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s ; fi +// RUN: nvq++ %cpp_std -target quantinuum -emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std -target ionq -emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std -target oqc -emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s + +// 2 different IQM machines for 2 different topologies +// RUN: nvq++ %cpp_std -target iqm --iqm-machine Adonis -emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std -target iqm --iqm-machine Apollo -emulate %s -fkernel-exec-kind=2 -o %t && %t | FileCheck %s +// clang-format on + +#include +#include +#include +#include + +// Compute magnetization using Suzuki-Trotter approximation. +// This example demonstrates usage of quantum states in kernel mode. +// +// Details +// https://pubs.aip.org/aip/jmp/article-abstract/32/2/400/229229/General-theory-of-fractal-path-integrals-with +// +// Hamiltonian used +// https://en.m.wikipedia.org/wiki/Quantum_Heisenberg_model + +// If you have an NVIDIA GPU you can use this example to see +// that the GPU-accelerated backends can easily handle a +// larger number of qubits compared to the CPU-only backend. +// +// Depending on the available memory on your GPU, you can +// set the number of qubits to around 30 qubits, and run +// the execution command with `-target nvidia` option. +// +// Note: Without setting the target to the `nvidia` backend, +// there will be a noticeable decrease in simulation performance. +// This is because the CPU-only backend has difficulty handling +// 30+ qubit simulations. 
+ +int SPINS = 5; // set to around 25 qubits for `nvidia` target +int STEPS = 4; // set to around 100 for `nvidia` target + +// Compile and run with: +// clang-format off +// ``` +// nvq++ --enable-mlir -v trotter_kernel_mode.cpp -o trotter.x --target nvidia && ./trotter.x +// ``` +// clang-format on + +// Alternating up/down spins +struct initState { + void operator()(int num_spins) __qpu__ { + cudaq::qvector q(num_spins); + for (int qId = 0; qId < num_spins; qId += 2) + x(q[qId]); + } +}; + +std::vector term_coefficients(cudaq::spin_op op) { + std::vector result{}; + op.for_each_term([&](cudaq::spin_op &term) { + const auto coeff = term.get_coefficient().real(); + result.push_back(coeff); + }); + return result; +} + +std::vector term_words(cudaq::spin_op op) { + std::vector result{}; + op.for_each_term( + [&](cudaq::spin_op &term) { result.push_back(term.to_string(false)); }); + return result; +} + +struct trotter { + // Note: This performs a single-step Trotter on top of an initial state, e.g., + // result state of the previous Trotter step. 
+ void operator()(cudaq::state *initial_state, + std::vector &coefficients, + std::vector &words, double dt) __qpu__ { + cudaq::qvector q(initial_state); + for (std::size_t i = 0; i < coefficients.size(); ++i) { + cudaq::exp_pauli(coefficients[i] * dt, q, words[i]); + } + } +}; + +int run_steps(int steps, int spins) { + const double g = 1.0; + const double Jx = 1.0; + const double Jy = 1.0; + const double Jz = g; + const double dt = 0.05; + const int n_steps = steps; + const int n_spins = spins; + const double omega = 2 * M_PI; + const auto heisenbergModelHam = [&](double t) -> cudaq::spin_op { + cudaq::spin_op tdOp(n_spins); + for (int i = 0; i < n_spins - 1; ++i) { + tdOp += (Jx * cudaq::spin::x(i) * cudaq::spin::x(i + 1)); + tdOp += (Jy * cudaq::spin::y(i) * cudaq::spin::y(i + 1)); + tdOp += (Jz * cudaq::spin::z(i) * cudaq::spin::z(i + 1)); + } + for (int i = 0; i < n_spins; ++i) + tdOp += (std::cos(omega * t) * cudaq::spin::x(i)); + return tdOp; + }; + // Observe the average magnetization of all spins () + cudaq::spin_op average_magnetization(n_spins); + for (int i = 0; i < n_spins; ++i) + average_magnetization += ((1.0 / n_spins) * cudaq::spin::z(i)); + average_magnetization -= 1.0; + + // Run loop + auto state = cudaq::get_state(initState{}, n_spins); + std::vector expResults; + std::vector runtimeMs; + for (int i = 0; i < n_steps; ++i) { + const auto start = std::chrono::high_resolution_clock::now(); + auto ham = heisenbergModelHam(i * dt); + auto coefficients = term_coefficients(ham); + auto words = term_words(ham); + auto magnetization_exp_val = cudaq::observe( + trotter{}, average_magnetization, &state, coefficients, words, dt); + auto result = magnetization_exp_val.expectation(); + expResults.emplace_back(result); + state = cudaq::get_state(trotter{}, &state, coefficients, words, dt); + const auto stop = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(stop - start); + auto timeInSeconds = duration.count() / 
1000.0 / 1000.0; + runtimeMs.emplace_back(timeInSeconds); + std::cout << "Step " << i << ": time [s]: " << timeInSeconds + << ", result: " << result << std::endl; + } + std::cout << std::endl; + + // Print runtimes and results (useful for plotting). + std::cout << "Step times [s]: ["; + for (const auto &x : runtimeMs) + std::cout << x << ", "; + std::cout << "]" << std::endl; + + std::cout << "Results: ["; + for (const auto &x : expResults) + std::cout << x << ", "; + std::cout << "]" << std::endl; + + std::cout << std::endl; + return 0; +} + +int main() { + const auto start = std::chrono::high_resolution_clock::now(); + run_steps(STEPS, SPINS); + const auto stop = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(stop - start); + std::cout << "Total running time: " << duration.count() / 1000.0 / 1000.0 + << "s" << std::endl; +} + +// CHECK: Step 0: time [s]: [[t0:.*]], result: [[v0:.*]] +// CHECK: Step 1: time [s]: [[t1:.*]], result: [[v1:.*]] +// CHECK: Step 2: time [s]: [[t2:.*]], result: [[v2:.*]] +// CHECK: Step 3: time [s]: [[t3:.*]], result: [[v3:.*]] + +// CHECK: Step times [s]: [[ts:.*]] +// CHECK: Results: [[rs:.*]] + +// CHECK: Total running time: [[tts:.*]]s diff --git a/test/Quake/arg_subst-7.txt b/test/Quake/arg_subst-7.txt index 58ca8a163e6..e5ec93f57f8 100644 --- a/test/Quake/arg_subst-7.txt +++ b/test/Quake/arg_subst-7.txt @@ -8,7 +8,7 @@ module { cc.arg_subst[0] { - %0 = quake.materialize_state @num_qubits @init : !cc.ptr + %0 = quake.materialize_state @num_qubits, @init : !cc.ptr } func.func @init(%arg0: i64, %arg1: !quake.veq) -> !quake.veq { return %arg1 : !quake.veq diff --git a/test/Quake/arg_subst_func.qke b/test/Quake/arg_subst_func.qke index 5310404c3cd..2125ca99710 100644 --- a/test/Quake/arg_subst_func.qke +++ b/test/Quake/arg_subst_func.qke @@ -163,7 +163,7 @@ func.func @testy6(%arg0: !cc.ptr) { // CHECK: return %[[VAL_0]] : i32 // CHECK: } // CHECK-LABEL: func.func @testy6() { -// CHECK: 
%[[VAL_2:.*]] = quake.materialize_state @num_qubits @init : !cc.ptr +// CHECK: %[[VAL_2:.*]] = quake.materialize_state @num_qubits, @init : !cc.ptr // CHECK: %[[VAL_3:.*]] = quake.get_number_of_qubits %[[VAL_2]] : (!cc.ptr) -> i64 // CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] // CHECK: %[[VAL_5:.*]] = quake.init_state %[[VAL_4]], %[[VAL_2]] : (!quake.veq, !cc.ptr) -> !quake.veq diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index 38b1c81d36d..40ca88badd9 100644 --- a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -23,7 +23,7 @@ module { } func.func @caller0() { - %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr + %0 = quake.materialize_state @callee.num_qubits_0, @callee.init_0 : !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq @@ -38,7 +38,7 @@ module { // CHECK: } func.func @caller1(%arg0: i64) { - %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr + %0 = quake.materialize_state @callee.num_qubits_0, @callee.init_0 : !cc.ptr %2 = quake.alloca !quake.veq[%arg0 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq return @@ -51,7 +51,7 @@ module { // CHECK: } func.func @caller2() -> i64 { - %0 = quake.materialize_state @callee.num_qubits_0 @callee.init_0 : !cc.ptr + %0 = quake.materialize_state @callee.num_qubits_0, @callee.init_0 : !cc.ptr %1 = quake.get_number_of_qubits %0 : (!cc.ptr) -> i64 return %1: i64 } From 4a12db08bc5514e4c3ba317cb4e94d3f523094e5 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 18 Mar 2025 11:37:49 -0700 Subject: [PATCH 51/54] Address more CR comments and add a test Signed-off-by: Anna Gringauze --- runtime/common/ArgumentConversion.h | 1 - targettests/execution/test_trotter.cpp | 1 - 2 files changed, 2 deletions(-) diff --git 
a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 6d4d23958fc..d38de7399cf 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -13,7 +13,6 @@ #include "cudaq/qis/state.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Types.h" -#include #include #include diff --git a/targettests/execution/test_trotter.cpp b/targettests/execution/test_trotter.cpp index 4dd06bb3040..341594ecefb 100644 --- a/targettests/execution/test_trotter.cpp +++ b/targettests/execution/test_trotter.cpp @@ -12,7 +12,6 @@ // RUN: nvq++ %cpp_std %s -o %t && %t | FileCheck %s // Quantum emulators -// RUN: if %braket_avail; then nvq++ %cpp_std -target braket -emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s ; fi // RUN: nvq++ %cpp_std -target quantinuum -emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s // RUN: nvq++ %cpp_std -target ionq -emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s // RUN: nvq++ %cpp_std -target oqc -emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s From cc1faea5fd87773d53ee13b5c144ee667fcb7a79 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 18 Mar 2025 11:59:53 -0700 Subject: [PATCH 52/54] Address more CR comments Signed-off-by: Anna Gringauze --- runtime/test/test_argument_conversion.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index fb913384c7d..2e3f43720df 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -18,7 +18,6 @@ #include "cudaq/qis/pauli_word.h" #include "cudaq/qis/state.h" #include "mlir/Parser/Parser.h" -#include #include #include @@ -109,8 +108,13 @@ class FakeDeviceState : public cudaq::SimulationState { operator()(std::size_t tensorIdx, const std::vector &indices) override { if (hasData()) { - assert(tensorIdx == 0); - assert(indices.size() == 1); + if (tensorIdx != 
0) + throw std::runtime_error("Non-zero tensor index is not supported"); + + if (indices.size() != 1) + throw std::runtime_error( + "Multi-dimensional tensor index is not supported"); + return *(static_cast *>(data) + indices[0]); } throw std::runtime_error("Not implemented"); From 175a70d38c9fbcc5176f29c6c5945e65edd369a8 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Wed, 19 Mar 2025 21:28:45 -0700 Subject: [PATCH 53/54] Fix links Signed-off-by: Anna Gringauze --- docs/sphinx/using/backends/hardware/neutralatom.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/using/backends/hardware/neutralatom.rst b/docs/sphinx/using/backends/hardware/neutralatom.rst index d54031c5fdc..918bc42ff5e 100644 --- a/docs/sphinx/using/backends/hardware/neutralatom.rst +++ b/docs/sphinx/using/backends/hardware/neutralatom.rst @@ -11,8 +11,8 @@ accessed via `Superstaq `__, a cross-platform that performs low-level compilation and cross-layer optimization. To get started users can create a Superstaq account by following `these instructions `__. -For access to Infleqtion's neutral atom quantum computer, Sqale, -`pre-registration `__ is now open. +For access to Infleqtion's neutral atom quantum computer, Sqale, see details about +`pre-registration `__. 
Setting Credentials ````````````````````````` From e1449de8bb7cc1f3477fda04f4c4ae0f861dfe62 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 20 Mar 2025 09:54:49 -0700 Subject: [PATCH 54/54] Fix links Signed-off-by: Anna Gringauze --- docs/sphinx/using/backends/hardware/neutralatom.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/sphinx/using/backends/hardware/neutralatom.rst b/docs/sphinx/using/backends/hardware/neutralatom.rst index 918bc42ff5e..313a32224fc 100644 --- a/docs/sphinx/using/backends/hardware/neutralatom.rst +++ b/docs/sphinx/using/backends/hardware/neutralatom.rst @@ -11,8 +11,6 @@ accessed via `Superstaq `__, a cross-platform that performs low-level compilation and cross-layer optimization. To get started users can create a Superstaq account by following `these instructions `__. -For access to Infleqtion's neutral atom quantum computer, Sqale, see details about -`pre-registration `__. Setting Credentials `````````````````````````