From 284ff7335608f87c4165862ced39277b6577c3a4 Mon Sep 17 00:00:00 2001 From: Luca Mondada Date: Tue, 10 Mar 2026 23:33:08 +0000 Subject: [PATCH 1/2] Create executeKernel function in PythonLauncher Signed-off-by: Luca Mondada --- runtime/cudaq/platform/default/python/QPU.cpp | 136 ++++++++++-------- 1 file changed, 77 insertions(+), 59 deletions(-) diff --git a/runtime/cudaq/platform/default/python/QPU.cpp b/runtime/cudaq/platform/default/python/QPU.cpp index 7daccf784ef..23d11910d82 100644 --- a/runtime/cudaq/platform/default/python/QPU.cpp +++ b/runtime/cudaq/platform/default/python/QPU.cpp @@ -180,16 +180,65 @@ static void updateExecutionContext(ModuleOp module) { } } -static std::optional alreadyBuiltJITCode() { +static std::optional +alreadyBuiltJITCode(const std::string &name, + const std::vector &rawArgs) { auto *currentExecCtx = cudaq::getExecutionContext(); if (!currentExecCtx || !currentExecCtx->allowJitEngineCaching) return std::nullopt; - if (currentExecCtx->jitEng) + + auto jit = currentExecCtx->jitEng; + if (jit && cudaq::compiler_artifact::isPersistingJITEngine()) { CUDAQ_INFO("Loading previously compiled JIT engine for {}. This will " "re-run the previous job, discarding any changes to the kernel, " "arguments or launch configuration.", currentExecCtx->kernelName); - return currentExecCtx->jitEng; + + // Ensure the arguments are the same as the previous launch. + auto argsCreatorThunk = [&jit, &name]() { + return (void *)jit->lookupRawNameOrFail(name + ".argsCreator"); + }; + cudaq::compiler_artifact::checkArtifactReuse(name, rawArgs, jit.value(), + argsCreatorThunk); + } + + return jit; +} + +static cudaq::KernelThunkResultType +executeKernel(cudaq::JitEngine jit, const std::string &name, + const std::vector &rawArgs, bool hasResult, + bool hasVariationalArgs) { + cudaq::KernelThunkResultType result{nullptr, 0}; + void *buff = nullptr; + if (hasResult) { + buff = const_cast(rawArgs.back()); + } else if (hasVariationalArgs) { + auto argsCreatorFn = reinterpret_cast( + jit.lookupRawNameOrFail(name + ".argsCreator")); + argsCreatorFn(static_cast(rawArgs.data()), &buff); + } + + if (buff) { + // Proceed to call the .thunk function so that the result value will be + // properly marshaled into the buffer we allocated in + // appendTheResultBuffer(). + // FIXME: Python ought to set up the call stack so that a legit C++ entry + // point can be called instead of winging it and duplicating what the core + // compiler already does. + auto funcPtr = jit.lookupRawNameOrFail(name + ".thunk"); + result = reinterpret_cast( + funcPtr)(buff, /*client_server=*/false); + } else { + jit.run(name); + } + + if (hasVariationalArgs) { + std::free(buff); + return {nullptr, 0}; + } + + return result; } /// In a sample launch context, the (`JIT` compiled) execution engine may be @@ -218,7 +267,6 @@ struct PythonLauncher : public cudaq::ModuleLauncher { cudaq::getEnvBool("CUDAQ_PYTHON_CODEGEN_DUMP", false); std::string fullName = cudaq::runtime::cudaqGenPrefixName + name; - cudaq::KernelThunkResultType result{nullptr, 0}; auto funcOp = module.lookupSymbol(fullName); if (!funcOp) @@ -249,70 +297,40 @@ struct PythonLauncher : public cudaq::ModuleLauncher { varArgIndices.clear(); } const bool hasVariationalArgs = !varArgIndices.empty(); + const bool hasResult = !!resultTy; - auto jit = alreadyBuiltJITCode(); - if (!jit) { - // 1. Check that this call is sane. - if (enablePythonCodegenDump) - module.dump(); + if (auto jit = alreadyBuiltJITCode(name, rawArgs)) { + return executeKernel(*jit, name, rawArgs, hasResult, hasVariationalArgs); + } - // 2. Merge other modules (e.g., if there are device kernel calls). - cudaq::detail::mergeAllCallableClosures(module, name, rawArgs); + // 1. Check that this call is sane. + if (enablePythonCodegenDump) + module.dump(); - // Mark all newly merged kernels private. - for (auto &op : module) - if (auto f = dyn_cast(op)) - if (f != funcOp) - f.setPrivate(); + // 2. Merge other modules (e.g., if there are device kernel calls). + cudaq::detail::mergeAllCallableClosures(module, name, rawArgs); - updateExecutionContext(module); + // Mark all newly merged kernels private. + for (auto &op : module) + if (auto f = dyn_cast(op)) + if (f != funcOp) + f.setPrivate(); - // 3. LLVM JIT the code so we can execute it. - CUDAQ_INFO("Run Argument Synth.\n"); - if (enablePythonCodegenDump) - module.dump(); - specializeKernel(name, module, rawArgs, resultTy, enablePythonCodegenDump, - /*isEntryPoint=*/true, varArgIndices); + updateExecutionContext(module); - // 4. Execute the code right here, right now. - jit = cudaq::createQIRJITEngine(module, "qir:"); - } + // 3. LLVM JIT the code so we can execute it. + CUDAQ_INFO("Run Argument Synth.\n"); + if (enablePythonCodegenDump) + module.dump(); + specializeKernel(name, module, rawArgs, resultTy, enablePythonCodegenDump, + /*isEntryPoint=*/true, varArgIndices); - if (cudaq::compiler_artifact::isPersistingJITEngine()) { - auto argsCreatorThunk = [&jit, &name]() { - return (void *)jit->lookupRawNameOrFail(name + ".argsCreator"); - }; - cudaq::compiler_artifact::checkArtifactReuse(name, rawArgs, jit.value(), - argsCreatorThunk); - } + auto jit = cudaq::createQIRJITEngine(module, "qir:"); + cacheJITForPerformance(jit); - if (resultTy) { - // Proceed to call the .thunk function so that the result value will be - // properly marshaled into the buffer we allocated in - // appendTheResultBuffer(). - // FIXME: Python ought to set up the call stack so that a legit C++ entry - // point can be called instead of winging it and duplicating what the core - // compiler already does. - auto funcPtr = jit->lookupRawNameOrFail(name + ".thunk"); - void *buff = const_cast(rawArgs.back()); - result = reinterpret_cast( - *funcPtr)(buff, /*client_server=*/false); - } else if (hasVariationalArgs) { - auto argsCreatorFn = reinterpret_cast( - *jit->lookupRawNameOrFail(name + ".argsCreator")); - void *argsBuffer = nullptr; - argsCreatorFn(static_cast(rawArgs.data()), &argsBuffer); - auto thunkFn = - reinterpret_cast( - *jit->lookupRawNameOrFail(name + ".thunk")); - thunkFn(argsBuffer, /*client_server=*/false); - std::free(argsBuffer); - } else { - jit->run(name); - } - cacheJITForPerformance(jit.value()); // FIXME: actually handle results - return result; + // 4. Execute the code right here, right now. + return executeKernel(jit, name, rawArgs, hasResult, hasVariationalArgs); } void *specializeModule(const std::string &name, ModuleOp module, From 890416ca419456cdb95913ea5cb06cedb915921c Mon Sep 17 00:00:00 2001 From: Luca Mondada Date: Wed, 11 Mar 2026 17:19:45 +0000 Subject: [PATCH 2/2] add saveArtifact Signed-off-by: Luca Mondada --- runtime/common/ExecutionContext.cpp | 34 +++++++++++++++---- runtime/common/ExecutionContext.h | 4 +++ runtime/cudaq/platform/default/python/QPU.cpp | 5 +++ 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/runtime/common/ExecutionContext.cpp b/runtime/common/ExecutionContext.cpp index 3bfe725c819..3691900eaa8 100644 --- a/runtime/common/ExecutionContext.cpp +++ b/runtime/common/ExecutionContext.cpp @@ -27,18 +27,29 @@ thread_local bool reuseArtifact = false; class SavedCompilerArtifact { public: + void saveArtifact(const std::string &kernelName, + const std::vector &args, + const cudaq::JitEngine &engine, + std::function argsCreatorThunk) { + if (jitEng.has_value()) { + throw std::runtime_error( + "Attempted to overwrite saved compiler artifact."); + } + jitEng = engine; + argsCreator = reinterpret_cast( + argsCreatorThunk()); + this->kernelName = kernelName; + auto [resSize, scopedArgBuffer] = processArgs(args); + argSize = resSize; + argBuff = std::move(scopedArgBuffer); + } + void checkArtifactReuse(const std::string &kernelName, const std::vector &args, const cudaq::JitEngine &engine, std::function argsCreatorThunk) { if (!jitEng.has_value()) { - jitEng = engine; - this->argsCreator = reinterpret_cast( - argsCreatorThunk()); - this->kernelName = kernelName; - auto [resSize, scopedArgBuffer] = processArgs(args); - this->argSize = resSize; - this->argBuff = std::move(scopedArgBuffer); + saveArtifact(kernelName, args, engine, argsCreatorThunk); return; } @@ -126,6 +137,15 @@ void checkArtifactReuse(const std::string kernelName, savedArtifact.checkArtifactReuse(kernelName, args, jit, argsCreatorThunk); } + +void saveArtifact(const std::string kernelName, const std::vector &args, + const JitEngine jit, + std::function argsCreatorThunk) { + if (!reuseArtifact) + return; + + savedArtifact.saveArtifact(kernelName, args, jit, argsCreatorThunk); +} } // namespace compiler_artifact ExecutionContext *getExecutionContext() { return currentExecutionContext; } diff --git a/runtime/common/ExecutionContext.h b/runtime/common/ExecutionContext.h index 34e56800199..3307032d3b0 100644 --- a/runtime/common/ExecutionContext.h +++ b/runtime/common/ExecutionContext.h @@ -225,5 +225,9 @@ bool isPersistingJITEngine(); void checkArtifactReuse(const std::string kernelName, const std::vector &args, const JitEngine jit, std::function argsCreatorThunk); + +void saveArtifact(const std::string kernelName, const std::vector &args, + const JitEngine jit, + std::function argsCreatorThunk); }; // namespace compiler_artifact } // namespace cudaq diff --git a/runtime/cudaq/platform/default/python/QPU.cpp b/runtime/cudaq/platform/default/python/QPU.cpp index 23d11910d82..9e91fc659a8 100644 --- a/runtime/cudaq/platform/default/python/QPU.cpp +++ b/runtime/cudaq/platform/default/python/QPU.cpp @@ -327,6 +327,11 @@ struct PythonLauncher : public cudaq::ModuleLauncher { auto jit = cudaq::createQIRJITEngine(module, "qir:"); cacheJITForPerformance(jit); + auto argsCreatorThunk = [&jit, &name]() { + return (void *)jit.lookupRawNameOrFail(name + ".argsCreator"); + }; + cudaq::compiler_artifact::saveArtifact(name, rawArgs, jit, + argsCreatorThunk); // FIXME: actually handle results // 4. Execute the code right here, right now.