forked from NVIDIA/cuda-quantum
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.h
More file actions
117 lines (101 loc) · 4.71 KB
/
run.h
File metadata and controls
117 lines (101 loc) · 4.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/****************************************************************-*- C++ -*-****
* Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. *
* All rights reserved. *
* *
* This source code and the accompanying materials are made available under *
* the terms of the Apache License 2.0 which accompanies this distribution. *
******************************************************************************/
#pragma once
#include "common/ExecutionContext.h"
#include "common/KernelWrapper.h"
#include "common/MeasureCounts.h"
#include "cudaq/algorithms/broadcast.h"
#include "cudaq/concepts.h"
#include "cudaq/host_config.h"
#include <cstdint>
// Runtime helper with C linkage. It is only declared in this header; the
// definition lives elsewhere. In this header it is called to populate a
// `std::vector<bool>` from a raw buffer and that buffer's length in bytes.
// NOTE(review): the parameter names are descriptive guesses derived from the
// call site in this header -- confirm them against the definition.
extern "C" void __nvqpp_initializer_list_to_vector_bool(
    std::vector<bool> &result, char *initList, std::size_t size);
namespace cudaq {
namespace details {
// Span-like description of the values produced by a `cudaq::run` launch.
//
// A launch yields a variable number of typed result values that are stored
// back to back in one contiguous buffer. This structure records where that
// buffer begins and how large it is; it carries no element type of its own.
// NB: when the result type is `bool`, every value occupies a full byte in the
// buffer.
struct RunResultSpan {
  // Start of the contiguous result buffer.
  void *data;
  // Exact size of the buffer in bytes. This is an integer multiple of the size
  // of the result type of the launched kernel.
  std::uint64_t lengthInBytes;
};
// The main entry point to launching a kernel, \p kernel, in a `cudaq::run`
// context and getting back a span containing the results. The kernel is
// logically executed \p shots times, which can result in up to \p shots
// distinct result values. The results are returned in a span, which is a
// pointer to a buffer and the size of that buffer in bytes.
//
// \param kernel      Argument-free callable that launches the kernel. In
//                    `cudaq::run` this is a lambda wrapping
//                    `cudaq::invokeKernel`.
// \param platform    The quantum platform to launch on.
// \param kernel_name Name of the kernel being launched. In `cudaq::run` this
//                    is the value of `cudaq::getKernelName`.
// \param shots       Number of times the kernel is logically executed.
// \returns A `RunResultSpan` describing the buffer of result values.
// NOTE(review): only the declaration is visible here; how the returned buffer
// is allocated and who must release it should be confirmed at the definition.
RunResultSpan runTheKernel(std::function<void()> &&kernel,
quantum_platform &platform,
const std::string &kernel_name, std::size_t shots);
// Transfer the ownership of the buffer in a RunResultSpan to a
// `std::vector<T>` object. This special code is required because a
// `std::vector<T>` will always construct its own data, and own it, using its
// standard constructors. Here the existing buffer is handed to the vector
// `result` directly so that no copy of the data is made.
//
// \param result Vector that receives the values. Its previous contents, if
//               any, are discarded.
// \param spanIn Span describing the buffer. On return its `data` is null and
//               its `lengthInBytes` is 0.
//
// For `T == bool` the values are instead packed into `result` by calling
// `__nvqpp_initializer_list_to_vector_bool`, since `std::vector<bool>` is a
// specialization with its own representation.
//
// NOTE(review): the non-`bool` path writes the vector's internals directly, so
// it depends on the standard library's vector layout and presumably on the
// buffer having been allocated in a way the vector's allocator can release --
// confirm against the producer of the span.
template <typename T>
void resultSpanToVectorViaOwnership(std::vector<T> &result,
                                    RunResultSpan &spanIn) {
  // Swap `result` into a local variable. The original content of `result`, if
  // any, will be reclaimed at the end of this function, and `result` itself is
  // left empty.
  std::vector<T> deadEnder;
  std::swap(deadEnder, result);

  if constexpr (std::is_same_v<T, bool>) {
    // std::vector<bool> is a specialization, so the helper is called to pack
    // the byte-per-value buffer into the vector.
    // NOTE(review): the byte buffer is not released in this function; whether
    // the helper takes care of it is not visible here -- confirm.
    __nvqpp_initializer_list_to_vector_bool(
        result, reinterpret_cast<char *>(spanIn.data), spanIn.lengthInBytes);
  } else {
    // Mirror of the nominal vector representation: begin, end of the
    // elements, and end of the storage.
    struct RawVector {
      T *start;
      T *end0;
      T *end1;
    };
    // The layout is only relied upon in this branch, so it is only checked
    // here. Checking it unconditionally would reject `T == bool` on standard
    // libraries whose `std::vector<bool>` is not three pointers wide, even
    // though that case never touches the layout.
    static_assert(sizeof(std::vector<T>) == sizeof(RawVector),
                  "std::vector must use the nominal 3 pointer implementation");

    // Initialize the (now empty) vector `result` in place and without any
    // data copies: size and capacity both cover exactly the span's buffer.
    auto *rawVec = reinterpret_cast<RawVector *>(&result);
    T *const first = reinterpret_cast<T *>(spanIn.data);
    T *const last = reinterpret_cast<T *>(
        reinterpret_cast<char *>(spanIn.data) + spanIn.lengthInBytes);
    rawVec->start = first;
    rawVec->end0 = last;
    rawVec->end1 = last;
  }

  // Destroy the contents of the span. The caller no longer owns the `data`
  // buffer, the vector `result` does.
  spanIn.data = nullptr;
  spanIn.lengthInBytes = 0;
}
} // namespace details
/// cudaq::run allows an entry-point kernel to be executed a \p shots number of
/// times and return a `std::vector` of results.
///
/// \param shots  Number of times the kernel is executed. When this is 0 an
///               empty vector is returned and nothing is launched.
/// \param kernel The entry-point kernel to execute.
/// \param args   The arguments passed to \p kernel.
/// \returns A vector holding `lengthInBytes / sizeof(RESULT)` values copied
///          out of the buffer that `details::runTheKernel` returned.
template <typename RESULT, typename... ARGS>
#if CUDAQ_USE_STD20
  requires(!std::is_void_v<RESULT>)
#endif
std::vector<RESULT> run(std::size_t shots,
                        std::function<RESULT(ARGS...)> &&kernel,
                        ARGS &&...args) {
  if (shots == 0)
    return {};

  // Launch the kernel in the appropriate context.
  auto &platform = cudaq::get_platform();
  std::string kernelName{cudaq::getKernelName(kernel)};
  details::RunResultSpan span = details::runTheKernel(
      [&]() mutable {
        // `kernel` is a plain rvalue reference (not a forwarding reference),
        // so it is passed on with std::move. std::forward cannot be used
        // without an explicit template argument.
        // NOTE(review): if the launcher calls this lambda more than once, the
        // same `kernel` and `args` are handed over as rvalues on every call --
        // confirm that `cudaq::invokeKernel` does not consume them.
        cudaq::invokeKernel(std::move(kernel), std::forward<ARGS>(args)...);
      },
      platform, kernelName, shots);

  // Build the vector with the iterator-range constructor, spelled with
  // parentheses on purpose: a braced `{first, last}` would prefer the
  // initializer_list constructor whenever a pointer converts to RESULT (e.g.
  // RESULT == bool), producing a two-element vector or a narrowing error.
  // NOTE(review): the values are copied and the span's buffer is not released
  // here; who owns that buffer is not visible in this header -- confirm.
  auto *first = reinterpret_cast<RESULT *>(span.data);
  const std::uint64_t resultCount = span.lengthInBytes / sizeof(RESULT);
  return std::vector<RESULT>(first, first + resultCount);
}
// FIXME: Provide an async variant of run?
} // namespace cudaq