Skip to content

Commit 075193c

Browse files
bmhowe23cketcham2333justinlietzkvmtoamccaskey
authored
Real-time decoding support (#333)
This PR adds support for using our decoders in a real-time, online environment, where the decoders are running on a low-latency connection to a real QPU. The decoder corrections (calculated on the classical computer) can be sent back to a live CUDA-Q program (running on the quantum computer) during the quantum program's execution. Additionally, one can run the decoder(s) in a simulated environment where real-time decoding occurs in the simulation context (i.e. on the user's desktop). --------- Signed-off-by: Ben Howe <[email protected]> Co-authored-by: Chuck Ketcham <[email protected]> Co-authored-by: Justin Lietz <[email protected]> Co-authored-by: Kevin Mato <[email protected]> Co-authored-by: Alex McCaskey <[email protected]> Co-authored-by: Thien Nguyen <[email protected]> Co-authored-by: Sachin Pisal <[email protected]> Co-authored-by: melody-ren <[email protected]>
1 parent 27754d5 commit 075193c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+8756
-87
lines changed

cmake/Modules/CUDA-QX.cmake

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# ============================================================================ #
2-
# Copyright (c) 2024 NVIDIA Corporation & Affiliates. #
2+
# Copyright (c) 2024 - 2025 NVIDIA Corporation & Affiliates. #
33
# All rights reserved. #
44
# #
55
# This source code and the accompanying materials are made available under #
@@ -43,6 +43,9 @@ resulting object files to the specified library target.
4343
Note: This function assumes that the CUDAQ_INSTALL_DIR variable is set
4444
to the CUDAQ installation directory.
4545
46+
Note: You can use DEPENDS_ON if you want to delay compilation until some other
47+
target has been built.
48+
4649
Example usage:
4750
cudaqx_add_device_code(
4851
my_library
@@ -52,13 +55,15 @@ Example usage:
5255
COMPILER_FLAGS
5356
--enable-mlir
5457
-v
58+
DEPENDS_ON
59+
SomeOtherTarget
5560
)
5661
5762
#]=======================================================================]
5863
function(cudaqx_add_device_code LIBRARY_NAME)
5964
set(options)
6065
set(oneValueArgs)
61-
set(multiValueArgs SOURCES COMPILER_FLAGS)
66+
set(multiValueArgs SOURCES COMPILER_FLAGS DEPENDS_ON)
6267
cmake_parse_arguments(ARGS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
6368

6469
if(NOT DEFINED CUDAQ_INSTALL_DIR)
@@ -83,7 +88,7 @@ function(cudaqx_add_device_code LIBRARY_NAME)
8388
set(prop "$<TARGET_PROPERTY:${LIBRARY_NAME},INCLUDE_DIRECTORIES>")
8489
foreach(source ${ARGS_SOURCES})
8590
get_filename_component(filename ${source} NAME_WE)
86-
set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.o")
91+
set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${LIBRARY_NAME}_${filename}.o")
8792
cmake_path(GET output_file FILENAME baseName)
8893

8994
add_custom_command(
@@ -92,15 +97,15 @@ function(cudaqx_add_device_code LIBRARY_NAME)
9297
${ARGS_COMPILER_FLAGS} -c -fPIC --enable-mlir
9398
${CMAKE_CURRENT_SOURCE_DIR}/${source} -o ${baseName}
9499
"$<$<BOOL:${prop}>:-I $<JOIN:${prop}, -I >>"
95-
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
100+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source} ${ARGS_DEPENDS_ON}
96101
COMMENT "Compiling ${source} with nvq++"
97102
VERBATIM
98103
)
99104

100105
list(APPEND object_files ${output_file})
101-
list(APPEND custom_targets ${filename}_target)
106+
list(APPEND custom_targets ${LIBRARY_NAME}_${filename}_target)
102107

103-
add_custom_target(${filename}_target DEPENDS ${output_file})
108+
add_custom_target(${LIBRARY_NAME}_${filename}_target DEPENDS ${output_file})
104109
endforeach()
105110

106111
add_dependencies(${LIBRARY_NAME} ${custom_targets})

libs/core/include/cuda-qx/core/kwargs_utils.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,31 @@ inline heterogeneous_map hetMapFromKwargs(const py::kwargs &kwargs) {
4747
} else if (py::isinstance<py::dict>(value)) {
4848
// Recursively convert nested dictionary
4949
result.insert(key, hetMapFromKwargs(value.cast<py::dict>()));
50+
} else if (py::isinstance<py::list>(value)) {
51+
// Handle Python lists
52+
py::list py_list = value.cast<py::list>();
53+
if (py_list.size() > 0) {
54+
// Check if it's a nested list (list of lists)
55+
if (py::isinstance<py::list>(py_list[0])) {
56+
std::vector<std::vector<double>> vec_vec;
57+
for (const auto &item : py_list) {
58+
py::list inner_list = item.cast<py::list>();
59+
std::vector<double> inner_vec;
60+
for (const auto &v : inner_list) {
61+
inner_vec.push_back(v.cast<double>());
62+
}
63+
vec_vec.push_back(inner_vec);
64+
}
65+
result.insert(key, std::move(vec_vec));
66+
} else {
67+
// Single-level list - try to convert to vector<double>
68+
std::vector<double> vec;
69+
for (const auto &item : py_list) {
70+
vec.push_back(item.cast<double>());
71+
}
72+
result.insert(key, std::move(vec));
73+
}
74+
}
5075
} else if (py::isinstance<py::array>(value)) {
5176
py::array np_array = value.cast<py::array>();
5277
py::buffer_info info = np_array.request();

libs/qec/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,20 @@ if (QEC_EXTERNAL_DECODERS)
226226
add_target_libs_to_wheel(${LIB_FILE})
227227
endwhile()
228228
endif()
229+
230+
# External Dependencies
231+
# ==============================================================================
232+
233+
include(FetchContent)
234+
235+
# We need version 3.11.1 because that is what CUDA-Q uses. If CUDA-Q updates,
236+
# then we need to remember to update.
237+
# TODO: remove when no longer needed.
238+
FetchContent_Declare(
239+
json
240+
GIT_REPOSITORY https://github.com/nlohmann/json
241+
GIT_TAG v3.11.1
242+
EXCLUDE_FROM_ALL
243+
)
244+
245+
FetchContent_MakeAvailable(json)

libs/qec/include/cudaq/qec/decoder.h

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ class async_decoder_result {
123123
class decoder
124124
: public cudaqx::extension_point<decoder, const cudaqx::tensor<uint8_t> &,
125125
const cudaqx::heterogeneous_map &> {
126+
private:
127+
struct rt_impl;
128+
struct rt_impl_deleter {
129+
void operator()(rt_impl *p) const;
130+
};
131+
std::unique_ptr<rt_impl, rt_impl_deleter> pimpl;
132+
126133
public:
127134
decoder() = delete;
128135

@@ -173,8 +180,59 @@ class decoder
173180
std::size_t get_block_size() { return block_size; }
174181
std::size_t get_syndrome_size() { return syndrome_size; }
175182

183+
// -- Begin realtime decoding API --
184+
185+
// Note: all of the current realtime decoding API is designed to be used with
186+
// hard syndromes.
187+
188+
/// @brief Get the number of measurement syndromes per decode call. This
189+
/// depends on D_sparse, so you must have called set_D_sparse() first.
190+
uint32_t get_num_msyn_per_decode() const;
191+
192+
/// @brief Set the observable matrix.
193+
void set_O_sparse(const std::vector<std::vector<uint32_t>> &O_sparse);
194+
195+
/// @brief Set the observable matrix, using a single long vector with -1 as
196+
/// row terminators.
197+
void set_O_sparse(const std::vector<int64_t> &O_sparse);
198+
199+
/// @brief Set the D_sparse matrix.
200+
void set_D_sparse(const std::vector<std::vector<uint32_t>> &D_sparse);
201+
202+
/// @brief Set the D_sparse matrix, using a single long vector with -1 as row
203+
/// terminators.
204+
void set_D_sparse(const std::vector<int64_t> &D_sparse);
205+
206+
/// @brief Set the decoder id.
207+
void set_decoder_id(uint32_t decoder_id);
208+
209+
/// @brief Get the decoder id.
210+
uint32_t get_decoder_id() const;
211+
212+
/// @brief Enqueue a syndrome for decoding (pointer version)
213+
/// @return True if enough syndromes have been enqueued to trigger a decode.
214+
bool enqueue_syndrome(const uint8_t *syndrome, std::size_t syndrome_length);
215+
216+
/// @brief Enqueue a syndrome for decoding (vector version)
217+
/// @return True if enough syndromes have been enqueued to trigger a decode.
218+
bool enqueue_syndrome(const std::vector<uint8_t> &syndrome);
219+
220+
/// @brief Get the current observable corrections.
221+
const uint8_t *get_obs_corrections() const;
222+
223+
/// @brief Get the number of observables.
224+
std::size_t get_num_observables() const;
225+
226+
/// @brief Clear any stored corrections.
227+
void clear_corrections();
228+
229+
/// @brief Reset the decoder, clearing all per-shot memory and corrections.
230+
void reset_decoder();
231+
232+
// -- End realtime decoding API --
233+
176234
/// @brief Destructor
177-
virtual ~decoder() {}
235+
virtual ~decoder() = default;
178236

179237
/// @brief Get the version of the decoder. Subclasses that are not part of the
180238
/// standard GitHub repo should override this to provide a more tailored
@@ -191,6 +249,12 @@ class decoder
191249

192250
/// @brief The decoder's parity check matrix
193251
cudaqx::tensor<uint8_t> H;
252+
253+
/// @brief The decoder's observable matrix in sparse format
254+
std::vector<std::vector<uint32_t>> O_sparse;
255+
256+
/// @brief The decoder's D matrix in sparse format
257+
std::vector<std::vector<uint32_t>> D_sparse;
194258
};
195259

196260
/// @brief Convert a vector of soft probabilities to a vector of hard
@@ -243,6 +307,7 @@ inline void convert_vec_soft_to_tensor_hard(const std::vector<t_soft> &in,
243307
/// @brief Convert a vector of hard probabilities to a vector of soft
244308
/// probabilities.
245309
/// @param in Hard probability input vector containing only 0/false or 1/true.
310+
/// @param in_size The number of elements in the array pointed to by `in`
246311
/// @param out Soft probability output vector in the range [0.0, 1.0]
247312
/// @param true_val The soft probability value assigned when the input is 1
248313
/// (default to 1.0)
@@ -253,15 +318,35 @@ template <typename t_soft, typename t_hard,
253318
(std::is_integral<t_hard>::value ||
254319
std::is_same<t_hard, bool>::value),
255320
int>::type = 0>
256-
inline void convert_vec_hard_to_soft(const std::vector<t_hard> &in,
321+
inline void convert_vec_hard_to_soft(const t_hard *in, std::size_t in_size,
257322
std::vector<t_soft> &out,
258323
const t_soft true_val = 1.0,
259324
const t_soft false_val = 0.0) {
260-
out.resize(in.size());
261-
for (std::size_t i = 0; i < in.size(); i++)
325+
out.resize(in_size);
326+
for (std::size_t i = 0; i < in_size; i++)
262327
out[i] = static_cast<t_soft>(in[i] ? true_val : false_val);
263328
}
264329

330+
/// @brief Convert a vector of hard probabilities to a vector of soft
/// probabilities.
///
/// Elements are read through `operator[]` instead of forwarding `in.data()`:
/// the `t_hard` constraint below admits `bool`, and `std::vector<bool>` is a
/// packed specialization that does not provide a usable `data()` pointer, so a
/// pointer-based implementation would fail to compile for that input type.
/// @param in Hard probability input vector containing only 0/false or 1/true.
/// @param out Soft probability output vector in the range [0.0, 1.0]. It is
/// resized to `in.size()`; an empty input yields an empty output.
/// @param true_val The soft probability value assigned when the input is 1
/// (default to 1.0)
/// @param false_val The soft probability value assigned when the input is 0
/// (default to 0.0)
template <typename t_soft, typename t_hard,
          typename std::enable_if<std::is_floating_point<t_soft>::value &&
                                      (std::is_integral<t_hard>::value ||
                                       std::is_same<t_hard, bool>::value),
                                  int>::type = 0>
inline void convert_vec_hard_to_soft(const std::vector<t_hard> &in,
                                     std::vector<t_soft> &out,
                                     const t_soft true_val = 1.0,
                                     const t_soft false_val = 0.0) {
  const std::size_t num_elements = in.size();
  out.resize(num_elements);
  // Any non-zero hard value maps to true_val; zero/false maps to false_val.
  for (std::size_t i = 0; i < num_elements; i++)
    out[i] = static_cast<t_soft>(in[i] ? true_val : false_val);
}
349+
265350
/// @brief Convert a 2D vector of soft probabilities to a 2D vector of hard
266351
/// probabilities.
267352
/// @param in Soft probability input vector in range [0.0, 1.0]

libs/qec/include/cudaq/qec/pcm_utils.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,62 @@ namespace cudaq::qec {
2222
std::vector<std::vector<std::uint32_t>>
2323
dense_to_sparse(const cudaqx::tensor<uint8_t> &pcm);
2424

25+
/// @brief Return a sparse representation of the PCM as a string.
26+
/// @param pcm The PCM to convert to a sparse representation.
27+
/// @return A string that represents the PCM in a sparse format.
28+
std::string pcm_to_sparse_string(const cudaqx::tensor<uint8_t> &pcm);
29+
30+
/// @brief Return a PCM from a sparse representation.
31+
/// @param sparse_str The sparse representation of the PCM.
32+
/// @param num_rows The number of rows in the PCM.
33+
/// @param num_cols The number of columns in the PCM.
34+
/// @return A PCM tensor.
35+
cudaqx::tensor<uint8_t> pcm_from_sparse_string(const std::string &sparse_str,
36+
std::size_t num_rows,
37+
std::size_t num_cols);
38+
39+
/// @brief Return a PCM from a sparse representation.
40+
/// @param sparse_vec The sparse representation of the PCM.
41+
/// @param num_rows The number of rows in the PCM.
42+
/// @param num_cols The number of columns in the PCM.
43+
/// @return A PCM tensor.
44+
cudaqx::tensor<uint8_t>
45+
pcm_from_sparse_vec(const std::vector<std::int64_t> &sparse_vec,
46+
std::size_t num_rows, std::size_t num_cols);
47+
48+
/// @brief Return a sparse representation of the PCM.
49+
/// @param pcm The PCM to convert to a sparse representation.
50+
/// @return A vector of integers that represents the PCM in a sparse format.
51+
std::vector<std::int64_t> pcm_to_sparse_vec(const cudaqx::tensor<uint8_t> &pcm);
52+
53+
/// @brief Generate a sparse detector matrix for a given number of syndromes per
54+
/// round and number of rounds. Timelike here means that each round of syndrome
55+
/// bits is xor'd against the preceding round.
56+
/// @param num_syndromes_per_round The number of syndromes per round.
57+
/// @param num_rounds The number of rounds.
58+
/// @param include_first_round Whether to include the first round in the
59+
/// detector matrix.
60+
/// @return The detector matrix format is CSR-like, with -1 values
61+
/// indicating the end of a row.
62+
std::vector<std::int64_t>
63+
generate_timelike_sparse_detector_matrix(std::uint32_t num_syndromes_per_round,
64+
std::uint32_t num_rounds,
65+
bool include_first_round = false);
66+
67+
/// @brief Generate a sparse detector matrix for a given number of syndromes per
68+
/// round and number of rounds. Timelike here means that each round of syndrome
69+
/// bits is xor'd against the preceding round. The first round is supplied by
70+
/// the user, to allow for a mixture of detectors and non-detectors.
71+
/// @param num_syndromes_per_round The number of syndromes per round.
72+
/// @param num_rounds The number of rounds.
73+
/// @param first_round_matrix User specified detector matrix for the first
74+
/// round.
75+
/// @return The detector matrix format is CSR-like, with -1 values
76+
/// indicating the end of a row.
77+
std::vector<std::int64_t> generate_timelike_sparse_detector_matrix(
78+
std::uint32_t num_syndromes_per_round, std::uint32_t num_rounds,
79+
std::vector<std::int64_t> first_round_matrix);
80+
2581
/// @brief Return a vector of column indices that would sort the PCM columns
2682
/// in topological order.
2783
/// @param row_indices For each column, a vector of row indices that have a
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/****************************************************************-*- C++ -*-****
2+
* Copyright (c) 2024 - 2025 NVIDIA Corporation & Affiliates. *
3+
* All rights reserved. *
4+
* *
5+
* This source code and the accompanying materials are made available under *
6+
* the terms of the Apache License 2.0 which accompanies this distribution. *
7+
******************************************************************************/
8+
9+
#pragma once
10+
11+
#include "cudaq/qis/qubit_qis.h"
12+
13+
// Define the CUDA-Q QEC Realtime Decoding API
14+
//
15+
// These functions enable CUDA-Q quantum kernel code to
16+
// offload decoding work to our QEC decoders in real time
17+
// (within qubit coherence times)
18+
//
19+
// The design here is as follows: We declare but do not
20+
// implement the API. Then we allow users to specify concrete
21+
// implementations of the API via the target specification passed to
22+
// nvq++.
23+
24+
namespace cudaq::qec::decoding {
25+
// CUDA-Q QEC Realtime Decoding API (declarations)
26+
27+
/// @brief Enqueue syndromes for decoding.
28+
/// @param decoder_id The ID of the decoder to use.
29+
/// @param syndromes The syndromes to enqueue.
30+
/// @param tag The tag to use for the syndrome (currently useful for logging
31+
/// only)
32+
__qpu__ void
33+
enqueue_syndromes(std::uint64_t decoder_id,
34+
const std::vector<cudaq::measure_result> &syndromes,
35+
std::uint64_t tag = 0);
36+
37+
/// @brief Get the corrections for a given decoder.
38+
/// @param decoder_id The ID of the decoder to use.
39+
/// @param return_size The number of correction bits to return. This is expected
40+
/// to match the number of observables in the decoder.
41+
/// @param reset Whether to reset the decoder corrections after retrieving them.
42+
/// @return The corrections (detected bit flips) for the given decoder, based on
43+
/// all of the decoded syndromes since the last time any corrections were reset.
44+
__qpu__ std::vector<bool> get_corrections(std::uint64_t decoder_id,
45+
std::uint64_t return_size,
46+
bool reset = false);
47+
48+
/// @brief Reset the decoder. This clears any queued syndromes and resets any
49+
/// corrections back to 0.
50+
/// @param decoder_id The ID of the decoder to reset.
51+
__qpu__ void reset_decoder(std::uint64_t decoder_id);
52+
} // namespace cudaq::qec::decoding

0 commit comments

Comments
 (0)