Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/config/gitlab_commits.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
nvidia-mgpu-repo: cuda-quantum/cuquantum-mgpu.git
nvidia-mgpu-commit: 69fa05df00a069c3bc5c040294678014a396fa68
nvidia-mgpu-commit: 52dbd7d31cf3c88c8e5a1de9bac6635a5b0c8309
23 changes: 13 additions & 10 deletions python/cudaq/runtime/ptsbe.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def sample(kernel,
max_trajectories=None,
sampling_strategy=None,
shot_allocation=None,
return_execution_data=False):
return_execution_data=False,
include_sequential_data=False):
"""
Sample using Pre-Trajectory Sampling with Batch Execution (`PTSBE`).

Expand Down Expand Up @@ -84,6 +85,8 @@ def sample(kernel,
return_execution_data (bool): Include circuit structure, trajectory
specifications, and per-trajectory measurement outcomes in the
returned result. Defaults to ``False``.
include_sequential_data (bool): Populate per-shot sequential bitstring
data on the result. Defaults to ``False``.

Returns:
``SampleResult``: Measurement results. Returns a list of results
Expand All @@ -106,18 +109,16 @@ def sample(kernel,
result = cudaq_runtime.ptsbe.sample_impl(
decorator.uniqName, module, shots_count, noise_model,
max_trajectories, sampling_strategy, shot_allocation,
return_execution_data, *processedArgs)
return_execution_data, include_sequential_data, *processedArgs)
results.append(result)
return results

processedArgs, module = decorator.prepare_call(*args)

return cudaq_runtime.ptsbe.sample_impl(decorator.uniqName, module,
shots_count, noise_model,
max_trajectories, sampling_strategy,
shot_allocation,
return_execution_data,
*processedArgs)
return cudaq_runtime.ptsbe.sample_impl(
decorator.uniqName, module, shots_count, noise_model, max_trajectories,
sampling_strategy, shot_allocation, return_execution_data,
include_sequential_data, *processedArgs)


def sample_async(kernel,
Expand All @@ -127,7 +128,8 @@ def sample_async(kernel,
max_trajectories=None,
sampling_strategy=None,
shot_allocation=None,
return_execution_data=False):
return_execution_data=False,
include_sequential_data=False):
"""
Asynchronously sample using PTSBE. Returns a future whose result
can be retrieved via ``.get()``.
Expand All @@ -143,6 +145,7 @@ def sample_async(kernel,
shot_allocation (``ShotAllocationStrategy`` or ``None``): Strategy for
allocating shots across trajectories.
return_execution_data (bool): Include execution data in the result.
include_sequential_data (bool): Populate per-shot sequential data.

Returns:
``AsyncPTSBESampleResult``: A future whose ``.get()`` returns the
Expand All @@ -162,6 +165,6 @@ def sample_async(kernel,
impl = cudaq_runtime.ptsbe.sample_async_impl(
decorator.uniqName, module, shots_count, noise_model, max_trajectories,
sampling_strategy, shot_allocation, return_execution_data,
*processedArgs)
include_sequential_data, *processedArgs)

return AsyncSampleResult(impl, module)
9 changes: 7 additions & 2 deletions python/runtime/cudaq/algorithms/py_sample_ptsbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,14 @@ pySamplePTSBE(const std::string &shortName, MlirModule module,
std::size_t shots_count, noise_model noiseModel,
std::optional<std::size_t> max_trajectories,
py::object sampling_strategy, py::object shot_allocation_obj,
bool return_execution_data, py::args runtimeArgs) {
bool return_execution_data, bool include_sequential_data,
py::args runtimeArgs) {
if (shots_count == 0)
return ptsbe::sample_result();

ptsbe::PTSBEOptions ptsbe_options;
ptsbe_options.return_execution_data = return_execution_data;
ptsbe_options.include_sequential_data = include_sequential_data;
ptsbe_options.max_trajectories = max_trajectories;

if (!sampling_strategy.is_none())
Expand Down Expand Up @@ -107,10 +109,12 @@ pySampleAsyncPTSBE(const std::string &shortName, MlirModule module,
std::size_t shots_count, noise_model &noiseModel,
std::optional<std::size_t> max_trajectories,
py::object sampling_strategy, py::object shot_allocation_obj,
bool return_execution_data, py::args runtimeArgs) {
bool return_execution_data, bool include_sequential_data,
py::args runtimeArgs) {

ptsbe::PTSBEOptions ptsbe_options;
ptsbe_options.return_execution_data = return_execution_data;
ptsbe_options.include_sequential_data = include_sequential_data;
ptsbe_options.max_trajectories = max_trajectories;

if (!sampling_strategy.is_none())
Expand Down Expand Up @@ -396,6 +400,7 @@ Run PTSBE sampling on the provided kernel.
sampling_strategy: Sampling strategy or None for default (probabilistic).
shot_allocation: Shot allocation strategy or None for default (proportional).
return_execution_data: Whether to include execution data in the result.
include_sequential_data: Whether to populate per-shot sequential data.
*arguments: The kernel arguments.
Returns:
Expand Down
20 changes: 20 additions & 0 deletions python/tests/ptsbe/test_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,23 @@ def test_ptsbe_sample_with_apply_noise_in_kernel(kernel_with_apply_noise):
)
assert sum(result.count(bs) for bs in result) == 100
assert len(result) >= 1


def test_ptsbe_sequential_data_empty_by_default(depol_noise, bell_kernel):
    """Sampling without ``include_sequential_data`` yields no per-shot data."""
    outcome = cudaq.ptsbe.sample(bell_kernel,
                                 shots_count=10,
                                 noise_model=depol_noise)
    # The flag is not passed, so the sequential record is expected to be empty.
    assert len(outcome.get_sequential_data()) == 0


def test_ptsbe_sequential_data_populated_when_requested(depol_noise,
                                                        bell_kernel):
    """Sampling with ``include_sequential_data=True`` records every shot."""
    outcome = cudaq.ptsbe.sample(bell_kernel,
                                 shots_count=10,
                                 noise_model=depol_noise,
                                 include_sequential_data=True)
    per_shot = outcome.get_sequential_data()
    # One entry per requested shot ...
    assert len(per_shot) == 10
    # ... and every entry has length 2.
    assert all(len(bits) == 2 for bits in per_shot)
4 changes: 4 additions & 0 deletions runtime/cudaq/ptsbe/PTSBEOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ struct PTSBEOptions {
/// needed.
bool return_execution_data = false;

/// Populate per-shot sequential bitstring data on the result. When false
/// (default), only aggregated counts are produced.
bool include_sequential_data = false;

/// Maximum number of unique trajectories to generate. When `nullopt`,
/// defaults to the number of shots.
std::optional<std::size_t> max_trajectories = std::nullopt;
Expand Down
1 change: 1 addition & 0 deletions runtime/cudaq/ptsbe/PTSBESample.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ sample_result runSamplingPTSBE(KernelFunctor &&wrappedKernel,

// Stage 3: Build PTSBatch with trajectory generation and shot allocation
auto batch = buildPTSBatchFromTrace(std::move(ptsbeTrace), options, shots);
batch.includeSequentialData = options.include_sequential_data;
cudaq::info("[ptsbe] Allocated {} shots across {} trajectories",
batch.totalShots(), batch.trajectories.size());

Expand Down
21 changes: 16 additions & 5 deletions runtime/cudaq/ptsbe/PTSBESampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,23 @@ aggregateResults(const std::vector<cudaq::sample_result> &results) {
return cudaq::sample_result{};

cudaq::CountsDictionary aggregatedCounts;
std::vector<std::string> aggregatedSeqData;
for (const auto &res : results) {
// Skip empty results (e.g., trajectories with zero shots).
if (res.get_total_shots() == 0)
continue;

for (const auto &[bitstring, count] : res.to_map())
aggregatedCounts[bitstring] += count;

auto seq = res.sequential_data();
if (!seq.empty())
aggregatedSeqData.insert(aggregatedSeqData.end(),
std::make_move_iterator(seq.begin()),
std::make_move_iterator(seq.end()));
}
return cudaq::sample_result{cudaq::ExecutionResult{aggregatedCounts}};
cudaq::ExecutionResult er{aggregatedCounts};
er.sequentialData = std::move(aggregatedSeqData);
return cudaq::sample_result{std::move(er)};
}

template <typename ScalarType>
Expand Down Expand Up @@ -205,10 +213,13 @@ samplePTSBEGeneric(nvqir::CircuitSimulatorBase<ScalarType> &simulator,
simulator.flushGateQueue();

auto execResult =
simulator.sample(batch.measureQubits, static_cast<int>(traj.num_shots));
simulator.sample(batch.measureQubits, static_cast<int>(traj.num_shots),
batch.includeSequentialData);

results.push_back(
cudaq::sample_result{cudaq::ExecutionResult{execResult.counts}});
cudaq::ExecutionResult er{execResult.counts};
if (batch.includeSequentialData)
er.sequentialData = std::move(execResult.sequentialData);
results.push_back(cudaq::sample_result{std::move(er)});

if ((ti + 1) % progressInterval == 0)
cudaq::info("[ptsbe] Trajectory progress: {}/{} ({} shots)", ti + 1,
Expand Down
9 changes: 7 additions & 2 deletions runtime/cudaq/ptsbe/PTSBESampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ struct PTSBatch {
/// only which is a limitation of the current PTSBE implementation.
std::vector<std::size_t> measureQubits;

/// @brief Populate per-shot sequential bitstring data on the result. When
/// false (default), only aggregated counts are produced.
bool includeSequentialData = false;

/// @brief Calculate total shots across all trajectories
std::size_t totalShots() const;
};
Expand All @@ -53,7 +57,8 @@ aggregateResults(const std::vector<cudaq::sample_result> &results);
/// Caller must have set up ExecutionContext and allocated qubits
/// on the simulator before calling this function.
///
/// @param batch PTSBatch with trace, trajectories, and measureQubits
/// @param batch PTSBatch with trace, trajectories, measureQubits, and
/// includeSequentialData flag
/// @return Per-trajectory sample results
/// @throws std::runtime_error if simulator cast fails or contract violated
std::vector<cudaq::sample_result> samplePTSBE(const PTSBatch &batch);
Expand All @@ -67,7 +72,7 @@ std::vector<cudaq::sample_result> samplePTSBE(const PTSBatch &batch);
/// 4. Calls samplePTSBE for precision dispatch and trajectory execution
/// 5. Deallocates qubits and resets context
///
/// @param batch PTSBE specification
/// @param batch PTSBE specification (includes includeSequentialData flag)
/// @param contextType ExecutionContext type (default: `"ptsbe-sample"`).
/// @return Per-trajectory sample results
/// @throws std::runtime_error if simulator cast fails or gate conversion fails
Expand Down
3 changes: 2 additions & 1 deletion runtime/nvqir/CircuitSimulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,8 @@ class CircuitSimulator {
/// @brief Sample the current multi-qubit state on the given qubit indices
/// over a certain number of shots
virtual cudaq::ExecutionResult
sample(const std::vector<std::size_t> &qubitIdxs, const int shots) = 0;
sample(const std::vector<std::size_t> &qubitIdxs, const int shots,
bool includeSequentialData = true) = 0;

/// @brief Return the name of this CircuitSimulator
virtual std::string name() const = 0;
Expand Down
3 changes: 2 additions & 1 deletion runtime/nvqir/cudensitymat/CuDensityMatSim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ class CuDensityMatSim : public nvqir::CircuitSimulatorBase<double> {
"[dynamics target] Quantum gate simulation is not supported.");
}
cudaq::ExecutionResult sample(const std::vector<std::size_t> &qubitIdxs,
const int shots) override {
const int shots,
bool includeSequentialData = true) override {
throw std::runtime_error("[dynamics target] Quantum gate simulation is not "
"supported.");
return cudaq::ExecutionResult();
Expand Down
27 changes: 16 additions & 11 deletions runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,8 @@ class CuStateVecCircuitSimulator

/// @brief Sample the multi-qubit state.
cudaq::ExecutionResult sample(const std::vector<std::size_t> &measuredBits,
const int shots) override {
const int shots,
bool includeSequentialData = true) override {
ScopedTraceWithContext(cudaq::TIMING_SAMPLE, "CuStateVecSimulator::sample");
double expVal = 0.0;
// cudaq::CountsDictionary counts;
Expand Down Expand Up @@ -706,19 +707,23 @@ class CuStateVecCircuitSimulator
extraWorkspace = nullptr;
}

std::vector<std::string> sequentialData;
sequentialData.reserve(shots);

cudaq::ExecutionResult counts;

// We've sampled, convert the results to our ExecutionResult counts
for (int i = 0; i < shots; ++i) {
auto bitstring = std::bitset<64>(bitstrings0[i])
.to_string()
.erase(0, 64 - measuredBits.size());
// Bitstrings are sorted in ascending order.
// Use this to avoid O(N) string conversions.
for (int i = 0; i < shots;) {
auto val = bitstrings0[i];
int runLen = 1;
while (i + runLen < shots && bitstrings0[i + runLen] == val)
++runLen;
auto bitstring =
std::bitset<64>(val).to_string().erase(0, 64 - measuredBits.size());
std::reverse(bitstring.begin(), bitstring.end());
counts.appendResult(bitstring, 1);
sequentialData.push_back(std::move(bitstring));
if (includeSequentialData)
counts.appendResult(bitstring, runLen);
else
counts.counts[std::move(bitstring)] += runLen;
i += runLen;
}

// Compute the expectation value from the counts
Expand Down
4 changes: 2 additions & 2 deletions runtime/nvqir/cutensornet/simulator_cutensornet.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ class SimulatorTensorNetBase : public nvqir::CircuitSimulatorBase<ScalarType> {

/// @brief Sample a subset of qubits
virtual cudaq::ExecutionResult
sample(const std::vector<std::size_t> &measuredBits,
const int shots) override;
sample(const std::vector<std::size_t> &measuredBits, const int shots,
bool includeSequentialData = true) override;

/// @brief Evaluate the expectation value of a given observable
virtual cudaq::observe_result observe(const cudaq::spin_op &op) override;
Expand Down
15 changes: 9 additions & 6 deletions runtime/nvqir/cutensornet/simulator_cutensornet.inc
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,8 @@ bool SimulatorTensorNetBase<ScalarType>::measureQubit(
/// @brief Sample a subset of qubits
template <typename ScalarType>
cudaq::ExecutionResult SimulatorTensorNetBase<ScalarType>::sample(
const std::vector<std::size_t> &measuredBits, const int shots) {
const std::vector<std::size_t> &measuredBits, const int shots,
bool includeSequentialData) {
LOG_API_TIME();
std::vector<int32_t> measuredBitIds(measuredBits.begin(), measuredBits.end());
if (shots < 1) {
Expand Down Expand Up @@ -449,11 +450,13 @@ cudaq::ExecutionResult SimulatorTensorNetBase<ScalarType>::sample(
}

counts.expectationValue = expVal;
counts.sequentialData.resize(sum_counts);
std::size_t s = 0;
for (auto &kv : counts.counts)
for (std::size_t c = 0; c < kv.second; c++)
counts.sequentialData[s++] = kv.first;
if (includeSequentialData) {
counts.sequentialData.resize(sum_counts);
std::size_t s = 0;
for (auto &kv : counts.counts)
for (std::size_t c = 0; c < kv.second; c++)
counts.sequentialData[s++] = kv.first;
}

return counts;
}
Expand Down
14 changes: 10 additions & 4 deletions runtime/nvqir/cutensornet/simulator_mps.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,12 +236,14 @@ class SimulatorMPS : public SimulatorTensorNetBase<ScalarType> {

/// @brief Sample a subset of qubits
cudaq::ExecutionResult sample(const std::vector<std::size_t> &measuredBits,
const int shots) override {
const int shots,
bool includeSequentialData = true) override {
auto executionContext = cudaq::getExecutionContext();

const bool hasNoise = executionContext && executionContext->noiseModel;
if (!hasNoise || shots < 1)
return SimulatorTensorNetBase<ScalarType>::sample(measuredBits, shots);
return SimulatorTensorNetBase<ScalarType>::sample(measuredBits, shots,
includeSequentialData);

LOG_API_TIME();
cudaq::ExecutionResult counts;
Expand Down Expand Up @@ -274,8 +276,12 @@ class SimulatorMPS : public SimulatorTensorNetBase<ScalarType> {
const auto samples = m_state->executeSample(
sampler, workDesc, measuredBitIds, 1, requireCacheWorkspace());
assert(samples.size() == 1);
for (const auto &[bitString, count] : samples)
counts.appendResult(bitString, count);
for (const auto &[bitString, count] : samples) {
if (includeSequentialData)
counts.appendResult(bitString, count);
else
counts.counts[bitString] += count;
}
}

for (const auto &[k, v] : samplerCache) {
Expand Down
8 changes: 6 additions & 2 deletions runtime/nvqir/qpp/QppCircuitSimulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ class QppCircuitSimulator : public nvqir::CircuitSimulatorBase<double> {

/// @brief Sample the multi-qubit state.
cudaq::ExecutionResult sample(const std::vector<std::size_t> &qubits,
const int shots) override {
const int shots,
bool includeSequentialData = true) override {
if (shots < 1) {
double expectationValue = calculateExpectationValue(qubits);
CUDAQ_INFO("Computed expectation value = {}", expectationValue);
Expand Down Expand Up @@ -382,7 +383,10 @@ class QppCircuitSimulator : public nvqir::CircuitSimulatorBase<double> {
// Add to the sample result
// in mid-circ sampling mode this will append 1 bitstring
auto bitstring = bitstream.str();
counts.appendResult(bitstring, count);
if (includeSequentialData)
counts.appendResult(bitstring, count);
else
counts.counts[bitstring] += count;
auto par = cudaq::sample_result::has_even_parity(bitstring);
auto p = count / (double)shots;
if (!par) {
Expand Down
3 changes: 2 additions & 1 deletion runtime/nvqir/resourcecounter/ResourceCounter.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ class ResourceCounter : public nvqir::CircuitSimulatorBase<double> {

/// @brief Sample the multi-qubit state.
cudaq::ExecutionResult sample(const std::vector<std::size_t> &qubits,
const int shots) override {
const int shots,
bool includeSequentialData = true) override {
throw std::runtime_error("Can't sample from resource counter simulator!");
}

Expand Down
Loading
Loading