diff --git a/docs/sphinx/using/backends/sims/tnsims.rst b/docs/sphinx/using/backends/sims/tnsims.rst index 2e0a46ad33f..24c6fbd76aa 100644 --- a/docs/sphinx/using/backends/sims/tnsims.rst +++ b/docs/sphinx/using/backends/sims/tnsims.rst @@ -81,6 +81,9 @@ Specific aspects of the simulation can be configured by setting the following of * **`CUDA_VISIBLE_DEVICES=X`**: Makes the process only see GPU X on multi-GPU nodes. Each MPI process must only see its own dedicated GPU. For example, if you run 8 MPI processes on a DGX system with 8 GPUs, each MPI process should be assigned its own dedicated GPU via `CUDA_VISIBLE_DEVICES` when invoking `mpiexec` (or `mpirun`) commands. * **`OMP_PLACES=cores`**: Set this environment variable to improve CPU parallelization. * **`OMP_NUM_THREADS=X`**: To enable CPU parallelization, set X to `NUMBER_OF_CORES_PER_NODE/NUMBER_OF_GPUS_PER_NODE`. +* **`CUDAQ_TENSORNET_CONTROLLED_RANK=X`**: Specify the number of controlled qubits whereby the full tensor body of the controlled gate is expanded. If the number of controlled qubits is greater than this value, the gate is applied as a controlled tensor operator to the tensor network state. Default value is 1. +* **`CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=X`**: Set this environment variable to `TRUE` (`ON`) or `FALSE` (`OFF`) to enable or disable contraction path reuse when computing expectation values. Default is `OFF`. +* **`CUDAQ_TENSORNET_NUM_HYPER_SAMPLES=X`**: Specify the number of hyper samples used in the tensor network contraction path finder. Default value is 8 if not specified. .. 
note:: diff --git a/runtime/common/NoiseModel.cpp b/runtime/common/NoiseModel.cpp index ab7b4446df1..aa276f68284 100644 --- a/runtime/common/NoiseModel.cpp +++ b/runtime/common/NoiseModel.cpp @@ -97,7 +97,7 @@ kraus_channel &kraus_channel::operator=(const kraus_channel &other) { return *this; } -std::vector kraus_channel::get_ops() { return ops; } +std::vector kraus_channel::get_ops() const { return ops; } void kraus_channel::push_back(kraus_op op) { ops.push_back(op); } void noise_model::add_channel(const std::string &quantumOp, diff --git a/runtime/common/NoiseModel.h b/runtime/common/NoiseModel.h index 23f8997a081..6ee2bc5e3da 100644 --- a/runtime/common/NoiseModel.h +++ b/runtime/common/NoiseModel.h @@ -185,7 +185,7 @@ class kraus_channel { kraus_channel &operator=(const kraus_channel &other); /// @brief Return all kraus_ops in this channel - std::vector get_ops(); + std::vector get_ops() const; /// @brief Add a kraus_op to this channel. void push_back(kraus_op op); diff --git a/runtime/nvqir/cutensornet/CMakeLists.txt b/runtime/nvqir/cutensornet/CMakeLists.txt index 4e73dc9a32a..6490e16d36e 100644 --- a/runtime/nvqir/cutensornet/CMakeLists.txt +++ b/runtime/nvqir/cutensornet/CMakeLists.txt @@ -60,8 +60,8 @@ set(CUTENSORNET_PATCH ${CMAKE_MATCH_1}) set(CUTENSORNET_VERSION ${CUTENSORNET_MAJOR}.${CUTENSORNET_MINOR}.${CUTENSORNET_PATCH}) message(STATUS "Found cutensornet version: ${CUTENSORNET_VERSION}") -# We need cutensornet v2.5.0+ -if (${CUTENSORNET_VERSION} VERSION_GREATER_EQUAL "2.5") +# We need cutensornet v2.6.0+ (cutensornetStateApplyUnitaryChannel) +if (${CUTENSORNET_VERSION} VERSION_GREATER_EQUAL "2.6") set (BASE_TENSOR_BACKEND_SRS simulator_cutensornet.cpp tensornet_spin_op.cpp diff --git a/runtime/nvqir/cutensornet/simulator_cutensornet.cpp b/runtime/nvqir/cutensornet/simulator_cutensornet.cpp index f99e189fe4f..a6aaa7a81d4 100644 --- a/runtime/nvqir/cutensornet/simulator_cutensornet.cpp +++ b/runtime/nvqir/cutensornet/simulator_cutensornet.cpp @@ 
-11,6 +11,17 @@ #include "cutensornet.h" #include "tensornet_spin_op.h" +namespace { +const std::vector> matPauliI = { + {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}}; +const std::vector> matPauliX{ + {0.0, 0.0}, {1.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}}; +const std::vector> matPauliY{ + {0.0, 0.0}, {0.0, -1.0}, {0.0, 1.0}, {0.0, 0.0}}; +const std::vector> matPauliZ{ + {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {-1.0, 0.0}}; +} // namespace + namespace nvqir { SimulatorTensorNetBase::SimulatorTensorNetBase() @@ -137,6 +148,201 @@ void SimulatorTensorNetBase::applyGate(const GateApplicationTask &task) { } } +// Helper to check whether a matrix is a scaled unitary matrix, i.e., `k * U` +// where U is a unitary matrix. If so, it also returns the `k` factor. +// Otherwise, return a nullopt. +template +std::optional isScaledUnitary(const std::vector> &mat, + double eps) { + typedef Eigen::Matrix, Eigen::Dynamic, Eigen::Dynamic, + Eigen::RowMajor> + RowMajorMatTy; + const int dim = std::log2(mat.size()); + Eigen::Map kMat(mat.data(), dim, dim); + if (kMat.isZero()) + return std::nullopt; + // Check that (K_dag * K) is a scaled identity matrix + // i.e., the K matrix is a scaled unitary. 
+ auto kdK = kMat.adjoint() * kMat; + if (!kdK.isDiagonal()) + return std::nullopt; + // First element + std::complex val = kdK(0, 0); + if (std::abs(val) > eps && std::abs(val.imag()) < eps) { + auto scaledKdK = (std::complex{1.0} / val) * kdK; + if (scaledKdK.isIdentity()) + return val.real(); + } + return std::nullopt; +} + +std::optional, + std::vector>>>> static computeUnitaryMixture(const std:: + vector>> + &krausOps, + double tol = 1e-6) { + std::vector probs; + std::vector>> mats; + const auto scaleMat = [](const std::vector> &mat, + double scaleFactor) { + std::vector> scaledMat = mat; + for (auto &x : scaledMat) + x /= scaleFactor; + return scaledMat; + }; + for (const auto &op : krausOps) { + const auto scaledFactor = isScaledUnitary(op, tol); + if (!scaledFactor.has_value()) + return std::nullopt; + probs.emplace_back(scaledFactor.value()); + mats.emplace_back(scaleMat(op, scaledFactor.value())); + } + + if (std::abs(1.0 - std::reduce(probs.begin(), probs.end())) > tol) + return std::nullopt; + + return std::make_pair(probs, mats); +} + +// Helper to look up a device memory pointer from a cache. +// If not found, allocate a new device memory buffer and put it to the cache. +static void * +getOrCacheMat(const std::string &key, + const std::vector> &mat, + std::unordered_map &gateDeviceMemCache) { + const auto iter = gateDeviceMemCache.find(key); + + if (iter == gateDeviceMemCache.end()) { + void *dMem = allocateGateMatrix(mat); + gateDeviceMemCache[key] = dMem; + return dMem; + } + return iter->second; +}; + +void SimulatorTensorNetBase::applyKrausChannel( + const std::vector &qubits, + const cudaq::kraus_channel &krausChannel) { + LOG_API_TIME(); + switch (krausChannel.noise_type) { + case cudaq::noise_model_type::depolarization_channel: { + if (krausChannel.parameters.size() != 1) + throw std::runtime_error( + fmt::format("Invalid parameters for a depolarization channel. 
" "Expecting 1 parameter, got {}.", + krausChannel.parameters.size())); + const std::vector<void *> channelMats{ + getOrCacheMat("PauliI", matPauliI, m_gateDeviceMemCache), + getOrCacheMat("PauliX", matPauliX, m_gateDeviceMemCache), + getOrCacheMat("PauliY", matPauliY, m_gateDeviceMemCache), + getOrCacheMat("PauliZ", matPauliZ, m_gateDeviceMemCache)}; + const double p = krausChannel.parameters[0]; + const std::vector<double> probabilities = {1 - p, p / 3., p / 3., p / 3.}; + m_state->applyUnitaryChannel(qubits, channelMats, probabilities); + break; + } + case cudaq::noise_model_type::bit_flip_channel: { + if (krausChannel.parameters.size() != 1) + throw std::runtime_error( + fmt::format("Invalid parameters for a bit-flip channel. " + "Expecting 1 parameter, got {}.", + krausChannel.parameters.size())); + + const std::vector<void *> channelMats{ + getOrCacheMat("PauliI", matPauliI, m_gateDeviceMemCache), + getOrCacheMat("PauliX", matPauliX, m_gateDeviceMemCache)}; + const double p = krausChannel.parameters[0]; + const std::vector<double> probabilities = {1 - p, p}; + m_state->applyUnitaryChannel(qubits, channelMats, probabilities); + break; + } + case cudaq::noise_model_type::phase_flip_channel: { + if (krausChannel.parameters.size() != 1) + throw std::runtime_error( + fmt::format("Invalid parameters for a phase-flip channel. " + "Expecting 1 parameter, got {}.", + krausChannel.parameters.size())); + + const std::vector<void *> channelMats{ + getOrCacheMat("PauliI", matPauliI, m_gateDeviceMemCache), + getOrCacheMat("PauliZ", matPauliZ, m_gateDeviceMemCache)}; + const double p = krausChannel.parameters[0]; + const std::vector<double> probabilities = {1 - p, p}; + m_state->applyUnitaryChannel(qubits, channelMats, probabilities); + break; + } + case cudaq::noise_model_type::amplitude_damping_channel: { + if (krausChannel.parameters.size() != 1) + throw std::runtime_error( + fmt::format("Invalid parameters for an amplitude damping channel. 
" + "Expecting 1 parameter, got {}.", + krausChannel.parameters.size())); + if (krausChannel.parameters[0] != 0.0) + throw std::runtime_error("Non-unitary noise channels are not supported."); + break; + } + case cudaq::noise_model_type::unknown: { + std::vector>> mats; + for (const auto &op : krausChannel.get_ops()) + mats.emplace_back(op.data); + auto asUnitaryMixture = computeUnitaryMixture(mats); + if (asUnitaryMixture.has_value()) { + auto &[probabilities, unitaries] = asUnitaryMixture.value(); + std::vector channelMats; + for (const auto &mat : unitaries) + channelMats.emplace_back(getOrCacheMat( + "ScaledUnitary_" + std::to_string(vecComplexHash(mat)), mat, + m_gateDeviceMemCache)); + m_state->applyUnitaryChannel(qubits, channelMats, probabilities); + } else { + throw std::runtime_error("Non-unitary noise channels are not supported."); + } + break; + } + default: + throw std::runtime_error( + "Unsupported noise model type: " + + std::to_string(static_cast(krausChannel.noise_type))); + } +} + +void SimulatorTensorNetBase::applyNoiseChannel( + const std::string_view gateName, const std::vector &controls, + const std::vector &targets, + const std::vector ¶ms) { + LOG_API_TIME(); + // Do nothing if no execution context + if (!executionContext) + return; + + // Do nothing if no noise model + if (!executionContext->noiseModel) + return; + + // Get the name as a string + std::string gName(gateName); + std::vector qubits{controls.begin(), controls.end()}; + qubits.insert(qubits.end(), targets.begin(), targets.end()); + + // Get the Kraus channels specified for this gate and qubits + auto krausChannels = executionContext->noiseModel->get_channels( + gName, targets, controls, params); + + // If none, do nothing + if (krausChannels.empty()) + return; + + cudaq::info( + "[SimulatorTensorNetBase] Applying {} kraus channels on qubits: {}", + krausChannels.size(), qubits); + + for (const auto &krausChannel : krausChannels) + applyKrausChannel(qubits, krausChannel); +} + /// 
@brief Reset the state of a given qubit to zero void SimulatorTensorNetBase::resetQubit(const std::size_t qubitIdx) { flushGateQueue(); @@ -225,10 +431,7 @@ cudaq::ExecutionResult SimulatorTensorNetBase::sample(const std::vector &measuredBits, const int shots) { LOG_API_TIME(); - std::vector measuredBitIds; - std::transform(measuredBits.begin(), measuredBits.end(), - std::back_inserter(measuredBitIds), - [](std::size_t idx) { return static_cast(idx); }); + std::vector measuredBitIds(measuredBits.begin(), measuredBits.end()); if (shots < 1) { cudaq::spin_op::spin_op_term allZTerm(2 * m_state->getNumQubits(), 0); for (const auto &m : measuredBits) @@ -239,7 +442,8 @@ SimulatorTensorNetBase::sample(const std::vector &measuredBits, } prepareQubitTensorState(); - const auto samples = m_state->sample(measuredBitIds, shots); + const auto samples = + m_state->sample(measuredBitIds, shots, requireCacheWorkspace()); cudaq::ExecutionResult counts(samples); double expVal = 0.0; // Compute the expectation value from the counts @@ -286,7 +490,8 @@ SimulatorTensorNetBase::observe(const cudaq::spin_op &ham) { // cutensornetNetworkOperator_t and compute the expectation value. 
TensorNetworkSpinOp spinOp(ham, m_cutnHandle); std::complex expVal = - m_state->computeExpVal(spinOp.getNetworkOperator()); + m_state->computeExpVal(spinOp.getNetworkOperator(), + this->executionContext->numberTrajectories); expVal += spinOp.getIdentityTermOffset(); return cudaq::observe_result(expVal.real(), ham, cudaq::sample_result(cudaq::ExecutionResult( @@ -316,7 +521,8 @@ SimulatorTensorNetBase::observe(const cudaq::spin_op &ham) { }); // Compute the expectation value for all terms - const auto termExpVals = m_state->computeExpVals(terms); + const auto termExpVals = m_state->computeExpVals( + terms, this->executionContext->numberTrajectories); std::complex expVal = 0.0; // Construct per-term data in the final observe_result std::vector results; diff --git a/runtime/nvqir/cutensornet/simulator_cutensornet.h b/runtime/nvqir/cutensornet/simulator_cutensornet.h index 8d11c88deed..c99912496c5 100644 --- a/runtime/nvqir/cutensornet/simulator_cutensornet.h +++ b/runtime/nvqir/cutensornet/simulator_cutensornet.h @@ -30,6 +30,12 @@ class SimulatorTensorNetBase : public nvqir::CircuitSimulatorBase { /// @brief Apply quantum gate void applyGate(const GateApplicationTask &task) override; + /// @brief Apply a noise channel + void applyNoiseChannel(const std::string_view gateName, + const std::vector &controls, + const std::vector &targets, + const std::vector ¶ms) override; + // Override base calculateStateDim (we don't instantiate full state vector in // the tensornet backend). When the user want to retrieve the state vector, we // check if it is feasible to do so. 
@@ -88,6 +94,15 @@ class SimulatorTensorNetBase : public nvqir::CircuitSimulatorBase { /// @brief Query if direct expectation value calculation is enabled virtual bool canHandleObserve() override; + /// @brief Return true if this simulator can use cache workspace (e.g., for + /// intermediate tensors) + virtual bool requireCacheWorkspace() const = 0; + +private: + // Helper to apply a Kraus channel + void applyKrausChannel(const std::vector &qubits, + const cudaq::kraus_channel &channel); + protected: cutensornetHandle_t m_cutnHandle; std::unique_ptr m_state; diff --git a/runtime/nvqir/cutensornet/simulator_mps_register.cpp b/runtime/nvqir/cutensornet/simulator_mps_register.cpp index d6830b90a45..57b7f090bef 100644 --- a/runtime/nvqir/cutensornet/simulator_mps_register.cpp +++ b/runtime/nvqir/cutensornet/simulator_mps_register.cpp @@ -153,6 +153,164 @@ class SimulatorMPS : public SimulatorTensorNetBase { SimulatorTensorNetBase::applyExpPauli(theta, controls, qubitIds, op); } + // Helper to compute expectation value from a bit string distribution + static double computeExpValFromDistribution( + const std::unordered_map &distribution, + int shots) { + double expVal = 0.0; + // Compute the expectation value from the distribution + for (auto &kv : distribution) { + auto par = cudaq::sample_result::has_even_parity(kv.first); + auto p = kv.second / (double)shots; + if (!par) { + p = -p; + } + expVal += p; + } + return expVal; + }; + + // Set up the MPS factorization before trajectory simulation run loop. 
+ // We only need to do cutensornetStateFinalizeMPS once + void setUpFactorizeForTrajectoryRuns() { + for (auto &tensor : m_mpsTensors_d) { + HANDLE_CUDA_ERROR(cudaFree(tensor.deviceData)); + } + m_mpsTensors_d.clear(); + m_mpsTensors_d = + m_state->setupMPSFactorize(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); + } + + /// @brief Sample a subset of qubits + cudaq::ExecutionResult sample(const std::vector &measuredBits, + const int shots) override { + const bool hasNoise = executionContext && executionContext->noiseModel; + if (!hasNoise || shots < 1) + return SimulatorTensorNetBase::sample(measuredBits, shots); + + LOG_API_TIME(); + cudaq::ExecutionResult counts; + std::vector measuredBitIds(measuredBits.begin(), + measuredBits.end()); + + setUpFactorizeForTrajectoryRuns(); + std::map, std::pair> + samplerCache; + for (int i = 0; i < shots; ++i) { + // As the Kraus operator sampling may change the MPS state, we need to + // re-compute the factorization in each trajectory. 
+ m_state->computeMPSFactorize(m_mpsTensors_d); + std::vector samplerKey; + for (const auto &tensor : m_mpsTensors_d) + samplerKey.insert(samplerKey.end(), tensor.extents.begin(), + tensor.extents.end()); + + auto iter = samplerCache.find(samplerKey); + if (iter == samplerCache.end()) { + const auto [itInsert, success] = samplerCache.insert( + {samplerKey, m_state->prepareSample(measuredBitIds)}); + assert(success); + iter = itInsert; + } + + assert(iter != samplerCache.end()); + auto &[sampler, workDesc] = iter->second; + const auto samples = m_state->executeSample( + sampler, workDesc, measuredBitIds, 1, requireCacheWorkspace()); + assert(samples.size() == 1); + for (const auto &[bitString, count] : samples) + counts.appendResult(bitString, count); + } + + for (const auto &[k, v] : samplerCache) { + auto &[sampler, workDesc] = v; + // Destroy the workspace descriptor + HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc)); + // Destroy the quantum circuit sampler + HANDLE_CUTN_ERROR(cutensornetDestroySampler(sampler)); + } + + counts.expectationValue = + computeExpValFromDistribution(counts.counts, shots); + + return counts; + } + + // Helper to prepare term-by-term data from a spin op + static std::tuple, + std::vector, + std::vector>> + prepareSpinOpTermData(const cudaq::spin_op &ham) { + std::vector termStrs; + std::vector terms; + std::vector> coeffs; + termStrs.reserve(ham.num_terms()); + terms.reserve(ham.num_terms()); + coeffs.reserve(ham.num_terms()); + + // Note: as the spin_op terms are stored as an unordered map, we need to + // iterate in one loop to collect all the data (string, symplectic data, and + // coefficient). 
+ ham.for_each_term([&](cudaq::spin_op &term) { + termStrs.emplace_back(term.to_string(false)); + auto [symplecticRep, coeff] = term.get_raw_data(); + if (symplecticRep.size() != 1 || coeff.size() != 1) + throw std::runtime_error(fmt::format( + "Unexpected data encountered when iterating spin operator terms: " + "expecting a single term, got {} terms.", + symplecticRep.size())); + terms.emplace_back(symplecticRep[0]); + coeffs.emplace_back(coeff[0]); + }); + return std::make_tuple(termStrs, terms, coeffs); + } + + cudaq::observe_result observe(const cudaq::spin_op &ham) override { + LOG_API_TIME(); + const bool hasNoise = executionContext && executionContext->noiseModel; + // If no noise, just use base class implementation. + if (!hasNoise) + return SimulatorTensorNetBase::observe(ham); + + setUpFactorizeForTrajectoryRuns(); + const std::size_t numObserveTrajectories = + this->executionContext->numberTrajectories.has_value() + ? this->executionContext->numberTrajectories.value() + : TensorNetState::g_numberTrajectoriesForObserve; + + auto [termStrs, terms, coeffs] = prepareSpinOpTermData(ham); + std::vector> termExpVals(terms.size(), 0.0); + + for (std::size_t i = 0; i < numObserveTrajectories; ++i) { + // As the Kraus operator sampling may change the MPS state, we need to + // re-compute the factorization in each trajectory. + m_state->computeMPSFactorize(m_mpsTensors_d); + // We run a single trajectory for MPS as the final MPS form depends on the + // randomly-selected noise op. 
+ const auto trajTermExpVals = m_state->computeExpVals(terms, 1); + + for (std::size_t idx = 0; idx < terms.size(); ++idx) { + termExpVals[idx] += (trajTermExpVals[idx] / + static_cast(numObserveTrajectories)); + } + } + std::complex expVal = 0.0; + // Construct per-term data in the final observe_result + std::vector results; + results.reserve(terms.size()); + + for (std::size_t i = 0; i < terms.size(); ++i) { + expVal += (coeffs[i] * termExpVals[i]); + results.emplace_back( + cudaq::ExecutionResult({}, termStrs[i], termExpVals[i].real())); + } + + cudaq::sample_result perTermData(expVal.real(), results); + return cudaq::observe_result(expVal.real(), ham, perTermData); + } + virtual std::string name() const override { return "tensornet-mps"; } CircuitSimulator *clone() override { @@ -248,6 +406,8 @@ class SimulatorMPS : public SimulatorTensorNetBase { m_cutnHandle, m_randomEngine); } + bool requireCacheWorkspace() const override { return false; } + virtual ~SimulatorMPS() noexcept { for (auto &tensor : m_mpsTensors_d) { HANDLE_CUDA_ERROR(cudaFree(tensor.deviceData)); diff --git a/runtime/nvqir/cutensornet/simulator_tensornet_register.cpp b/runtime/nvqir/cutensornet/simulator_tensornet_register.cpp index 132e80f3b4c..649a8775b2e 100644 --- a/runtime/nvqir/cutensornet/simulator_tensornet_register.cpp +++ b/runtime/nvqir/cutensornet/simulator_tensornet_register.cpp @@ -134,6 +134,7 @@ class SimulatorTensorNet : public SimulatorTensorNetBase { } } } + bool requireCacheWorkspace() const override { return true; } private: friend nvqir::CircuitSimulator * ::getCircuitSimulator_tensornet(); diff --git a/runtime/nvqir/cutensornet/tensornet_state.cpp b/runtime/nvqir/cutensornet/tensornet_state.cpp index c6e2b2d712c..1e6f83f8199 100644 --- a/runtime/nvqir/cutensornet/tensornet_state.cpp +++ b/runtime/nvqir/cutensornet/tensornet_state.cpp @@ -12,6 +12,27 @@ #include namespace nvqir { +std::int32_t TensorNetState::numHyperSamples = []() { + constexpr int32_t 
defaultNumHyperSamples = 8; + if (auto envVal = std::getenv("CUDAQ_TENSORNET_NUM_HYPER_SAMPLES")) { + int32_t specifiedNumHyperSamples = 0; + try { + specifiedNumHyperSamples = std::stoi(envVal); + if (specifiedNumHyperSamples <= 0) { + // for the 'catch' below to handle. + throw std::invalid_argument("must be a positive number"); + } + cudaq::info("Update number of hyper samples from {} to {}.", + defaultNumHyperSamples, specifiedNumHyperSamples); + return specifiedNumHyperSamples; + } catch (...) { + throw std::runtime_error( + "Invalid CUDAQ_TENSORNET_NUM_HYPER_SAMPLES environment " + "variable, must be a positive integer."); + } + } + return defaultNumHyperSamples; +}(); TensorNetState::TensorNetState(std::size_t numQubits, ScratchDeviceMem &inScratchPad, @@ -74,6 +95,21 @@ void TensorNetState::applyGate(const std::vector &controlQubits, controlQubits, adjoint, true}); } +void TensorNetState::applyUnitaryChannel( + const std::vector &qubits, const std::vector &krausOps, + const std::vector &probabilities) { + LOG_API_TIME(); + HANDLE_CUTN_ERROR(cutensornetStateApplyUnitaryChannel( + m_cutnHandle, m_quantumState, /*numStateModes=*/qubits.size(), + /*stateModes=*/qubits.data(), + /*numTensors=*/krausOps.size(), + /*tensorData=*/const_cast(krausOps.data()), + /*tensorModeStrides=*/nullptr, + /*probabilities=*/probabilities.data(), &m_tensorId)); + m_tensorOps.emplace_back(AppliedTensorOp{qubits, krausOps, probabilities}); + m_hasNoiseChannel = true; +} + void TensorNetState::applyQubitProjector(void *proj_d, const std::vector &qubitIdx) { LOG_API_TIME(); @@ -99,23 +135,38 @@ void TensorNetState::addQubits(std::size_t numQubits) { // the new wires are all empty (zero state). 
int64_t tensorId = 0; for (auto &op : m_tensorOps) - if (op.controlQubitIds.empty()) { - HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator( - m_cutnHandle, m_quantumState, op.targetQubitIds.size(), - op.targetQubitIds.data(), op.deviceData, nullptr, /*immutable*/ 1, - /*adjoint*/ static_cast(op.isAdjoint), - /*unitary*/ static_cast(op.isUnitary), &tensorId)); - } else { - HANDLE_CUTN_ERROR(cutensornetStateApplyControlledTensorOperator( + if (op.deviceData) { + if (op.controlQubitIds.empty()) { + HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator( + m_cutnHandle, m_quantumState, op.targetQubitIds.size(), + op.targetQubitIds.data(), op.deviceData, nullptr, /*immutable*/ 1, + /*adjoint*/ static_cast(op.isAdjoint), + /*unitary*/ static_cast(op.isUnitary), &tensorId)); + } else { + HANDLE_CUTN_ERROR(cutensornetStateApplyControlledTensorOperator( + m_cutnHandle, m_quantumState, + /*numControlModes=*/op.controlQubitIds.size(), + /*stateControlModes=*/op.controlQubitIds.data(), + /*stateControlValues=*/nullptr, + /*numTargetModes*/ op.targetQubitIds.size(), + /*stateTargetModes*/ op.targetQubitIds.data(), op.deviceData, + nullptr, + /*immutable*/ 1, + /*adjoint*/ static_cast(op.isAdjoint), + /*unitary*/ static_cast(op.isUnitary), &m_tensorId)); + } + } else if (op.unitaryChannel.has_value()) { + HANDLE_CUTN_ERROR(cutensornetStateApplyUnitaryChannel( m_cutnHandle, m_quantumState, - /*numControlModes=*/op.controlQubitIds.size(), - /*stateControlModes=*/op.controlQubitIds.data(), - /*stateControlValues=*/nullptr, - /*numTargetModes*/ op.targetQubitIds.size(), - /*stateTargetModes*/ op.targetQubitIds.data(), op.deviceData, nullptr, - /*immutable*/ 1, - /*adjoint*/ static_cast(op.isAdjoint), - /*unitary*/ static_cast(op.isUnitary), &m_tensorId)); + /*numStateModes=*/op.targetQubitIds.size(), + /*stateModes=*/op.targetQubitIds.data(), + /*numTensors=*/op.unitaryChannel->tensorData.size(), + /*tensorData=*/op.unitaryChannel->tensorData.data(), + 
/*tensorModeStrides=*/nullptr, + /*probabilities=*/op.unitaryChannel->probabilities.data(), + &m_tensorId)); + } else { + throw std::runtime_error("Invalid AppliedTensorOp encountered."); } } @@ -148,9 +199,8 @@ void TensorNetState::addQubits(std::span> stateVec) { m_tempDevicePtrs.emplace_back(d_proj); } -std::unordered_map -TensorNetState::sample(const std::vector &measuredBitIds, - int32_t shots) { +std::pair +TensorNetState::prepareSample(const std::vector &measuredBitIds) { LOG_API_TIME(); // Create the quantum circuit sampler cutensornetStateSampler_t sampler; @@ -161,14 +211,10 @@ TensorNetState::sample(const std::vector &measuredBitIds, measuredBitIds.data(), &sampler)); } - // Configure the quantum circuit sampler - constexpr int32_t numHyperSamples = - 8; // desired number of hyper samples used in the tensor network - // contraction path finder { ScopedTraceWithContext("cutensornetSamplerConfigure"); HANDLE_CUTN_ERROR(cutensornetSamplerConfigure( - m_cutnHandle, sampler, CUTENSORNET_SAMPLER_OPT_NUM_HYPER_SAMPLES, + m_cutnHandle, sampler, CUTENSORNET_SAMPLER_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples))); // Generate a random seed from the backend simulator's random engine. 
@@ -211,9 +257,47 @@ TensorNetState::sample(const std::vector &measuredBitIds, throw std::runtime_error("ERROR: Insufficient workspace size on Device!"); } + return std::make_pair(sampler, workDesc); +} + +std::unordered_map +TensorNetState::executeSample(cutensornetStateSampler_t &sampler, + cutensornetWorkspaceDescriptor_t &workDesc, + const std::vector &measuredBitIds, + int32_t shots, bool enableCacheWorkspace) { + int64_t reqCacheSize{0}; + void *d_cache{nullptr}; + if (enableCacheWorkspace) { + ScopedTraceWithContext("Allocate Cache Workspace"); + HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize( + m_cutnHandle, workDesc, CUTENSORNET_WORKSIZE_PREF_RECOMMENDED, + CUTENSORNET_MEMSPACE_DEVICE, CUTENSORNET_WORKSPACE_CACHE, + &reqCacheSize)); + + // Query the GPU memory capacity + std::size_t freeSize{0}, totalSize{0}; + HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize)); + // Compute the minimum of [required size, or 90% of the free memory (to + // avoid oversubscribing)] (see cutensornet examples) + const std::size_t cacheSizeAvailable = + std::min(static_cast(reqCacheSize), + size_t(freeSize * 0.9) - (size_t(freeSize * 0.9) % 4096)); + cudaq::info("Cache size = {} bytes", cacheSizeAvailable); + const auto errCode = cudaMalloc(&d_cache, cacheSizeAvailable); + if (errCode == cudaSuccess) { + HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory( + m_cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE, + CUTENSORNET_WORKSPACE_CACHE, d_cache, cacheSizeAvailable)); + } else { + cudaq::info("Failed to allocate cache workspace memory."); + d_cache = nullptr; + } + } // Sample the quantum circuit state std::unordered_map counts; - constexpr int MAX_SHOTS_PER_RUNS = 10000; + // If this is a trajectory simulation, each shot needs an independent + // trajectory sampling. + const int MAX_SHOTS_PER_RUNS = m_hasNoiseChannel ? 
1 : 10000; int shotsToRun = shots; while (shotsToRun > 0) { const int numShots = std::min(shotsToRun, MAX_SHOTS_PER_RUNS); @@ -236,6 +320,19 @@ TensorNetState::sample(const std::vector &measuredBitIds, shotsToRun -= numShots; } + if (enableCacheWorkspace && d_cache) { + HANDLE_CUDA_ERROR(cudaFree(d_cache)); + } + return counts; +} + +std::unordered_map +TensorNetState::sample(const std::vector &measuredBitIds, + int32_t shots, bool enableCacheWorkspace) { + LOG_API_TIME(); + auto [sampler, workDesc] = prepareSample(measuredBitIds); + std::unordered_map counts = executeSample( + sampler, workDesc, measuredBitIds, shots, enableCacheWorkspace); // Destroy the workspace descriptor HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc)); // Destroy the quantum circuit sampler @@ -270,13 +367,10 @@ std::pair TensorNetState::contractStateVectorInternal( projectedModes.data(), nullptr, &accessor)); } - const int32_t numHyperSamples = - 8; // desired number of hyper samples used in the tensor network - // contraction path finder { ScopedTraceWithContext("cutensornetAccessorConfigure"); HANDLE_CUTN_ERROR(cutensornetAccessorConfigure( - m_cutnHandle, accessor, CUTENSORNET_ACCESSOR_OPT_NUM_HYPER_SAMPLES, + m_cutnHandle, accessor, CUTENSORNET_ACCESSOR_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples))); } // Prepare the quantum state amplitudes accessor @@ -327,100 +421,10 @@ std::pair TensorNetState::contractStateVectorInternal( return std::make_pair(d_sv, svDim); } -std::vector> TensorNetState::getStateVector( - const std::vector &projectedModes, - const std::vector &projectedModeValues) { - auto [d_sv, svDim] = - contractStateVectorInternal(projectedModes, projectedModeValues); - std::vector> h_sv(svDim); - HANDLE_CUDA_ERROR(cudaMemcpy(h_sv.data(), d_sv, - svDim * sizeof(std::complex), - cudaMemcpyDeviceToHost)); - // Free resources - HANDLE_CUDA_ERROR(cudaFree(d_sv)); - - return h_sv; -} - -std::vector> -TensorNetState::computeRDM(const 
std::vector &qubits) { - // Make sure that we don't overflow the memory size calculation. - // Note: the actual limitation will depend on the system memory. - if (qubits.size() >= 32 || - (1ull << (2 * qubits.size())) > - std::numeric_limits::max() / sizeof(std::complex)) - throw std::runtime_error("Too many qubits are requested for reduced " - "density matrix contraction."); - LOG_API_TIME(); - void *d_rdm{nullptr}; - const uint64_t rdmSize = 1ull << (2 * qubits.size()); - const uint64_t rdmSizeBytes = rdmSize * sizeof(std::complex); - HANDLE_CUDA_ERROR(cudaMalloc(&d_rdm, rdmSizeBytes)); - - cutensornetStateMarginal_t marginal; - { - ScopedTraceWithContext("cutensornetCreateMarginal"); - HANDLE_CUTN_ERROR(cutensornetCreateMarginal( - m_cutnHandle, m_quantumState, qubits.size(), qubits.data(), - /*numProjectedModes*/ 0, /*projectedModes*/ nullptr, - /*marginalTensorStrides*/ nullptr, &marginal)); - } - - const int32_t numHyperSamples = - 8; // desired number of hyper samples used in the tensor network - // contraction path finder - { - ScopedTraceWithContext("cutensornetMarginalConfigure"); - HANDLE_CUTN_ERROR(cutensornetMarginalConfigure( - m_cutnHandle, marginal, CUTENSORNET_MARGINAL_OPT_NUM_HYPER_SAMPLES, - &numHyperSamples, sizeof(numHyperSamples))); - } - - // Prepare the specified quantum circuit reduced density matrix (marginal) - cutensornetWorkspaceDescriptor_t workDesc; - HANDLE_CUTN_ERROR( - cutensornetCreateWorkspaceDescriptor(m_cutnHandle, &workDesc)); - { - ScopedTraceWithContext("cutensornetMarginalPrepare"); - HANDLE_CUTN_ERROR(cutensornetMarginalPrepare( - m_cutnHandle, marginal, scratchPad.scratchSize, workDesc, 0)); - } - // Attach the workspace buffer - int64_t worksize{0}; - HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize( - m_cutnHandle, workDesc, CUTENSORNET_WORKSIZE_PREF_RECOMMENDED, - CUTENSORNET_MEMSPACE_DEVICE, CUTENSORNET_WORKSPACE_SCRATCH, &worksize)); - if (worksize <= static_cast(scratchPad.scratchSize)) { - 
HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory( - m_cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE, - CUTENSORNET_WORKSPACE_SCRATCH, scratchPad.d_scratch, worksize)); - } else { - throw std::runtime_error("ERROR: Insufficient workspace size on Device!"); - } - { - ScopedTraceWithContext("cutensornetMarginalCompute"); - // Compute the specified quantum circuit reduced density matrix (marginal) - HANDLE_CUTN_ERROR(cutensornetMarginalCompute(m_cutnHandle, marginal, - nullptr, workDesc, d_rdm, 0)); - } - std::vector> h_rdm(rdmSize); - HANDLE_CUDA_ERROR( - cudaMemcpy(h_rdm.data(), d_rdm, rdmSizeBytes, cudaMemcpyDeviceToHost)); - - // Clean up - HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc)); - HANDLE_CUTN_ERROR(cutensornetDestroyMarginal(marginal)); - HANDLE_CUDA_ERROR(cudaFree(d_rdm)); - - return h_rdm; -} - -// Returns MPS tensors (device mems) -// Note: user needs to clean up these tensors std::vector -TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, - double relCutoff, - cutensornetTensorSVDAlgo_t algo) { +TensorNetState::setupMPSFactorize(int64_t maxExtent, double absCutoff, + double relCutoff, + cutensornetTensorSVDAlgo_t algo) { LOG_API_TIME(); if (m_numQubits == 0) return {}; @@ -430,18 +434,10 @@ TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, tensor.extents = {2}; HANDLE_CUDA_ERROR( cudaMalloc(&tensor.deviceData, 2 * sizeof(std::complex))); - // Just contract all the gates to the tensor. - // Note: if none gates, don't call `getStateVector`, which performs a - // contraction (`Flop count is zero` error). - const std::vector> stateVec = - isDirty() ? 
getStateVector() - : std::vector>{1.0, 0.0}; - assert(stateVec.size() == 2); - HANDLE_CUDA_ERROR(cudaMemcpy(tensor.deviceData, stateVec.data(), - 2 * sizeof(std::complex), - cudaMemcpyHostToDevice)); + return {tensor}; } + std::vector mpsTensors(m_numQubits); std::vector extentsPtr(m_numQubits); for (std::size_t i = 0; i < m_numQubits; ++i) { @@ -480,6 +476,35 @@ TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, HANDLE_CUTN_ERROR(cutensornetStateConfigure( m_cutnHandle, m_quantumState, CUTENSORNET_STATE_CONFIG_MPS_SVD_REL_CUTOFF, &relCutoff, sizeof(relCutoff))); + return mpsTensors; +} + +void TensorNetState::computeMPSFactorize(std::vector &mpsTensors) { + LOG_API_TIME(); + if (mpsTensors.empty()) + return; + if (mpsTensors.size() == 1) { + MPSTensor &tensor = mpsTensors[0]; + // Just contract all the gates to the tensor. + // Note: if none gates, don't call `getStateVector`, which performs a + // contraction (`Flop count is zero` error). + const std::vector> stateVec = + isDirty() ? 
getStateVector() + : std::vector>{1.0, 0.0}; + assert(stateVec.size() == 2); + HANDLE_CUDA_ERROR(cudaMemcpy(tensor.deviceData, stateVec.data(), + 2 * sizeof(std::complex), + cudaMemcpyHostToDevice)); + return; + } + + std::vector extentsPtr(mpsTensors.size()); + std::vector allData(mpsTensors.size()); + for (std::size_t i = 0; auto &tensor : mpsTensors) { + allData[i] = tensor.deviceData; + extentsPtr[i] = tensor.extents.data(); + ++i; + } // Prepare the MPS computation and attach workspace cutensornetWorkspaceDescriptor_t workDesc; @@ -522,9 +547,6 @@ TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, m_cutnHandle, workDesc, CUTENSORNET_MEMSPACE_HOST, CUTENSORNET_WORKSPACE_SCRATCH, hostWork, hostWorkspaceSize)); - std::vector allData(m_numQubits); - for (std::size_t i = 0; auto &tensor : mpsTensors) - allData[i++] = tensor.deviceData; { ScopedTraceWithContext("cutensornetStateCompute"); // Execute MPS computation @@ -533,14 +555,110 @@ TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, /*strides=*/nullptr, allData.data(), 0)); } - if (hostWork) { + if (hostWork) free(hostWork); +} + +std::vector> TensorNetState::getStateVector( + const std::vector &projectedModes, + const std::vector &projectedModeValues) { + auto [d_sv, svDim] = + contractStateVectorInternal(projectedModes, projectedModeValues); + std::vector> h_sv(svDim); + HANDLE_CUDA_ERROR(cudaMemcpy(h_sv.data(), d_sv, + svDim * sizeof(std::complex), + cudaMemcpyDeviceToHost)); + // Free resources + HANDLE_CUDA_ERROR(cudaFree(d_sv)); + + return h_sv; +} + +std::vector> +TensorNetState::computeRDM(const std::vector &qubits) { + // Make sure that we don't overflow the memory size calculation. + // Note: the actual limitation will depend on the system memory. 
+ if (qubits.size() >= 32 || + (1ull << (2 * qubits.size())) > + std::numeric_limits::max() / sizeof(std::complex)) + throw std::runtime_error("Too many qubits are requested for reduced " + "density matrix contraction."); + LOG_API_TIME(); + void *d_rdm{nullptr}; + const uint64_t rdmSize = 1ull << (2 * qubits.size()); + const uint64_t rdmSizeBytes = rdmSize * sizeof(std::complex); + HANDLE_CUDA_ERROR(cudaMalloc(&d_rdm, rdmSizeBytes)); + + cutensornetStateMarginal_t marginal; + { + ScopedTraceWithContext("cutensornetCreateMarginal"); + HANDLE_CUTN_ERROR(cutensornetCreateMarginal( + m_cutnHandle, m_quantumState, qubits.size(), qubits.data(), + /*numProjectedModes*/ 0, /*projectedModes*/ nullptr, + /*marginalTensorStrides*/ nullptr, &marginal)); + } + + { + ScopedTraceWithContext("cutensornetMarginalConfigure"); + HANDLE_CUTN_ERROR(cutensornetMarginalConfigure( + m_cutnHandle, marginal, CUTENSORNET_MARGINAL_CONFIG_NUM_HYPER_SAMPLES, + &numHyperSamples, sizeof(numHyperSamples))); + } + + // Prepare the specified quantum circuit reduced density matrix (marginal) + cutensornetWorkspaceDescriptor_t workDesc; + HANDLE_CUTN_ERROR( + cutensornetCreateWorkspaceDescriptor(m_cutnHandle, &workDesc)); + { + ScopedTraceWithContext("cutensornetMarginalPrepare"); + HANDLE_CUTN_ERROR(cutensornetMarginalPrepare( + m_cutnHandle, marginal, scratchPad.scratchSize, workDesc, 0)); + } + // Attach the workspace buffer + int64_t worksize{0}; + HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize( + m_cutnHandle, workDesc, CUTENSORNET_WORKSIZE_PREF_RECOMMENDED, + CUTENSORNET_MEMSPACE_DEVICE, CUTENSORNET_WORKSPACE_SCRATCH, &worksize)); + if (worksize <= static_cast(scratchPad.scratchSize)) { + HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory( + m_cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE, + CUTENSORNET_WORKSPACE_SCRATCH, scratchPad.d_scratch, worksize)); + } else { + throw std::runtime_error("ERROR: Insufficient workspace size on Device!"); + } + { + 
ScopedTraceWithContext("cutensornetMarginalCompute"); + // Compute the specified quantum circuit reduced density matrix (marginal) + HANDLE_CUTN_ERROR(cutensornetMarginalCompute(m_cutnHandle, marginal, + nullptr, workDesc, d_rdm, 0)); } + std::vector> h_rdm(rdmSize); + HANDLE_CUDA_ERROR( + cudaMemcpy(h_rdm.data(), d_rdm, rdmSizeBytes, cudaMemcpyDeviceToHost)); + + // Clean up + HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc)); + HANDLE_CUTN_ERROR(cutensornetDestroyMarginal(marginal)); + HANDLE_CUDA_ERROR(cudaFree(d_rdm)); + + return h_rdm; +} + +// Returns MPS tensors (device mems) +// Note: user needs to clean up these tensors +std::vector +TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, + double relCutoff, + cutensornetTensorSVDAlgo_t algo) { + LOG_API_TIME(); + auto mpsTensors = setupMPSFactorize(maxExtent, absCutoff, relCutoff, algo); + computeMPSFactorize(mpsTensors); return mpsTensors; } std::vector> TensorNetState::computeExpVals( - const std::vector> &symplecticRepr) { + const std::vector> &symplecticRepr, + const std::optional &numberTrajectories) { LOG_API_TIME(); if (symplecticRepr.empty()) return {}; @@ -607,16 +725,11 @@ std::vector> TensorNetState::computeExpVals( &tensorNetworkExpectation)); } // Step 2: configure - // Note: as we reuse this path across many contractions, use a higher number - // of hyper samples. 
- const int32_t numHyperSamples = - 512; // desired number of hyper samples used in the tensor network - // contraction path finder { ScopedTraceWithContext("cutensornetExpectationConfigure"); HANDLE_CUTN_ERROR(cutensornetExpectationConfigure( m_cutnHandle, tensorNetworkExpectation, - CUTENSORNET_EXPECTATION_OPT_NUM_HYPER_SAMPLES, &numHyperSamples, + CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples))); } @@ -646,6 +759,13 @@ std::vector> TensorNetState::computeExpVals( } // Step 4: Compute + const std::size_t numObserveTrajectories = [&]() -> std::size_t { + if (!m_hasNoiseChannel) + return 1; + if (numberTrajectories.has_value()) + return numberTrajectories.value(); + return g_numberTrajectoriesForObserve; + }(); for (const auto &term : symplecticRepr) { bool allIdOps = true; for (std::size_t i = 0; i < numQubits; ++i) { @@ -680,13 +800,14 @@ std::vector> TensorNetState::computeExpVals( HANDLE_CUDA_ERROR(cudaMemcpy(pauliMats_d, pauliMats_h, placeHolderArraySize, cudaMemcpyHostToDevice)); - std::complex expVal; - { + std::complex expVal = 0.0; + for (std::size_t trajId = 0; trajId < numObserveTrajectories; ++trajId) { + std::complex result; ScopedTraceWithContext("cutensornetExpectationCompute"); HANDLE_CUTN_ERROR(cutensornetExpectationCompute( - m_cutnHandle, tensorNetworkExpectation, workDesc, &expVal, - /*stateNorm*/ nullptr, + m_cutnHandle, tensorNetworkExpectation, workDesc, &result, nullptr, /*cudaStream*/ 0)); + expVal += (result / static_cast(numObserveTrajectories)); } allExpVals.emplace_back(expVal); } @@ -699,7 +820,8 @@ std::vector> TensorNetState::computeExpVals( } std::complex TensorNetState::computeExpVal( - cutensornetNetworkOperator_t tensorNetworkOperator) { + cutensornetNetworkOperator_t tensorNetworkOperator, + const std::optional &numberTrajectories) { LOG_API_TIME(); cutensornetStateExpectation_t tensorNetworkExpectation; // Step 1: create @@ -710,14 +832,11 @@ std::complex 
TensorNetState::computeExpVal( &tensorNetworkExpectation)); } // Step 2: configure - const int32_t numHyperSamples = - 8; // desired number of hyper samples used in the tensor network - // contraction path finder { ScopedTraceWithContext("cutensornetExpectationConfigure"); HANDLE_CUTN_ERROR(cutensornetExpectationConfigure( m_cutnHandle, tensorNetworkExpectation, - CUTENSORNET_EXPECTATION_OPT_NUM_HYPER_SAMPLES, &numHyperSamples, + CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples))); } @@ -746,14 +865,23 @@ std::complex TensorNetState::computeExpVal( } // Step 4: Compute - std::complex expVal; - - { + const std::size_t numObserveTrajectories = [&]() -> std::size_t { + if (!m_hasNoiseChannel) + return 1; + if (numberTrajectories.has_value()) + return numberTrajectories.value(); + return g_numberTrajectoriesForObserve; + }(); + + std::complex expVal = 0.0; + for (std::size_t trajId = 0; trajId < numObserveTrajectories; ++trajId) { + std::complex result; ScopedTraceWithContext("cutensornetExpectationCompute"); HANDLE_CUTN_ERROR(cutensornetExpectationCompute( - m_cutnHandle, tensorNetworkExpectation, workDesc, &expVal, + m_cutnHandle, tensorNetworkExpectation, workDesc, &result, /*stateNorm*/ nullptr, /*cudaStream*/ 0)); + expVal += (result / static_cast(numObserveTrajectories)); } // Step 5: clean up HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(tensorNetworkExpectation)); @@ -814,6 +942,55 @@ TensorNetState::reverseQubitOrder(std::span> stateVec) { return ket; } +void TensorNetState::applyCachedOps() { + int64_t tensorId = 0; + for (auto &op : m_tensorOps) + if (op.deviceData) { + if (op.controlQubitIds.empty()) { + HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator( + m_cutnHandle, m_quantumState, op.targetQubitIds.size(), + op.targetQubitIds.data(), op.deviceData, nullptr, /*immutable*/ 1, + /*adjoint*/ static_cast(op.isAdjoint), + /*unitary*/ static_cast(op.isUnitary), &tensorId)); + } else { + 
HANDLE_CUTN_ERROR(cutensornetStateApplyControlledTensorOperator( + m_cutnHandle, m_quantumState, + /*numControlModes=*/op.controlQubitIds.size(), + /*stateControlModes=*/op.controlQubitIds.data(), + /*stateControlValues=*/nullptr, + /*numTargetModes*/ op.targetQubitIds.size(), + /*stateTargetModes*/ op.targetQubitIds.data(), op.deviceData, + nullptr, + /*immutable*/ 1, + /*adjoint*/ static_cast(op.isAdjoint), + /*unitary*/ static_cast(op.isUnitary), &m_tensorId)); + } + } else if (op.unitaryChannel.has_value()) { + HANDLE_CUTN_ERROR(cutensornetStateApplyUnitaryChannel( + m_cutnHandle, m_quantumState, + /*numStateModes=*/op.targetQubitIds.size(), + /*stateModes=*/op.targetQubitIds.data(), + /*numTensors=*/op.unitaryChannel->tensorData.size(), + /*tensorData=*/op.unitaryChannel->tensorData.data(), + /*tensorModeStrides=*/nullptr, + /*probabilities=*/op.unitaryChannel->probabilities.data(), + &m_tensorId)); + } else { + throw std::runtime_error("Invalid AppliedTensorOp encountered."); + } +} + +void TensorNetState::setZeroState() { + LOG_API_TIME(); + // Destroy the current quantum circuit state + HANDLE_CUTN_ERROR(cutensornetDestroyState(m_quantumState)); + const std::vector qubitDims(m_numQubits, 2); + // Re-create the state + HANDLE_CUTN_ERROR(cutensornetCreateState( + m_cutnHandle, CUTENSORNET_STATE_PURITY_PURE, m_numQubits, + qubitDims.data(), CUDA_C_64F, &m_quantumState)); +} + std::unique_ptr TensorNetState::createFromStateVector( std::span> stateVec, ScratchDeviceMem &inScratchPad, cutensornetHandle_t handle, std::mt19937 &randomEngine) { diff --git a/runtime/nvqir/cutensornet/tensornet_state.h b/runtime/nvqir/cutensornet/tensornet_state.h index c680fd5c677..5400b0565f8 100644 --- a/runtime/nvqir/cutensornet/tensornet_state.h +++ b/runtime/nvqir/cutensornet/tensornet_state.h @@ -12,6 +12,7 @@ #include "cutensornet.h" #include "tensornet_utils.h" #include "timing_utils.h" +#include #include #include @@ -28,13 +29,31 @@ struct MPSTensor { std::vector extents; }; 
+struct UnitaryChannel { + std::vector tensorData; + std::vector probabilities; +}; + /// Track gate tensors that were appended to the tensor network. struct AppliedTensorOp { void *deviceData = nullptr; + std::optional unitaryChannel; std::vector targetQubitIds; std::vector controlQubitIds; bool isAdjoint; bool isUnitary; + AppliedTensorOp(void *dataPtr, const std::vector &targetQubits, + const std::vector &controlQubits, bool adjoint, + bool unitary) + : deviceData(dataPtr), targetQubitIds(targetQubits), + controlQubitIds(controlQubits), isAdjoint(adjoint), isUnitary(unitary) { + } + + AppliedTensorOp(const std::vector &qubits, + const std::vector &krausOps, + const std::vector &probabilities) + : targetQubitIds(qubits), + unitaryChannel(UnitaryChannel(krausOps, probabilities)) {} }; /// @brief Wrapper of cutensornetState_t to provide convenient API's for CUDA-Q @@ -56,8 +75,13 @@ class TensorNetState { // This is a reference to the backend random number generator, which can be // reseeded by users. std::mt19937 &m_randomEngine; + bool m_hasNoiseChannel = false; public: + // The number of hyper samples used in the tensor network contraction path + // finder + static std::int32_t numHyperSamples; + /// @brief Constructor TensorNetState(std::size_t numQubits, ScratchDeviceMem &inScratchPad, cutensornetHandle_t handle, std::mt19937 &randomEngine); @@ -69,6 +93,10 @@ class TensorNetState { std::unique_ptr clone() const; + /// Default number of trajectories for observe (with noise) in case no option + /// is provided. 
+ static inline constexpr int g_numberTrajectoriesForObserve = 1000; + /// Reconstruct/initialize a state from MPS tensors static std::unique_ptr createFromMpsTensors(const std::vector &mpsTensors, @@ -101,6 +129,11 @@ class TensorNetState { const std::vector &targetQubits, void *gateDeviceMem, bool adjoint = false); + /// @brief Apply a unitary channel + void applyUnitaryChannel(const std::vector &qubits, + const std::vector &krausOps, + const std::vector &probabilities); + /// @brief Apply a projector matrix (non-unitary) /// @param proj_d Projector matrix (expected a 2x2 matrix in column major) /// @param qubitIdx Qubit operand @@ -122,7 +155,8 @@ class TensorNetState { /// @brief Perform measurement sampling on the quantum state. std::unordered_map - sample(const std::vector &measuredBitIds, int32_t shots); + sample(const std::vector &measuredBitIds, int32_t shots, + bool enableCacheWorkspace); /// @brief Contract the tensor network representation to retrieve the state /// vector. @@ -149,12 +183,14 @@ class TensorNetState { /// @param symplecticRepr The symplectic representation of the observable /// @return std::vector> - computeExpVals(const std::vector> &symplecticRepr); + computeExpVals(const std::vector> &symplecticRepr, + const std::optional &numberTrajectories); /// @brief Evaluate the expectation value of a given /// `cutensornetNetworkOperator_t` std::complex - computeExpVal(cutensornetNetworkOperator_t tensorNetworkOperator); + computeExpVal(cutensornetNetworkOperator_t tensorNetworkOperator, + const std::optional &numberTrajectories); /// @brief Number of qubits that this state represents. std::size_t getNumQubits() const { return m_numQubits; } @@ -167,6 +203,12 @@ class TensorNetState { static std::vector> reverseQubitOrder(std::span> stateVec); + /// @brief Apply all the cached ops to the state. 
+ void applyCachedOps(); + + /// @brief Set the state to a zero state + void setZeroState(); + /// @brief Destructor ~TensorNetState(); @@ -178,5 +220,24 @@ class TensorNetState { std::pair contractStateVectorInternal( const std::vector &projectedModes, const std::vector &projectedModeValues = {}); + + /// Internal methods to perform MPS factorize. + // Note: `factorizeMPS` is an end-to-end API for factorization. + // This factorization can be split into `cutensornetStateFinalizeMPS` and + // `cutensornetStateCompute` to facilitate reuse. + std::vector setupMPSFactorize(int64_t maxExtent, double absCutoff, + double relCutoff, + cutensornetTensorSVDAlgo_t algo); + void computeMPSFactorize(std::vector &mpsTensors); + + /// Internal methods for sampling + std::pair + prepareSample(const std::vector &measuredBitIds); + + std::unordered_map + executeSample(cutensornetStateSampler_t &sampler, + cutensornetWorkspaceDescriptor_t &workspaceDesc, + const std::vector &measuredBitIds, int32_t shots, + bool enableCacheWorkspace); }; } // namespace nvqir diff --git a/runtime/nvqir/cutensornet/tn_simulation_state.cpp b/runtime/nvqir/cutensornet/tn_simulation_state.cpp index 0f4a065101d..baf18a163cf 100644 --- a/runtime/nvqir/cutensornet/tn_simulation_state.cpp +++ b/runtime/nvqir/cutensornet/tn_simulation_state.cpp @@ -75,23 +75,38 @@ TensorNetSimulationState::overlap(const cudaq::SimulationState &other) { allTensorOps.insert(allTensorOps.end(), tensorOps.begin(), tensorOps.end()); for (auto &op : allTensorOps) { - if (op.controlQubitIds.empty()) { - HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator( - cutnHandle, tempQuantumState, op.targetQubitIds.size(), - op.targetQubitIds.data(), op.deviceData, nullptr, /*immutable*/ 1, - /*adjoint*/ static_cast(op.isAdjoint), - /*unitary*/ static_cast(op.isUnitary), &tensorId)); - } else { - HANDLE_CUTN_ERROR(cutensornetStateApplyControlledTensorOperator( + if (op.deviceData) { + if (op.controlQubitIds.empty()) { + 
HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator( + cutnHandle, tempQuantumState, op.targetQubitIds.size(), + op.targetQubitIds.data(), op.deviceData, nullptr, /*immutable*/ 1, + /*adjoint*/ static_cast(op.isAdjoint), + /*unitary*/ static_cast(op.isUnitary), &tensorId)); + } else { + HANDLE_CUTN_ERROR(cutensornetStateApplyControlledTensorOperator( + cutnHandle, tempQuantumState, + /*numControlModes=*/op.controlQubitIds.size(), + /*stateControlModes=*/op.controlQubitIds.data(), + /*stateControlValues=*/nullptr, + /*numTargetModes*/ op.targetQubitIds.size(), + /*stateTargetModes*/ op.targetQubitIds.data(), op.deviceData, + nullptr, + /*immutable*/ 1, + /*adjoint*/ static_cast(op.isAdjoint), + /*unitary*/ static_cast(op.isUnitary), &tensorId)); + } + } else if (op.unitaryChannel.has_value()) { + HANDLE_CUTN_ERROR(cutensornetStateApplyUnitaryChannel( cutnHandle, tempQuantumState, - /*numControlModes=*/op.controlQubitIds.size(), - /*stateControlModes=*/op.controlQubitIds.data(), - /*stateControlValues=*/nullptr, - /*numTargetModes*/ op.targetQubitIds.size(), - /*stateTargetModes*/ op.targetQubitIds.data(), op.deviceData, nullptr, - /*immutable*/ 1, - /*adjoint*/ static_cast(op.isAdjoint), - /*unitary*/ static_cast(op.isUnitary), &tensorId)); + /*numStateModes=*/op.targetQubitIds.size(), + /*stateModes=*/op.targetQubitIds.data(), + /*numTensors=*/op.unitaryChannel->tensorData.size(), + /*tensorData=*/op.unitaryChannel->tensorData.data(), + /*tensorModeStrides=*/nullptr, + /*probabilities=*/op.unitaryChannel->probabilities.data(), + &tensorId)); + } else { + throw std::runtime_error("Invalid AppliedTensorOp encountered."); } } @@ -110,13 +125,11 @@ TensorNetSimulationState::overlap(const cudaq::SimulationState &other) { projectedModes.data(), nullptr, &accessor)); } - const int32_t numHyperSamples = - 8; // desired number of hyper samples used in the tensor network - // contraction path finder + const int32_t numHyperSamples = TensorNetState::numHyperSamples; { 
ScopedTraceWithContext("cutensornetAccessorConfigure"); HANDLE_CUTN_ERROR(cutensornetAccessorConfigure( - cutnHandle, accessor, CUTENSORNET_ACCESSOR_OPT_NUM_HYPER_SAMPLES, + cutnHandle, accessor, CUTENSORNET_ACCESSOR_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples))); } // Prepare the quantum state amplitudes accessor diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 81b7202ae6c..fa1a9be06d4 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -227,6 +227,7 @@ if(TARGET nvqir-tensornet) integration/builder_tester.cpp integration/deuteron_variational_tester.cpp integration/observe_result_tester.cpp + integration/noise_tester.cpp # This test contains noisy observe test cases. ) target_include_directories(test_tensornet_observe_path_reuse PRIVATE .) target_compile_definitions(test_tensornet_observe_path_reuse diff --git a/unittests/integration/noise_tester.cpp b/unittests/integration/noise_tester.cpp index 746a8930a0c..8368f7a6a01 100644 --- a/unittests/integration/noise_tester.cpp +++ b/unittests/integration/noise_tester.cpp @@ -11,7 +11,8 @@ #include #include -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) struct xOp { void operator()() __qpu__ { cudaq::qubit q; @@ -28,7 +29,7 @@ struct bell { }; #endif -#if defined(CUDAQ_BACKEND_DM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_TENSORNET) // Stim does not support arbitrary cudaq::kraus_channel specification. CUDAQ_TEST(NoiseTest, checkSimple) { @@ -93,81 +94,82 @@ CUDAQ_TEST(NoiseTest, checkAmplitudeDamping) { } #endif -#if defined(CUDAQ_BACKEND_DM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_TENSORNET) // Stim does not support arbitrary cudaq::kraus_op specification. 
 CUDAQ_TEST(NoiseTest, checkCNOT) {
   cudaq::set_random_seed(13);
-  cudaq::kraus_op op0{cudaq::complex{0.99498743710662, 0.0},
+  // 10% depolarization (amplitudes sqrt(0.9) and sqrt(0.1/3))
+  cudaq::kraus_op op0{cudaq::complex{0.94868329805, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
-                      {0.99498743710662, 0.0},
+                      {0.94868329805, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
-                      {0.99498743710662, 0.0},
+                      {0.94868329805, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
-                      {0.99498743710662, 0.0}},
+                      {0.94868329805, 0.0}},
      op1{cudaq::complex{0.0, 0.0},
          {0.0, 0.0},
-         {0.05773502691896258, 0.0},
+         {0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.05773502691896258, 0.0},
-         {0.05773502691896258, 0.0},
+         {0.18257418583, 0.0},
+         {0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.05773502691896258, 0.0},
+         {0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0}},
      op2{cudaq::complex{0.0, 0.0},
          {0.0, 0.0},
-         {0.0, -0.05773502691896258},
+         {0.0, -0.18257418583},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.0, -0.05773502691896258},
-         {0.0, 0.05773502691896258},
+         {0.0, -0.18257418583},
+         {0.0, 0.18257418583},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.0, 0.05773502691896258},
+         {0.0, 0.18257418583},
          {0.0, 0.0},
          {0.0, 0.0}},
-     op3{cudaq::complex{0.05773502691896258, 0.0},
+     op3{cudaq::complex{0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.05773502691896258, 0.0},
+         {0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {-0.05773502691896258, 0.0},
+         {-0.18257418583, 0.0},
          {-0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {-0.0, 0.0},
-         {-0.05773502691896258, 0.0}};
+         {-0.18257418583, 0.0}};
   cudaq::kraus_channel cnotNoise(
       std::vector{op0, op1, op2, op3});
   cudaq::noise_model noise;
   noise.add_channel({0, 1}, cnotNoise);
   cudaq::set_noise(noise);
-  auto counts = cudaq::sample(10000, bell{});
+  auto counts = cudaq::sample(100, bell{});
   counts.dump();
   EXPECT_TRUE(counts.size() > 2);
   cudaq::unset_noise(); // clear for
subsequent tests @@ -188,7 +190,8 @@ CUDAQ_TEST(NoiseTest, checkExceptions) { } #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) CUDAQ_TEST(NoiseTest, checkDepolType) { cudaq::set_random_seed(13); @@ -203,7 +206,8 @@ CUDAQ_TEST(NoiseTest, checkDepolType) { } #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) CUDAQ_TEST(NoiseTest, checkDepolTypeSimple) { cudaq::set_random_seed(13); @@ -255,7 +259,8 @@ CUDAQ_TEST(NoiseTest, checkAmpDampTypeSimple) { } #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) CUDAQ_TEST(NoiseTest, checkBitFlipType) { cudaq::set_random_seed(13); @@ -272,7 +277,8 @@ CUDAQ_TEST(NoiseTest, checkBitFlipType) { } #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) CUDAQ_TEST(NoiseTest, checkBitFlipTypeSimple) { cudaq::set_random_seed(13); @@ -288,7 +294,8 @@ CUDAQ_TEST(NoiseTest, checkBitFlipTypeSimple) { } #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) // Same as above but use alternate sample interface that specifies the number of // shots and the noise model to use. 
CUDAQ_TEST(NoiseTest, checkBitFlipTypeSimpleOptions) { @@ -308,7 +315,8 @@ CUDAQ_TEST(NoiseTest, checkBitFlipTypeSimpleOptions) { } #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) CUDAQ_TEST(NoiseTest, checkPhaseFlipType) { cudaq::set_random_seed(13); @@ -333,7 +341,8 @@ CUDAQ_TEST(NoiseTest, checkPhaseFlipType) { } #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) template struct xOpAll { @@ -344,7 +353,8 @@ struct xOpAll { }; #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) CUDAQ_TEST(NoiseTest, checkAllQubitChannel) { cudaq::set_random_seed(13); @@ -364,74 +374,75 @@ CUDAQ_TEST(NoiseTest, checkAllQubitChannel) { } #endif -#if defined(CUDAQ_BACKEND_DM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_TENSORNET) // Stim does not support arbitrary cudaq::kraus_op specification. 
 static cudaq::kraus_channel create2pNoiseChannel() {
-  cudaq::kraus_op op0{cudaq::complex{0.99498743710662, 0.0},
+  // 10% depolarization (amplitudes sqrt(0.9) and sqrt(0.1/3))
+  cudaq::kraus_op op0{cudaq::complex{0.94868329805, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
-                      {0.99498743710662, 0.0},
+                      {0.94868329805, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
-                      {0.99498743710662, 0.0},
+                      {0.94868329805, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
                       {0.0, 0.0},
-                      {0.99498743710662, 0.0}},
+                      {0.94868329805, 0.0}},
      op1{cudaq::complex{0.0, 0.0},
          {0.0, 0.0},
-         {0.05773502691896258, 0.0},
+         {0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.05773502691896258, 0.0},
-         {0.05773502691896258, 0.0},
+         {0.18257418583, 0.0},
+         {0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.05773502691896258, 0.0},
+         {0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0}},
      op2{cudaq::complex{0.0, 0.0},
          {0.0, 0.0},
-         {0.0, -0.05773502691896258},
+         {0.0, -0.18257418583},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.0, -0.05773502691896258},
-         {0.0, 0.05773502691896258},
+         {0.0, -0.18257418583},
+         {0.0, 0.18257418583},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.0, 0.05773502691896258},
+         {0.0, 0.18257418583},
          {0.0, 0.0},
          {0.0, 0.0}},
-     op3{cudaq::complex{0.05773502691896258, 0.0},
+     op3{cudaq::complex{0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {0.05773502691896258, 0.0},
+         {0.18257418583, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
-         {-0.05773502691896258, 0.0},
+         {-0.18257418583, 0.0},
          {-0.0, 0.0},
          {0.0, 0.0},
          {0.0, 0.0},
          {-0.0, 0.0},
-         {-0.05773502691896258, 0.0}};
+         {-0.18257418583, 0.0}};
   cudaq::kraus_channel noise2q(
       std::vector{op0, op1, op2, op3});
   return noise2q;
@@ -447,7 +458,7 @@ struct bellRandom {
   };
 };
 #endif
-#if defined(CUDAQ_BACKEND_DM)
+#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_TENSORNET)
 // Stim does not support arbitrary cudaq::kraus_op specification.
CUDAQ_TEST(NoiseTest, checkAllQubitChannelWithControl) { @@ -455,7 +466,7 @@ CUDAQ_TEST(NoiseTest, checkAllQubitChannelWithControl) { cudaq::noise_model noise; noise.add_all_qubit_channel(create2pNoiseChannel(), /*numControls=*/1); - const std::size_t shots = 1024; + const std::size_t shots = 100; constexpr std::size_t numQubits = 5; std::vector qubitIds(numQubits); std::iota(qubitIds.begin(), qubitIds.end(), 0); @@ -476,14 +487,14 @@ CUDAQ_TEST(NoiseTest, checkAllQubitChannelWithControl) { } #endif -#if defined(CUDAQ_BACKEND_DM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_TENSORNET) // Stim does not support arbitrary cudaq::kraus_op specification. CUDAQ_TEST(NoiseTest, checkAllQubitChannelWithControlPrefix) { cudaq::set_random_seed(13); cudaq::noise_model noise; noise.add_all_qubit_channel("cx", create2pNoiseChannel()); - const std::size_t shots = 1024; + const std::size_t shots = 100; constexpr std::size_t numQubits = 5; std::vector qubitIds(numQubits); std::iota(qubitIds.begin(), qubitIds.end(), 0); @@ -504,7 +515,8 @@ CUDAQ_TEST(NoiseTest, checkAllQubitChannelWithControlPrefix) { } #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) CUDAQ_TEST(NoiseTest, checkCallbackChannel) { cudaq::set_random_seed(13); @@ -515,7 +527,7 @@ CUDAQ_TEST(NoiseTest, checkCallbackChannel) { return cudaq::bit_flip_channel(1.); return cudaq::kraus_channel(); }); - const std::size_t shots = 252; + const std::size_t shots = 100; auto counts = cudaq::sample({.shots = shots, .noise = noise}, xOpAll<5>{}); // Check results EXPECT_EQ(1, counts.size()); @@ -537,7 +549,7 @@ struct rxOp { }; #endif -#if defined(CUDAQ_BACKEND_DM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_TENSORNET) // Stim does not support rx gate. 
CUDAQ_TEST(NoiseTest, checkCallbackChannelWithParams) { @@ -574,14 +586,14 @@ CUDAQ_TEST(NoiseTest, checkCallbackChannelWithParams) { } #endif -#if defined(CUDAQ_BACKEND_DM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_TENSORNET) // Stim does not support custom operations. -CUDAQ_REGISTER_OPERATION(CustomX, 1, 0, {0, 1, 1, 0}); +CUDAQ_REGISTER_OPERATION(CustomXOp, 1, 0, {0, 1, 1, 0}); CUDAQ_TEST(NoiseTest, checkCustomOperation) { auto kernel = []() { cudaq::qubit q; - CustomX(q); + CustomXOp(q); }; // Add channel for custom operation using the (name + operand) API @@ -589,7 +601,7 @@ CUDAQ_TEST(NoiseTest, checkCustomOperation) { cudaq::set_random_seed(13); cudaq::bit_flip_channel bf(1.); cudaq::noise_model noise; - noise.add_channel("CustomX", {0}, bf); + noise.add_channel("CustomXOp", {0}, bf); const std::size_t shots = 252; auto counts = cudaq::sample({.shots = shots, .noise = noise}, kernel); // Check results @@ -607,7 +619,7 @@ CUDAQ_TEST(NoiseTest, checkCustomOperation) { cudaq::set_random_seed(13); cudaq::bit_flip_channel bf(1.); cudaq::noise_model noise; - noise.add_all_qubit_channel("CustomX", bf); + noise.add_all_qubit_channel("CustomXOp", bf); const std::size_t shots = 252; auto counts = cudaq::sample({.shots = shots, .noise = noise}, kernel); // Check results @@ -624,7 +636,7 @@ CUDAQ_TEST(NoiseTest, checkCustomOperation) { cudaq::set_random_seed(13); cudaq::noise_model noise; noise.add_channel( - "CustomX", + "CustomXOp", [](const auto &qubits, const auto ¶ms) -> cudaq::kraus_channel { return cudaq::bit_flip_channel(1.); }); @@ -642,14 +654,15 @@ CUDAQ_TEST(NoiseTest, checkCustomOperation) { } #endif -#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_STIM) || \ + defined(CUDAQ_BACKEND_TENSORNET) CUDAQ_TEST(NoiseTest, checkMeasurementNoise) { cudaq::set_random_seed(13); - constexpr double bitFlipRate = 0.1; + constexpr double bitFlipRate = 0.25; cudaq::bit_flip_channel 
bf(bitFlipRate); cudaq::noise_model noise; - // 10% bit flipping during measurement + // 25% bit flipping during measurement noise.add_channel("mz", {0}, bf); cudaq::set_noise(noise); { @@ -658,12 +671,12 @@ CUDAQ_TEST(NoiseTest, checkMeasurementNoise) { x(q); mz(q); }; - auto counts = cudaq::sample(10000, kernel); + auto counts = cudaq::sample(1000, kernel); counts.dump(); // Due to measurement errors, we have both 0/1 results. EXPECT_EQ(2, counts.size()); - EXPECT_NEAR(counts.probability("0"), bitFlipRate, 0.01); - EXPECT_NEAR(counts.probability("1"), 1.0 - bitFlipRate, 0.01); + EXPECT_NEAR(counts.probability("0"), bitFlipRate, 0.1); + EXPECT_NEAR(counts.probability("1"), 1.0 - bitFlipRate, 0.1); } { auto kernel = []() { @@ -671,14 +684,101 @@ CUDAQ_TEST(NoiseTest, checkMeasurementNoise) { x(q); mz(q); }; - auto counts = cudaq::sample(10000, kernel); + auto counts = cudaq::sample(1000, kernel); counts.dump(); // We only have measurement noise on the first qubit. EXPECT_EQ(2, counts.size()); - EXPECT_NEAR(counts.probability("01"), bitFlipRate, 0.01); - EXPECT_NEAR(counts.probability("11"), 1.0 - bitFlipRate, 0.01); + EXPECT_NEAR(counts.probability("01"), bitFlipRate, 0.1); + EXPECT_NEAR(counts.probability("11"), 1.0 - bitFlipRate, 0.1); } cudaq::unset_noise(); // clear for subsequent tests } #endif + +#if defined(CUDAQ_BACKEND_DM) || defined(CUDAQ_BACKEND_TENSORNET) +CUDAQ_TEST(NoiseTest, checkObserveHamiltonianWithNoise) { + using namespace cudaq::spin; + cudaq::spin_op h = 5.907 - 2.1433 * x(0) * x(1) - 2.1433 * y(0) * y(1) + + .21829 * z(0) - 6.125 * z(1); + cudaq::set_random_seed(13); + cudaq::depolarization_channel depol(0.1); + cudaq::noise_model noise; + noise.add_all_qubit_channel(depol); + noise.add_all_qubit_channel(depol); + cudaq::set_noise(noise); + + auto ansatz = [](double theta) __qpu__ { + cudaq::qubit q, r; + x(q); + ry(theta, r); + x(r, q); + }; + + double result = cudaq::observe(ansatz, h, 0.59); + printf("Energy value = %lf\n", result); + 
EXPECT_GT(std::abs(result + 1.7487), 0.1); + cudaq::unset_noise(); // clear for subsequent tests +} +#endif + +#if defined(CUDAQ_BACKEND_TENSORNET) +CUDAQ_REGISTER_OPERATION(CustomIdOp, 1, 0, {1, 0, 0, 1}); + +CUDAQ_TEST(NoiseTest, checkNoiseMatrixRepresentation) { + cudaq::set_random_seed(13); + cudaq::depolarization_channel depol( + 1.0); // 1/3 probability of X, Y, Z noise op. + cudaq::noise_model noise; + noise.add_channel("CustomIdOp", {0}, depol); + cudaq::set_noise(noise); + const auto noisyCircuit = []() __qpu__ { + cudaq::qubit q; + h(q); + CustomIdOp(q); // inject noise + }; + + // Referent states for X, Y, or Z noise + std::vector> stateVecX(2), stateVecY(2), stateVecZ(2); + + cudaq::get_state([]() __qpu__ { + cudaq::qubit q; + h(q); + x(q); + }).to_host(stateVecX.data(), 2); + cudaq::get_state([]() __qpu__ { + cudaq::qubit q; + h(q); + y(q); + }).to_host(stateVecY.data(), 2); + cudaq::get_state([]() __qpu__ { + cudaq::qubit q; + h(q); + z(q); + }).to_host(stateVecZ.data(), 2); + + const auto checkEqualVec = [](const auto &vec1, const auto &vec2) { + constexpr double tol = 1e-12; + const auto vecSize = vec1.size(); + if (vec2.size() != vecSize) + return false; + for (std::size_t i = 0; i < vecSize; ++i) { + if (std::abs(vec1[i] - vec2[i]) > tol) + return false; + } + return true; + }; + + for (int i = 0; i < 10; ++i) { + std::vector> noisyStateVec(2); + auto noisyState = cudaq::get_state(noisyCircuit); + noisyState.to_host(noisyStateVec.data(), 2); + const auto equalX = checkEqualVec(noisyStateVec, stateVecX); + const auto equalY = checkEqualVec(noisyStateVec, stateVecY); + const auto equalZ = checkEqualVec(noisyStateVec, stateVecZ); + // One of these expected output states + EXPECT_TRUE(equalX || equalY || equalZ); + } + cudaq::unset_noise(); // clear for subsequent tests +} +#endif