diff --git a/docs/sphinx/examples/cpp/basics/cutensornet_backends_observe.cpp b/docs/sphinx/examples/cpp/basics/cutensornet_backends_observe.cpp
new file mode 100644
index 00000000000..89ea5d89678
--- /dev/null
+++ b/docs/sphinx/examples/cpp/basics/cutensornet_backends_observe.cpp
@@ -0,0 +1,53 @@
+// Compile with:
+// ```
+// nvq++ cutensornet_backends_observe.cpp -o dyn.x --target tensornet
+// ```
+//
+// This example is meant to demonstrate the `cuTensorNet`
+// multi-node/multi-GPU backend.
+// On a multi-GPU system, we can enable distributed parallelization across MPI
+// processes by initializing MPI (see code) and launch the compiled executable
+// with MPI.
+// ```
+// mpirun -np <N> ./dyn.x
+// ```
+
+#include <cudaq.h>
+#include <iostream>
+
+// Define a quantum kernel with a runtime parameter
+struct kernel {
+  auto operator()(int N) __qpu__ {
+
+    // Dynamically sized vector of qubits
+    cudaq::qvector q(N);
+    h(q[0]);
+    for (int i = 0; i < N - 1; i++) {
+      x(q[i], q[i + 1]);
+    }
+  }
+};
+
+int main() {
+  // Initialize MPI to enable `cuTensorNet` distributed parallelization (see
+  // Simulation Backends/Tensor Network Simulators section of the documentation
+  // for more info)
+  // ```
+  // cudaq::mpi::initialize();
+  // if (cudaq::mpi::rank() == 0)
+  //   printf("Number of MPI processes: %d\n", cudaq::mpi::num_ranks());
+  // ```
+  const std::string pauliWord =
+      "YXIXXIZYZYYYIIYXIZXZIIYYXYIZZYYZIXXXZIIYZXZXZIZYZZZXII";
+  const std::size_t numQubits = pauliWord.size();
+  auto pauliOp = cudaq::spin_op::from_word(pauliWord);
+  auto expVal = cudaq::observe(kernel{}, pauliOp, numQubits);
+
+  std::cout << "<" << pauliWord << "> = " << expVal.expectation() << "\n";
+
+  // Finalize MPI if initialized
+  // ```
+  // cudaq::mpi::finalize();
+  // ```
+  return 0;
+}
diff --git a/runtime/nvqir/cutensornet/tensornet_spin_op.inc b/runtime/nvqir/cutensornet/tensornet_spin_op.inc
index c695c6656ef..f6a99f00a65 100644
--- a/runtime/nvqir/cutensornet/tensornet_spin_op.inc
+++ b/runtime/nvqir/cutensornet/tensornet_spin_op.inc
@@ -19,9 +19,16 @@ TensorNetworkSpinOp::TensorNetworkSpinOp(
     : m_cutnHandle(handle) {
   LOG_API_TIME();
   auto degrees = spinOp.degrees();
-  const std::vector<int64_t> qubitDims(degrees.size(), 2);
+  // Note: the `degrees` list is non-continuous and only contains non-identity
+  // indices.
+  // We need to create the network operator spanning over the entire qubit index
+  // space.
+  const std::size_t maxQubitIdx =
+      *std::max_element(degrees.begin(), degrees.end());
+  const std::size_t numQubits = maxQubitIdx + 1;
+  const std::vector<int64_t> qubitDims(numQubits, 2);
   HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(
-      m_cutnHandle, degrees.size(), qubitDims.data(), cudaDataType,
+      m_cutnHandle, qubitDims.size(), qubitDims.data(), cudaDataType,
       &m_cutnNetworkOperator));
   // Heuristic threshold to perform direct observable calculation.
   // If the number of qubits in the spin_op is small, it is more efficient to
diff --git a/unittests/integration/observe_result_tester.cpp b/unittests/integration/observe_result_tester.cpp
index e5fd5d3e38b..7e377b8e256 100644
--- a/unittests/integration/observe_result_tester.cpp
+++ b/unittests/integration/observe_result_tester.cpp
@@ -124,4 +124,42 @@ CUDAQ_TEST(ObserveResult, checkExpValBug) {
   // also relevant for noise modeling.
 }
 #endif
+
+CUDAQ_TEST(ObserveResult, checkObserveWithIdentity) {
+
+  auto kernel = []() __qpu__ {
+    cudaq::qvector qubits(5);
+    cudaq::exp_pauli(1.0, qubits, "XXIIX");
+  };
+
+  const std::string pauliWord = "ZZIIZ";
+  const std::size_t numQubits = pauliWord.size();
+  auto pauliOp = cudaq::spin_op::from_word(pauliWord);
+  // The canonicalized degree list is less than the number of qubits
+  EXPECT_LT(cudaq::spin_op::canonicalize(pauliOp).degrees().size(), numQubits);
+  auto expVal = cudaq::observe(kernel, pauliOp);
+  std::cout << "<" << pauliWord << "> = " << expVal.expectation() << "\n";
+  EXPECT_NEAR(expVal.expectation(), -0.416147, 1e-6);
+}
+
+#ifdef CUDAQ_BACKEND_TENSORNET
+CUDAQ_TEST(ObserveResult, checkObserveWithIdentityLarge) {
+
+  auto kernel = []() __qpu__ {
+    cudaq::qvector qubits(50);
+    cudaq::exp_pauli(1.0, qubits,
+                     "XXIIXXXIIXXXIIXXXIIXXXIIXXXIIXXXIIXXXIIXXXIIXXXIXX");
+  };
+
+  const std::string pauliWord =
+      "ZZIIZZZIIZZZIIZZZIIZZZIIZZZIIZZZIIZZZIIZZZIIZZZIZZ";
+  const std::size_t numQubits = pauliWord.size();
+  auto pauliOp = cudaq::spin_op::from_word(pauliWord);
+  // The canonicalized degree list is less than the number of qubits
+  EXPECT_LT(cudaq::spin_op::canonicalize(pauliOp).degrees().size(), numQubits);
+  auto expVal = cudaq::observe(kernel, pauliOp);
+  std::cout << "<" << pauliWord << "> = " << expVal.expectation() << "\n";
+  EXPECT_NEAR(expVal.expectation(), -0.416147, 1e-3);
+}
+#endif
 #endif