Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions runtime/nvqir/cutensornet/simulator_cutensornet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ SimulatorTensorNetBase::SimulatorTensorNetBase()
cudaq::mpi::is_initialized() ? cudaq::mpi::rank() % numDevices : 0;
HANDLE_CUDA_ERROR(cudaSetDevice(deviceId));
HANDLE_CUTN_ERROR(cutensornetCreate(&m_cutnHandle));
// The scratch pad must be allocated after we have selected the device.
scratchPad.allocate();
}

static std::vector<std::complex<double>>
Expand Down
13 changes: 11 additions & 2 deletions runtime/nvqir/cutensornet/tensornet_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,12 @@ struct ScratchDeviceMem {
2; // use half of available memory with alignment
}

ScratchDeviceMem() {
// Allocate scratch device memory based on available memory
void allocate() {
if (d_scratch)
throw std::runtime_error(
"Multiple scratch device memory allocations is not allowed.");

computeScratchSize();
// Try allocate device memory
auto errCode = cudaMalloc(&d_scratch, scratchSize);
Expand All @@ -86,7 +91,11 @@ struct ScratchDeviceMem {
HANDLE_CUDA_ERROR(errCode);
}
}
~ScratchDeviceMem() { HANDLE_CUDA_ERROR(cudaFree(d_scratch)); }

// Free the scratch device buffer at destruction.
// The guard on scratchSize avoids calling cudaFree (and the HANDLE_CUDA_ERROR
// check) when allocate() was never invoked or allocated zero bytes.
// NOTE(review): this assumes scratchSize > 0 implies d_scratch was successfully
// allocated; guarding on d_scratch itself would express that intent more
// directly — confirm against allocate()'s error paths (not fully visible here).
~ScratchDeviceMem() {
if (scratchSize > 0)
HANDLE_CUDA_ERROR(cudaFree(d_scratch));
}
};

/// Initialize `cutensornet` MPI Comm
Expand Down
23 changes: 23 additions & 0 deletions unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,29 @@ if(TARGET nvqir-tensornet)
message(STATUS "Building cutensornet backend tests.")
create_tests_with_backend(tensornet "")
create_tests_with_backend(tensornet-mps "")
if (MPI_CXX_FOUND)
  # Count the number of GPUs with nvidia-smi (if present on this machine).
  find_program(NVIDIA_SMI "nvidia-smi")
  if(NVIDIA_SMI)
    # OUTPUT_STRIP_TRAILING_WHITESPACE is required: `wc -l` emits a trailing
    # newline, which would otherwise be captured into NGPUS and break the
    # numeric comparison below.
    execute_process(COMMAND bash -c "nvidia-smi --list-gpus | wc -l"
                    OUTPUT_VARIABLE NGPUS
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    # Only build this test if we have at least 2 GPUs (one per MPI rank).
    # Note: use the variable name directly in if(); ${NGPUS} would be
    # double-dereferenced by CMake's if() auto-dereference semantics.
    if (NGPUS GREATER_EQUAL 2)
      message(STATUS "Building cutensornet MPI tests.")
      add_executable(test_tensornet_mpi mpi/tensornet_mpi_tester.cpp)
      if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT APPLE)
        target_link_options(test_tensornet_mpi PRIVATE -Wl,--no-as-needed)
      endif()
      target_link_libraries(test_tensornet_mpi
        PRIVATE
        cudaq
        cudaq-platform-default
        nvqir-tensornet
        gtest)
      # Launch with 2 ranks; the test labels mark it as requiring multiple GPUs.
      add_test(NAME TensornetMPITest COMMAND ${MPIEXEC} --allow-run-as-root -np 2 ${CMAKE_BINARY_DIR}/unittests/test_tensornet_mpi)
      set_tests_properties(TensornetMPITest PROPERTIES LABELS "gpu_required;mgpus_required")
    endif() # NGPUS
  endif() # NVIDIA_SMI
endif() # MPI_CXX_FOUND
endif()

# Create an executable for SpinOp UnitTests
Expand Down
44 changes: 44 additions & 0 deletions unittests/mpi/tensornet_mpi_tester.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*******************************************************************************
* Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. *
* All rights reserved. *
* *
* This source code and the accompanying materials are made available under *
* the terms of the Apache License 2.0 which accompanies this distribution. *
******************************************************************************/
#include <cudaq.h>
#include <gtest/gtest.h>

// Sanity check: MPI must already be initialized (done in main) by the time
// any test body runs.
TEST(TensornetMPITester, checkInit) {
  EXPECT_TRUE(cudaq::mpi::is_initialized());
  const auto rank = cudaq::mpi::rank();
  std::cout << "Rank = " << rank << "\n";
}

// Prepare a 50-qubit GHZ state, sample it 100 times, and verify (on the root
// rank only) that exactly the two all-equal bitstrings are observed.
TEST(TensornetMPITester, checkSimple) {
  constexpr std::size_t numQubits = 50;
  // GHZ preparation: Hadamard on qubit 0 followed by a CNOT ladder that
  // entangles each qubit with its successor.
  auto kernel = []() __qpu__ {
    cudaq::qvector qubits(numQubits);
    h(qubits[0]);
    for (int idx = 0; idx < numQubits - 1; idx++)
      x<cudaq::ctrl>(qubits[idx], qubits[idx + 1]);
    mz(qubits);
  };

  auto counts = cudaq::sample(100, kernel);

  // Aggregated sampling results are validated on rank 0 only.
  if (cudaq::mpi::rank() != 0)
    return;

  EXPECT_EQ(2, counts.size());
  for (auto &[bits, count] : counts) {
    printf("Observed: %s, %lu\n", bits.data(), count);
    EXPECT_EQ(numQubits, bits.size());
  }
}

// Entry point: let GoogleTest consume its command-line flags from argv first,
// then bring up MPI, run the suite, tear MPI down, and propagate the gtest
// exit status to the launcher.
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  cudaq::mpi::initialize();
  const int status = RUN_ALL_TESTS();
  cudaq::mpi::finalize();
  return status;
}