From af46ef40679f89934e43e68b8794a185dca90e97 Mon Sep 17 00:00:00 2001 From: huaweil Date: Tue, 10 Feb 2026 06:41:31 +0000 Subject: [PATCH 1/3] Fix density matrix overlap using wrong matrix dimension The density matrix branch of CuDensityMatState::overlap() incorrectly used `dimension` (total element count, N^2) as the matrix side length, creating an N^2 x N^2 Eigen matrix and copying N^4 elements instead of N^2. This caused excessive memory allocation or crashes for any density matrix overlap computation. Use sqrt(dimension) as the matrix side length, consistent with dump(), getTensor(), and operator() in the same file. Add regression tests for density matrix overlap covering self-overlap, orthogonal states, and partial overlap. Signed-off-by: huaweil --- .../nvqir/cudensitymat/CuDensityMatState.cpp | 9 ++- unittests/dynamics/test_CuDensityMatState.cpp | 57 +++++++++++++++++++ 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/runtime/nvqir/cudensitymat/CuDensityMatState.cpp b/runtime/nvqir/cudensitymat/CuDensityMatState.cpp index 92e82113e18..77ce93ea80e 100644 --- a/runtime/nvqir/cudensitymat/CuDensityMatState.cpp +++ b/runtime/nvqir/cudensitymat/CuDensityMatState.cpp @@ -55,13 +55,16 @@ CuDensityMatState::overlap(const cudaq::SimulationState &other) { } // FIXME: implement this in GPU memory - Eigen::MatrixXcd state(dimension, dimension); - const auto size = dimension * dimension; + // For density matrices, `dimension` is the total number of elements (N^2). + // The matrix side length is sqrt(dimension). + const std::size_t matDim = static_cast(std::sqrt(dimension)); + Eigen::MatrixXcd state(matDim, matDim); + const auto size = dimension; HANDLE_CUDA_ERROR(cudaMemcpy(state.data(), devicePtr, size * sizeof(std::complex), cudaMemcpyDeviceToHost)); - Eigen::MatrixXcd otherState(dimension, dimension); + Eigen::MatrixXcd otherState(matDim, matDim); HANDLE_CUDA_ERROR(cudaMemcpy(otherState.data(), other.getTensor().data, size * sizeof(std::complex), cudaMemcpyDeviceToHost)); diff --git a/unittests/dynamics/test_CuDensityMatState.cpp b/unittests/dynamics/test_CuDensityMatState.cpp index 75ab9500ff9..20080817d8b 100644 --- a/unittests/dynamics/test_CuDensityMatState.cpp +++ b/unittests/dynamics/test_CuDensityMatState.cpp @@ -257,6 +257,63 @@ TEST_F(CuDensityMatStateTest, DensityMatrixIndexing) { EXPECT_THROW(state(0, {4, 4}), std::runtime_error); } +// Regression test for density matrix overlap dimension bug. +// The bug was that the density matrix branch of overlap() used `dimension` +// (total element count, N^2) as the matrix side length, creating an N^2 x N^2 +// Eigen matrix and copying N^4 elements -- far exceeding the actual buffer. +TEST_F(CuDensityMatStateTest, DensityMatrixOverlapSelf) { + // |00><00| density matrix: self-overlap should be Tr(rho^2) = 1.0 + CuDensityMatState state(densityMatrixData.size(), + cudaq::dynamics::createArrayGpu(densityMatrixData)); + state.initialize_cudm(handle, hilbertSpaceDims, /*batchSize=*/1); + EXPECT_TRUE(state.is_density_matrix()); + + auto result = state.overlap(state); + EXPECT_NEAR(result.real(), 1.0, 1e-12); + EXPECT_NEAR(result.imag(), 0.0, 1e-12); +} + +TEST_F(CuDensityMatStateTest, DensityMatrixOverlapOrthogonal) { + // |00><00| vs |11><11|: overlap should be Tr(rho1 * rho2) = 0.0 + CuDensityMatState state1(densityMatrixData.size(), + cudaq::dynamics::createArrayGpu(densityMatrixData)); + state1.initialize_cudm(handle, hilbertSpaceDims, /*batchSize=*/1); + + // |11><11|: element (3,3)=1 in a 4x4 matrix (column-major storage) + std::vector> dm11(16, {0.0, 0.0}); + dm11[15] = {1.0, 0.0}; // col-major index for (3,3) = 3*4+3 = 15 + CuDensityMatState state2(dm11.size(), cudaq::dynamics::createArrayGpu(dm11)); + state2.initialize_cudm(handle, hilbertSpaceDims, /*batchSize=*/1); + + auto result = state1.overlap(state2); + EXPECT_NEAR(result.real(), 0.0, 1e-12); + EXPECT_NEAR(result.imag(), 0.0, 1e-12); +} + +TEST_F(CuDensityMatStateTest, DensityMatrixOverlapPartial) { + // Single-qubit system: |0><0| vs |+><+| where |+> = (|0>+|1>)/sqrt(2) + // |0><0| = {{1,0},{0,0}}, |+><+| = {{0.5,0.5},{0.5,0.5}} + // Tr(|0><0| * |+><+|) = 0.5 + std::vector dims1q = {2}; + + // |0><0| in column-major: [1, 0, 0, 0] + std::vector> dm0 = { + {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}; + CuDensityMatState state0(dm0.size(), cudaq::dynamics::createArrayGpu(dm0)); + state0.initialize_cudm(handle, dims1q, /*batchSize=*/1); + + // |+><+| in column-major: [0.5, 0.5, 0.5, 0.5] + std::vector> dmPlus = { + {0.5, 0.0}, {0.5, 0.0}, {0.5, 0.0}, {0.5, 0.0}}; + CuDensityMatState statePlus(dmPlus.size(), + cudaq::dynamics::createArrayGpu(dmPlus)); + statePlus.initialize_cudm(handle, dims1q, /*batchSize=*/1); + + auto result = state0.overlap(statePlus); + EXPECT_NEAR(result.real(), 0.5, 1e-12); + EXPECT_NEAR(result.imag(), 0.0, 1e-12); +} + // Test indexing for single-qubit density matrix TEST_F(CuDensityMatStateTest, SingleQubitDensityMatrixIndexing) { // 1-qubit system: 2x2 density matrix (4 elements) From 4b6a6ca1eb8e32f43e68a26ab81accce559f6734 Mon Sep 17 00:00:00 2001 From: huaweil Date: Tue, 10 Feb 2026 06:42:08 +0000 Subject: [PATCH 2/3] Include singleStateDimension in move constructor and move assignment The move constructor and move assignment operator of CuDensityMatState did not transfer the singleStateDimension field. Add it for completeness so that all member state is properly moved, preventing potential issues if a batched state object is ever moved directly rather than through a unique_ptr. This is a defensive change with no impact on current usage, as batched states with non-zero singleStateDimension are always heap-allocated and managed via unique_ptr. Signed-off-by: huaweil --- runtime/nvqir/cudensitymat/CuDensityMatState.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/runtime/nvqir/cudensitymat/CuDensityMatState.cpp b/runtime/nvqir/cudensitymat/CuDensityMatState.cpp index 77ce93ea80e..61b47677786 100644 --- a/runtime/nvqir/cudensitymat/CuDensityMatState.cpp +++ b/runtime/nvqir/cudensitymat/CuDensityMatState.cpp @@ -403,11 +403,14 @@ CuDensityMatState::CuDensityMatState(CuDensityMatState &&other) noexcept : isDensityMatrix(other.isDensityMatrix), dimension(other.dimension), devicePtr(other.devicePtr), cudmState(other.cudmState), cudmHandle(other.cudmHandle), hilbertSpaceDims(other.hilbertSpaceDims), - batchSize(other.batchSize), borrowedData(other.borrowedData) { + batchSize(other.batchSize), + singleStateDimension(other.singleStateDimension), + borrowedData(other.borrowedData) { other.isDensityMatrix = false; other.dimension = 0; other.devicePtr = nullptr; other.batchSize = 1; + other.singleStateDimension = 0; other.cudmState = nullptr; other.cudmHandle = nullptr; other.hilbertSpaceDims.clear(); @@ -433,14 +436,15 @@ CuDensityMatState::operator=(CuDensityMatState &&other) noexcept { cudmHandle = other.cudmHandle; hilbertSpaceDims = std::move(other.hilbertSpaceDims); batchSize = other.batchSize; + singleStateDimension = other.singleStateDimension; borrowedData = other.borrowedData; // Nullify other other.isDensityMatrix = false; other.dimension = 0; other.devicePtr = nullptr; - other.cudmState = nullptr; other.batchSize = 1; + other.singleStateDimension = 0; other.borrowedData = false; } return *this; From 8836ce83fd2ba490eb85d681b6afe2b24e469392 Mon Sep 17 00:00:00 2001 From: huaweil Date: Wed, 11 Feb 2026 10:11:21 +0000 Subject: [PATCH 3/3] refactor(cudensitymat): use dimension directly in overlap() Remove redundant local `size` alias and pass `dimension` directly to cudaMemcpy. Signed-off-by: huaweil --- runtime/nvqir/cudensitymat/CuDensityMatState.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/runtime/nvqir/cudensitymat/CuDensityMatState.cpp b/runtime/nvqir/cudensitymat/CuDensityMatState.cpp index 61b47677786..4ea9e2b738b 100644 --- a/runtime/nvqir/cudensitymat/CuDensityMatState.cpp +++ b/runtime/nvqir/cudensitymat/CuDensityMatState.cpp @@ -39,14 +39,13 @@ CuDensityMatState::overlap(const cudaq::SimulationState &other) { if (!isDensityMatrix) { Eigen::VectorXcd state(dimension); - const auto size = dimension; HANDLE_CUDA_ERROR(cudaMemcpy(state.data(), devicePtr, - size * sizeof(std::complex), + dimension * sizeof(std::complex), cudaMemcpyDeviceToHost)); Eigen::VectorXcd otherState(dimension); HANDLE_CUDA_ERROR(cudaMemcpy(otherState.data(), other.getTensor().data, - size * sizeof(std::complex), + dimension * sizeof(std::complex), cudaMemcpyDeviceToHost)); return std::abs(std::inner_product( state.begin(), state.end(), otherState.begin(), @@ -59,14 +58,13 @@ CuDensityMatState::overlap(const cudaq::SimulationState &other) { // The matrix side length is sqrt(dimension). const std::size_t matDim = static_cast(std::sqrt(dimension)); Eigen::MatrixXcd state(matDim, matDim); - const auto size = dimension; HANDLE_CUDA_ERROR(cudaMemcpy(state.data(), devicePtr, - size * sizeof(std::complex), + dimension * sizeof(std::complex), cudaMemcpyDeviceToHost)); Eigen::MatrixXcd otherState(matDim, matDim); HANDLE_CUDA_ERROR(cudaMemcpy(otherState.data(), other.getTensor().data, - size * sizeof(std::complex), + dimension * sizeof(std::complex), cudaMemcpyDeviceToHost)); return (state.adjoint() * otherState).trace();