Skip to content

Commit 9da0df2

Browse files
committed
Merge branch 'cuq26-03-2-public' into 'main'
sync with internal repo1 (commit 2fab2fd88) See merge request cuda-hpc-libraries/cuquantum-sdk/cuquantum-public!52
2 parents 0e9431b + c31dcdf commit 9da0df2

12 files changed

Lines changed: 71 additions & 80 deletions

File tree

python/README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ Runtime dependencies of the cuQuantum Python package include:
5353
* Driver: Linux (525.60.13+ for CUDA 12, 580.65.06+ for CUDA 13)
5454
* CUDA Toolkit 12.x or 13.x
5555
* cuStateVec 1.13.1+
56-
* cuTensorNet 2.12.1+
57-
* cuDensityMat >=0.5.1, <0.6.0
58-
* cuPauliProp >=0.3.1, <0.4.0
56+
* cuTensorNet 2.12.2+
57+
* cuDensityMat >=0.5.2, <0.6.0
58+
* cuPauliProp >=0.3.2, <0.4.0
5959
* cuStabilizer >=0.3.0, <0.4.0
6060
* Python >=3.11, <3.14
6161
* NumPy v1.21+
@@ -66,6 +66,7 @@ Runtime dependencies of the cuQuantum Python package include:
6666
* Qiskit v1.4.2+ (optional, see [installation guide](https://qiskit.org/documentation/getting_started.html))
6767
* Cirq v0.6.0+ (optional, see [installation guide](https://quantumai.google/cirq/install))
6868
* mpi4py v3.1.0+ (optional, see [installation guide](https://mpi4py.readthedocs.io/en/stable/install.html))
69+
* nccl4py (optional)
6970

7071
If you install everything from conda-forge, all the required dependencies are taken care of for you (except for the driver).
7172

python/cuquantum/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55
# Note: cuQuantum Python follows the cuQuantum SDK version, which is now
66
# switched to YY.MM and is different from individual libraries' (semantic)
77
# versioning scheme.
8-
__version__ = '26.3.1'
8+
__version__ = '26.3.2'

python/cuquantum/densitymat/spectrum.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,24 +66,32 @@ class OperatorSpectrumConfig:
6666
If not specified, a default value will be chosen. Defaults to 1.
6767
max_buffer_ratio: Maximum ratio of the total number of blocks in the Krylov subspace to the number of requested eigenvalues.
6868
If not specified, a default value will be chosen. Must be greater than 1. Defaults to 5.
69-
max_restarts: Maximum number of restart cycles allowed during the iterative eigenvalue computation.
70-
If not specified, a default value will be chosen. Defaults to 20.
69+
max_restarts: Maximum number of thick restarts of the block Krylov algorithm.
70+
The total number of Krylov-subspace expansions performed is at most
71+
``max_restarts + 1`` (one initial expansion plus up to ``max_restarts``
72+
restarted expansions). A value of ``0`` corresponds to a single expansion
73+
with no restart. If not specified, a default value will be chosen.
74+
Defaults to 19 (i.e. up to 20 expansions).
7175
"""
7276
min_krylov_block_size: Optional[int] = None
7377
max_buffer_ratio: Optional[int] = None
7478
max_restarts: Optional[int] = None
7579

7680
def _check_int(self, attribute, name, min_value=0):
77-
message = f"Invalid value ({attribute}) for '{name}'. Expect non-zero integer or None."
81+
if min_value == -1:
82+
bound_desc = "non-negative integer"
83+
else:
84+
bound_desc = f"integer greater than {min_value}"
85+
message = f"Invalid value ({attribute}) for '{name}'. Expect {bound_desc} or None."
7886
if not isinstance(attribute, (type(None), int)):
7987
raise ValueError(message)
80-
if isinstance(attribute, int) and not attribute > min_value:
88+
if isinstance(attribute, int) and not attribute > min_value:
8189
raise ValueError(message)
8290

8391
def __post_init__(self):
8492
self._check_int(self.min_krylov_block_size, "min_krylov_block_size",0)
85-
self._check_int(self.max_buffer_ratio, "max_buffer_ratio",1)
86-
self._check_int(self.max_restarts, "max_restarts",0)
93+
self._check_int(self.max_buffer_ratio, "max_buffer_ratio",1)
94+
self._check_int(self.max_restarts, "max_restarts", -1)
8795

8896
@classmethod
8997
def _option_to_enum(cls, name):

python/extensions/cuquantum/densitymat/jax/cppsrc/cudensitymat.h

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464

6565
#define CUDENSITYMAT_MAJOR 0 //!< cuDensityMat major version.
6666
#define CUDENSITYMAT_MINOR 5 //!< cuDensityMat minor version.
67-
#define CUDENSITYMAT_PATCH 1 //!< cuDensityMat patch version.
67+
#define CUDENSITYMAT_PATCH 2 //!< cuDensityMat patch version.
6868
#define CUDENSITYMAT_VERSION (CUDENSITYMAT_MAJOR * 10000 + CUDENSITYMAT_MINOR * 100 + CUDENSITYMAT_PATCH)
6969

7070

@@ -222,7 +222,7 @@ typedef enum
222222
typedef enum
223223
{
224224
CUDENSITYMAT_OPERATOR_SPECTRUM_CONFIG_MAX_EXPANSION = 0, ///< int32_t: Configures the max ratio of the number of Krylov subspace blocks to the number of requested eigen-pairs (defaults to 5)
225-
CUDENSITYMAT_OPERATOR_SPECTRUM_CONFIG_MAX_RESTARTS = 1, ///< int32_t: Configures the max number of restarted iterations of the block Krylov algorithm (defaults to 20)
225+
CUDENSITYMAT_OPERATOR_SPECTRUM_CONFIG_MAX_RESTARTS = 1, ///< int32_t: Configures the max number of restarted iterations of the block Krylov algorithm (defaults to 19)
226226
CUDENSITYMAT_OPERATOR_SPECTRUM_CONFIG_MIN_BLOCK_SIZE = 2, ///< int32_t: Configures the min block size of the block Krylov algorithm (defaults to 1)
227227
} cudensitymatOperatorSpectrumConfig_t;
228228

@@ -2032,33 +2032,6 @@ cudensitymatStatus_t cudensitymatAttachBatchedCoefficients(
20322032
void * operatorProductBatchedCoeffsTmp[],
20332033
void * operatorProductBatchedCoeffs[]);
20342034

2035-
/**
2036-
* \brief Configures the operator action on a quantum state.
2037-
*
2038-
* \param[in] handle Library handle.
2039-
* \param[inout] superoperator Operator.
2040-
* \param[in] stateIn Representative input quantum state on which the operator
2041-
* is supposed to act. The actual quantum state acted on during computation
2042-
* may be different, but it has to be of the same shape, kind,
2043-
* and factorization structure (topology, bond dimensions, etc).
2044-
* \param[in] stateOut Representative output quantum state produced by the action
2045-
* of the operator on the input quantum state. The actual quantum state acted on
2046-
* during computation may be different, but it has to be of the same shape,
2047-
* kind, and factorization structure (topology, bond dimensions, etc).
2048-
* \param[in] attribute Configuration attribute.
2049-
* \param[in] attributeValue Pointer to the configuration attribute value (type-erased).
2050-
* \param[in] attributeSize The size of the configuration attribute value.
2051-
* \return cudensitymatStatus_t
2052-
*/
2053-
cudensitymatStatus_t cudensitymatOperatorConfigureAction(
2054-
const cudensitymatHandle_t handle,
2055-
cudensitymatOperator_t superoperator,
2056-
const cudensitymatState_t stateIn,
2057-
const cudensitymatState_t stateOut,
2058-
//cudensitymatOperatorActionAttributes_t attribute, //`FIXME
2059-
const void * attributeValue,
2060-
size_t attributeSize);
2061-
20622035
/**
20632036
* \brief Prepares the operator for an action on a quantum state.
20642037
*

python/samples/densitymat/operator_mpi_nccl_example.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,11 @@ def ordered_print(msg):
6767

6868
# Setup NCCL communicator (initialized via nvmath.distributed)
6969
nvmath.distributed.initialize(dev.id, comm, backends=["nccl"])
70-
nccl_comm_ptr = nvmath.distributed.get_context().nccl_comm
70+
nccl_comm = nvmath.distributed.get_context().nccl_comm
71+
if isinstance(nccl_comm, int):
72+
nccl_comm_ptr = nccl_comm
73+
else:
74+
nccl_comm_ptr = nccl_comm.ptr
7175
ctx.set_communicator(nccl_comm_ptr, provider="NCCL")
7276
ordered_print("Set NCCL communicator on execution context, enabling distributed computation.")
7377

python/samples/densitymat/operator_spectrum_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def take_complex_conjugate_transpose(arr):
138138
max_num_eigvals = 5
139139
min_block_size = 4
140140
max_buffer_ratio = 25
141-
max_restarts = 10
141+
max_restarts = 9
142142

143143

144144
# Create a sequence of pure states |ψ_i⟩

python/setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@
3737
'nvmath-python>=0.7.0, <1.0.0', # ">=0.7.0,<1.0.0"
3838
# 'torch', # <-- PyTorch is optional; also, the PyPI version does not support GPU...
3939
f'custatevec-cu{utils.cuda_major_ver}>=1.13.1, <2', # ">=1.13.1,<2"
40-
f'cutensornet-cu{utils.cuda_major_ver}>=2.12.1, <3', # ">=2.12.0,<3"
41-
f'cudensitymat-cu{utils.cuda_major_ver}>=0.5.1, <0.6', # ">=0.5.1,<0.6.0"
42-
f'cupauliprop-cu{utils.cuda_major_ver}>=0.3.1, <0.4', # ">=0.3.1,<0.4.0"
40+
f'cutensornet-cu{utils.cuda_major_ver}>=2.12.2, <3', # ">=2.12.2,<3"
41+
f'cudensitymat-cu{utils.cuda_major_ver}>=0.5.2, <0.6', # ">=0.5.2,<0.6.0"
42+
f'cupauliprop-cu{utils.cuda_major_ver}>=0.3.2, <0.4', # ">=0.3.2,<0.4.0"
4343
f'custabilizer-cu{utils.cuda_major_ver}>=0.3.0, <0.4', # ">=0.3.0,<0.4.0"
4444
]
4545
if utils.cuda_major_ver == '12':

python/tests/cuquantum_tests/bindings/test_internal.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,5 +64,5 @@ def test_data_type_alignment_with_nvmath():
6464
if val.name != ref_val.name:
6565
raise ValueError(f"{val.name} from nvmath has a different name than cuquantum.cudaDataType")
6666
except ValueError:
67-
# nvmath.CudaDataType has two additional values that are not captured by cuquantum.cudaDataType
68-
assert val.name in {"CUDA_R_8F_E4M3", "CUDA_R_8F_E5M2"}
67+
# nvmath.CudaDataType has additional values that are not captured by cuquantum.cudaDataType
68+
assert val.name in {"CUDA_R_8F_E4M3", "CUDA_R_8F_E5M2", "CUDA_R_4F_E2M1"}

python/tests/cuquantum_tests/densitymat/distributed_utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@ def get_available_provider():
4242
if AVAILABLE_PROVIDER == "NCCL":
4343
with cp.cuda.Device(device_id):
4444
nvmath.distributed.initialize(device_id, mpi_comm, backends=["nccl"])
45-
NCCL_COMM_PTR = nvmath.distributed.get_context().nccl_comm
45+
nccl_comm = nvmath.distributed.get_context().nccl_comm
46+
if isinstance(nccl_comm, int):
47+
NCCL_COMM_PTR = nccl_comm
48+
else:
49+
NCCL_COMM_PTR = nccl_comm.ptr
4650

4751
def skip_if_provider_unavailable(provider: str):
4852
"""Skip test if the requested provider doesn't match the loaded interface."""

python/tests/cuquantum_tests/densitymat/test_work_stream_mpi.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
from mpi4py import MPI
1010
import pytest
1111

12-
from nvmath.bindings import nccl
12+
# nvmath-python >= 0.9 no longer ships its own NCCL bindings; NCCL is now
13+
# provided by the standalone ``nccl4py`` package (imported as ``nccl.core``).
14+
import nccl.core as nccl
1315

1416
from .distributed_utils import (
1517
mpi_comm as comm,
@@ -72,37 +74,42 @@ def test_work_stream_mpi_communicator_from_int_pointer():
7274
assert rank == ctx.get_proc_rank()
7375

7476

77+
def _bootstrap_nccl_communicator(rank, size):
78+
"""Create an externally-managed NCCL communicator using nccl4py.
79+
80+
All ranks must call this collectively. A unique id is generated on every
81+
rank to obtain a same-sized buffer, then rank 0's bytes are broadcast via
82+
MPI so all ranks join the same communicator.
83+
"""
84+
unique_id = nccl.get_unique_id()
85+
comm.Bcast(unique_id.as_ndarray.view(np.int8), root=0)
86+
return nccl.Communicator.init(nranks=size, rank=rank, unique_id=unique_id)
87+
88+
7589
@pytest.mark.parametrize("sequence_type", [tuple, list])
7690
def test_work_stream_nccl_communicator_from_pointer(sequence_type):
7791
"""Test setting NCCL communicator from (pointer, size) sequence with externally managed ncclComm_t."""
7892
skip_if_provider_unavailable("NCCL")
79-
93+
8094
rank = comm.Get_rank()
8195
size = comm.Get_size()
8296
device_id = CURRENT_DEVICE_ID
8397

8498
with cp.cuda.Device(device_id):
85-
# Bootstrap NCCL communicator externally (following library_handle pattern)
86-
unique_id = nccl.UniqueId()
87-
if rank == 0:
88-
nccl.get_unique_id(unique_id.ptr)
89-
comm.Bcast(unique_id._data.view(np.int8), root=0)
90-
91-
nccl_comm_ptr = nccl.comm_init_rank(size, unique_id.ptr, rank)
92-
99+
nccl_comm = _bootstrap_nccl_communicator(rank, size)
93100
try:
94101
ctx = WorkStream(device_id=device_id)
95102
# Pass (ncclComm_t value, size) - library_handle wraps it in numpy array internally
96103
# The size value is not actually used (library uses itemsize of internal holder)
97104
ctx.set_communicator(
98-
sequence_type([nccl_comm_ptr, np.dtype(np.intp).itemsize]),
105+
sequence_type([nccl_comm.ptr, np.dtype(np.intp).itemsize]),
99106
provider="NCCL"
100107
)
101108
assert size == ctx.get_num_ranks()
102109
assert rank == ctx.get_proc_rank()
103110
finally:
104111
# Clean up externally managed NCCL communicator
105-
nccl.comm_destroy(nccl_comm_ptr)
112+
nccl_comm.destroy()
106113

107114

108115
def test_work_stream_nccl_communicator_from_int_pointer():
@@ -114,21 +121,14 @@ def test_work_stream_nccl_communicator_from_int_pointer():
114121
device_id = CURRENT_DEVICE_ID
115122

116123
with cp.cuda.Device(device_id):
117-
# Bootstrap NCCL communicator externally (following library_handle pattern)
118-
unique_id = nccl.UniqueId()
119-
if rank == 0:
120-
nccl.get_unique_id(unique_id.ptr)
121-
comm.Bcast(unique_id._data.view(np.int8), root=0)
122-
123-
nccl_comm_ptr = nccl.comm_init_rank(size, unique_id.ptr, rank)
124-
124+
nccl_comm = _bootstrap_nccl_communicator(rank, size)
125125
try:
126126
ctx = WorkStream(device_id=device_id)
127-
ctx.set_communicator(int(nccl_comm_ptr), provider="NCCL")
127+
ctx.set_communicator(int(nccl_comm.ptr), provider="NCCL")
128128
assert size == ctx.get_num_ranks()
129129
assert rank == ctx.get_proc_rank()
130130
finally:
131131
ctx = None
132132
cp.cuda.Device().synchronize()
133-
nccl.comm_finalize(nccl_comm_ptr)
134-
nccl.comm_destroy(nccl_comm_ptr)
133+
nccl_comm.finalize()
134+
nccl_comm.destroy()

0 commit comments

Comments
 (0)