5 changes: 4 additions & 1 deletion python/paddle/__init__.py
@@ -234,7 +234,8 @@ def new_init(self, *args, **kwargs):
set_grad_enabled,
)
from .device import ( # noqa: F401
PaddleStream as Stream,
Event,
Stream,
device_guard,
get_cudnn_version,
get_default_device,
@@ -247,6 +248,7 @@ def new_init(self, *args, **kwargs):
is_compiled_with_ipu,
is_compiled_with_rocm,
is_compiled_with_xpu,
set_default_device,
set_device,
)
from .distributed import DataParallel
@@ -933,6 +935,7 @@ def __dir__(self):
raise err
kernel32.SetErrorMode(prev_error_mode)


disable_static()

from .pir_utils import IrGuard
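A minimal usage sketch (not part of the diff) for the names this hunk newly exposes at the top level, assuming paddle.Stream, paddle.Event, and paddle.set_default_device simply forward to their paddle.device counterparts:

    # Hypothetical usage of the newly re-exported top-level names.
    import paddle

    paddle.set_default_device('gpu:0')    # assumed alias of paddle.device.set_default_device
    s = paddle.Stream()                   # CUDA stream, re-exported from paddle.device
    e = paddle.Event()                    # CUDA event, re-exported from paddle.device
    with paddle.device.stream_guard(s):   # run the matmul on stream s
        x = paddle.randn([1024, 1024])
        y = x @ x
    e.record(s)                           # mark the end of the work queued on s
    e.synchronize()                       # block the host until the event completes
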
118 changes: 117 additions & 1 deletion python/paddle/cuda/__init__.py
@@ -21,11 +21,17 @@
import paddle
from paddle import base, core, device as paddle_device, framework
from paddle.device import (
PaddleStream as Stream,
Event,
Stream,
_device_to_paddle as _device_to_paddle,
device,
is_available as _device_is_available,
is_bf16_supported,
is_current_stream_capturing as _is_current_stream_capturing,
manual_seed,
manual_seed_all as device_manual_seed_all,
reset_peak_memory_stats,
set_stream,
stream_guard as _PaddleStreamGuard,
)

@@ -644,6 +650,109 @@ def memory_allocated(device: DeviceLike = None) -> int:
return paddle_device.memory_allocated(device)


def max_memory_allocated(device: DeviceLike = None) -> int:
'''
Return the peak size of memory that is allocated to tensors on the given device.

Note:
The size of memory allocated to a tensor is 256-byte aligned in Paddle, which may be larger than the memory size the tensor actually needs.
For instance, a float32 0-D Tensor with shape [] will take up 256 bytes of memory, even though storing a float32 value requires only 4 bytes.

Args:
device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
the string name of device like 'gpu:x'. If device is None, the device is the current device.
Default: None.

Return:
int: The peak size of memory that is allocated to tensors on the given device, in bytes.

Examples:
.. code-block:: python

>>> # doctest: +REQUIRES(env:GPU)
>>> import paddle
>>> paddle.device.set_device('gpu') # or '<custom_device>'

>>> max_memory_allocated_size = paddle.cuda.max_memory_allocated(paddle.CUDAPlace(0))
>>> max_memory_allocated_size = paddle.cuda.max_memory_allocated(0)
>>> max_memory_allocated_size = paddle.cuda.max_memory_allocated("gpu:0")
'''
return paddle_device.max_memory_allocated(device)
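
To make the 256-byte alignment note in this docstring concrete, here is an illustrative sketch (the exact delta depends on allocator state, so the numbers are indicative rather than guaranteed):

    # Illustrative only: even a 4-byte float32 scalar shows up as at least a
    # 256-byte increase in the allocated counter because of alignment.
    import paddle

    paddle.device.set_device('gpu')
    before = paddle.cuda.memory_allocated()
    scalar = paddle.to_tensor(1.0)                       # float32 0-D tensor, 4 bytes of payload
    after = paddle.cuda.memory_allocated()
    print(after - before)                                # expected to be >= 256
    print(paddle.cuda.max_memory_allocated() >= after)   # the peak never falls below current usage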


def max_memory_reserved(device: DeviceLike = None) -> int:
'''
Return the peak size of memory that is held by the allocator of the given device.

Args:
device(paddle.Place|int|str|None, optional): The device, the id of the device or
the string name of device like 'gpu:x'. If device is None, the device is the current device.
Default: None.

Return:
int: The peak size of memory that is held by the allocator of the given device, in bytes.

Examples:
.. code-block:: python

>>> # doctest: +REQUIRES(env:GPU)
>>> import paddle
>>> paddle.device.set_device('gpu') # or '<custom_device>'

>>> max_memory_reserved_size = paddle.cuda.max_memory_reserved(paddle.CUDAPlace(0))
>>> max_memory_reserved_size = paddle.cuda.max_memory_reserved(0)
>>> max_memory_reserved_size = paddle.cuda.max_memory_reserved("gpu:0")
'''
return paddle_device.max_memory_reserved(device)
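
For context, a hedged sketch of how the reserved peak relates to the allocated peak (the caching allocator holds whole blocks, so the reserved peak is normally at least as large as the allocated peak):

    # Sketch of the allocated/reserved relationship; values depend on allocator state.
    import paddle

    paddle.device.set_device('gpu')
    x = paddle.randn([4096, 4096])
    peak_alloc = paddle.cuda.max_memory_allocated()
    peak_reserved = paddle.cuda.max_memory_reserved()
    assert peak_reserved >= peak_alloc   # reserved includes cached, currently unused blocks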


def reset_max_memory_allocated(device: DeviceLike | None = None) -> None:
'''
Reset the peak size of memory that is allocated to tensors on the given device.

Args:
device(paddle.Place|int|str|None, optional): The device, the id of the device or
the string name of device like 'gpu:x'. If device is None, the device is the current device.
Default: None.

Examples:
.. code-block:: python

>>> # doctest: +REQUIRES(env:GPU)
>>> import paddle
>>> paddle.device.set_device('gpu') # or '<custom_device>'

>>> paddle.cuda.reset_max_memory_allocated(paddle.CUDAPlace(0))
>>> paddle.cuda.reset_max_memory_allocated(0)
>>> paddle.cuda.reset_max_memory_allocated("gpu:0")
'''
return paddle_device.reset_max_memory_allocated(device)
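
A hedged sketch of the intended use: resetting the peak right before a region of interest so that max_memory_allocated reports the peak of that region only (assuming the reset re-bases the statistic at the current usage):

    # Measure the allocation peak of one region of code in isolation.
    import paddle

    paddle.device.set_device('gpu')
    warmup = paddle.randn([1024, 1024])           # earlier allocations raise the global peak
    paddle.cuda.reset_max_memory_allocated()      # re-base the peak statistic here
    work = paddle.randn([2048, 2048]) * 2.0       # region of interest
    print(paddle.cuda.max_memory_allocated())     # peak reached since the reset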


def reset_max_memory_reserved(device: DeviceLike | None = None) -> None:
'''
Reset the peak size of memory that is held by the allocator of the given device.

Args:
device(paddle.Place|int|str|None, optional): The device, the id of the device or
the string name of device like 'gpu:x'. If device is None, the device is the current device.
Default: None.

Examples:
.. code-block:: python

>>> # doctest: +REQUIRES(env:GPU)
>>> import paddle
>>> paddle.device.set_device('gpu') # or '<custom_device>'

>>> paddle.cuda.reset_max_memory_reserved(paddle.CUDAPlace(0))
>>> paddle.cuda.reset_max_memory_reserved(0)
>>> paddle.cuda.reset_max_memory_reserved("gpu:0")
'''
return paddle_device.reset_max_memory_reserved(device)


def memory_reserved(device: DeviceLike = None) -> int:
"""
Return the current device memory managed by the caching allocator in bytes for a given device.
@@ -796,7 +905,14 @@ def get_stream_from_external(
"memory_allocated",
"memory_reserved",
"set_device",
"set_stream",
"manual_seed_all",
"get_rng_state",
"set_rng_state",
"device",
"is_bf16_supported",
"manual_seed",
"max_memory_allocated",
"reset_peak_memory_stats",
"Event",
]
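
A short sketch of the expanded paddle.cuda namespace after this change, assuming each wrapper simply forwards to its paddle.device counterpart:

    # Illustrative only: exercises a few of the names newly listed in __all__.
    import paddle

    paddle.device.set_device('gpu')
    paddle.cuda.manual_seed(1234)                 # seed the RNG of the current device
    paddle.cuda.reset_peak_memory_stats()         # clear the peak memory statistics
    evt = paddle.cuda.Event()                     # event type re-exported by the module
    print(paddle.cuda.max_memory_allocated(0))    # peak allocated bytes on device 0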