5 changes: 4 additions & 1 deletion python/paddle/__init__.py
@@ -234,7 +234,8 @@ def new_init(self, *args, **kwargs):
set_grad_enabled,
)
from .device import ( # noqa: F401
PaddleStream as Stream,
Event,
Stream,
device_guard,
get_cudnn_version,
get_default_device,
@@ -247,6 +248,7 @@ def new_init(self, *args, **kwargs):
is_compiled_with_ipu,
is_compiled_with_rocm,
is_compiled_with_xpu,
set_default_device,
set_device,
)
from .distributed import DataParallel
@@ -933,6 +935,7 @@ def __dir__(self):
raise err
kernel32.SetErrorMode(prev_error_mode)


disable_static()

from .pir_utils import IrGuard
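A minimal usage sketch (not part of the diff) for the names this hunk newly exposes at the top level, assuming paddle.Stream, paddle.Event, and paddle.set_default_device simply forward to their paddle.device counterparts:

    # Hypothetical usage of the newly re-exported top-level names.
    import paddle

    paddle.set_default_device('gpu:0')    # assumed alias of paddle.device.set_default_device
    s = paddle.Stream()                   # CUDA stream, re-exported from paddle.device
    e = paddle.Event()                    # CUDA event, re-exported from paddle.device
    with paddle.device.stream_guard(s):   # run the matmul on stream s
        x = paddle.randn([1024, 1024])
        y = x @ x
    e.record(s)                           # mark the end of the work queued on s
    e.synchronize()                       # block the host until the event completes
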
118 changes: 117 additions & 1 deletion python/paddle/cuda/__init__.py
@@ -21,11 +21,17 @@
import paddle
from paddle import base, core, device as paddle_device, framework
from paddle.device import (
PaddleStream as Stream,
Event,
Stream,
_device_to_paddle as _device_to_paddle,
device,
is_available as _device_is_available,
is_bf16_supported,
is_current_stream_capturing as _is_current_stream_capturing,
manual_seed,
manual_seed_all as device_manual_seed_all,
reset_peak_memory_stats,
set_stream,
stream_guard as _PaddleStreamGuard,
)

@@ -644,6 +650,109 @@ def memory_allocated(device: DeviceLike = None) -> int:
return paddle_device.memory_allocated(device)


def max_memory_allocated(device: DeviceLike = None) -> int:
'''
Return the peak size of memory that is allocated to tensors on the given device.

Note:
The size of memory allocated to a tensor is 256-byte aligned in Paddle, which may be larger than the memory size the tensor actually needs.
For instance, a float32 0-D Tensor with shape [] will take up 256 bytes of memory, even though storing a float32 value requires only 4 bytes.

Args:
device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
the string name of device like 'gpu:x'. If device is None, the device is the current device.
Default: None.

Return:
int: The peak size of memory that is allocated to tensors on the given device, in bytes.

Examples:
.. code-block:: python

>>> # doctest: +REQUIRES(env:GPU)
>>> import paddle
>>> paddle.device.set_device('gpu') # or '<custom_device>'

>>> max_memory_allocated_size = paddle.cuda.max_memory_allocated(paddle.CUDAPlace(0))
>>> max_memory_allocated_size = paddle.cuda.max_memory_allocated(0)
>>> max_memory_allocated_size = paddle.cuda.max_memory_allocated("gpu:0")
'''
return paddle_device.max_memory_allocated(device)
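
To make the 256-byte alignment note in this docstring concrete, here is an illustrative sketch (the exact delta depends on allocator state, so the numbers are indicative rather than guaranteed):

    # Illustrative only: even a 4-byte float32 scalar shows up as at least a
    # 256-byte increase in the allocated counter because of alignment.
    import paddle

    paddle.device.set_device('gpu')
    before = paddle.cuda.memory_allocated()
    scalar = paddle.to_tensor(1.0)                       # float32 0-D tensor, 4 bytes of payload
    after = paddle.cuda.memory_allocated()
    print(after - before)                                # expected to be >= 256
    print(paddle.cuda.max_memory_allocated() >= after)   # the peak never falls below current usage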


def max_memory_reserved(device: DeviceLike = None) -> int:
'''
Return the peak size of memory that is held by the allocator of the given device.

Args:
device(paddle.Place|int|str|None, optional): The device, the id of the device or
the string name of device like 'gpu:x'. If device is None, the device is the current device.
Default: None.

Return:
int: The peak size of memory that is held by the allocator of the given device, in bytes.

Examples:
.. code-block:: python

>>> # doctest: +REQUIRES(env:GPU)
>>> import paddle
>>> paddle.device.set_device('gpu') # or '<custom_device>'

>>> max_memory_reserved_size = paddle.cuda.max_memory_reserved(paddle.CUDAPlace(0))
>>> max_memory_reserved_size = paddle.cuda.max_memory_reserved(0)
>>> max_memory_reserved_size = paddle.cuda.max_memory_reserved("gpu:0")
'''
return paddle_device.max_memory_reserved(device)
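
For context, a hedged sketch of how the reserved peak relates to the allocated peak (the caching allocator holds whole blocks, so the reserved peak is normally at least as large as the allocated peak):

    # Sketch of the allocated/reserved relationship; values depend on allocator state.
    import paddle

    paddle.device.set_device('gpu')
    x = paddle.randn([4096, 4096])
    peak_alloc = paddle.cuda.max_memory_allocated()
    peak_reserved = paddle.cuda.max_memory_reserved()
    assert peak_reserved >= peak_alloc   # reserved includes cached, currently unused blocks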


def reset_max_memory_allocated(device: DeviceLike | None = None) -> None:
'''
Reset the peak size of memory that is allocated to tensors on the given device.

Args:
device(paddle.Place|int|str|None, optional): The device, the id of the device or
the string name of device like 'gpu:x'. If device is None, the device is the current device.
Default: None.

Examples:
.. code-block:: python

>>> # doctest: +REQUIRES(env:GPU)
>>> import paddle
>>> paddle.device.set_device('gpu') # or '<custom_device>'

>>> paddle.cuda.reset_max_memory_allocated(paddle.CUDAPlace(0))
>>> paddle.cuda.reset_max_memory_allocated(0)
>>> paddle.cuda.reset_max_memory_allocated("gpu:0")
'''
return paddle_device.reset_max_memory_allocated(device)
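
A hedged sketch of the intended use: resetting the peak right before a region of interest so that max_memory_allocated reports the peak of that region only (assuming the reset re-bases the statistic at the current usage):

    # Measure the allocation peak of one region of code in isolation.
    import paddle

    paddle.device.set_device('gpu')
    warmup = paddle.randn([1024, 1024])           # earlier allocations raise the global peak
    paddle.cuda.reset_max_memory_allocated()      # re-base the peak statistic here
    work = paddle.randn([2048, 2048]) * 2.0       # region of interest
    print(paddle.cuda.max_memory_allocated())     # peak reached since the reset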


def reset_max_memory_reserved(device: DeviceLike | None = None) -> None:
'''
Reset the peak size of memory that is held by the allocator of the given device.

Args:
device(paddle.Place|int|str|None, optional): The device, the id of the device or
the string name of device like 'gpu:x'. If device is None, the device is the current device.
Default: None.

Examples:
.. code-block:: python

>>> # doctest: +REQUIRES(env:GPU)
>>> import paddle
>>> paddle.device.set_device('gpu') # or '<custom_device>'

>>> paddle.cuda.reset_max_memory_reserved(paddle.CUDAPlace(0))
>>> paddle.cuda.reset_max_memory_reserved(0)
>>> paddle.cuda.reset_max_memory_reserved("gpu:0")
'''
return paddle_device.reset_max_memory_reserved(device)


def memory_reserved(device: DeviceLike = None) -> int:
"""
Return the current device memory managed by the caching allocator in bytes for a given device.
@@ -796,7 +905,14 @@ def get_stream_from_external(
"memory_allocated",
"memory_reserved",
"set_device",
"set_stream",
"manual_seed_all",
"get_rng_state",
"set_rng_state",
"device",
"is_bf16_supported",
"manual_seed",
"max_memory_allocated",
"reset_peak_memory_stats",
"Event",
]
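
A short sketch of the expanded paddle.cuda namespace after this change, assuming each wrapper simply forwards to its paddle.device counterpart:

    # Illustrative only: exercises a few of the names newly listed in __all__.
    import paddle

    paddle.device.set_device('gpu')
    paddle.cuda.manual_seed(1234)                 # seed the RNG of the current device
    paddle.cuda.reset_peak_memory_stats()         # clear the peak memory statistics
    evt = paddle.cuda.Event()                     # event type re-exported by the module
    print(paddle.cuda.max_memory_allocated(0))    # peak allocated bytes on device 0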