diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 9bfc7c9d91791..1414f9490d686 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -234,7 +234,8 @@ def new_init(self, *args, **kwargs): set_grad_enabled, ) from .device import ( # noqa: F401 - PaddleStream as Stream, + Event, + Stream, device_guard, get_cudnn_version, get_default_device, @@ -247,6 +248,7 @@ def new_init(self, *args, **kwargs): is_compiled_with_ipu, is_compiled_with_rocm, is_compiled_with_xpu, + set_default_device, set_device, ) from .distributed import DataParallel @@ -933,6 +935,7 @@ def __dir__(self): raise err kernel32.SetErrorMode(prev_error_mode) + disable_static() from .pir_utils import IrGuard diff --git a/python/paddle/cuda/__init__.py b/python/paddle/cuda/__init__.py index 650df07b77c87..277f2d3248626 100644 --- a/python/paddle/cuda/__init__.py +++ b/python/paddle/cuda/__init__.py @@ -21,11 +21,17 @@ import paddle from paddle import base, core, device as paddle_device, framework from paddle.device import ( - PaddleStream as Stream, + Event, + Stream, _device_to_paddle as _device_to_paddle, + device, is_available as _device_is_available, + is_bf16_supported, is_current_stream_capturing as _is_current_stream_capturing, + manual_seed, manual_seed_all as device_manual_seed_all, + reset_peak_memory_stats, + set_stream, stream_guard as _PaddleStreamGuard, ) @@ -644,6 +650,109 @@ def memory_allocated(device: DeviceLike = None) -> int: return paddle_device.memory_allocated(device) +def max_memory_allocated(device: DeviceLike = None) -> int: + ''' + Return the peak size of memory that is allocated to tensor of the given device. + + Note: + The size of memory allocated to tensor is 256-byte aligned in Paddle, which may larger than the memory size that tensor actually need. + For instance, a float32 0-D Tensor with shape [] will take up 256 bytes memory, even though storing a float32 data requires only 4 bytes. 
+ + Args: + device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or + the string name of device like 'gpu:x'. If device is None, the device is the current device. + Default: None. + + Return: + int: The peak size of memory that is allocated to tensor of the given device, in bytes. + + Examples: + .. code-block:: python + + >>> # doctest: +REQUIRES(env:GPU) + >>> import paddle + >>> paddle.device.set_device('gpu') # or '' + + >>> max_memory_allocated_size = paddle.cuda.max_memory_allocated(paddle.CUDAPlace(0)) + >>> max_memory_allocated_size = paddle.cuda.max_memory_allocated(0) + >>> max_memory_allocated_size = paddle.cuda.max_memory_allocated("gpu:0") + ''' + return paddle_device.max_memory_allocated(device) + + +def max_memory_reserved(device: DeviceLike = None) -> int: + ''' + Return the peak size of memory that is held by the allocator of the given device. + + Args: + device(paddle.Place|int|str|None, optional): The device, the id of the device or + the string name of device like 'gpu:x'. If device is None, the device is the current device. + Default: None. + + Return: + int: The peak size of memory that is held by the allocator of the given device, in bytes. + + Examples: + .. code-block:: python + + >>> # doctest: +REQUIRES(env:GPU) + >>> import paddle + >>> paddle.device.set_device('gpu') # or '' + + >>> max_memory_reserved_size = paddle.cuda.max_memory_reserved(paddle.CUDAPlace(0)) + >>> max_memory_reserved_size = paddle.cuda.max_memory_reserved(0) + >>> max_memory_reserved_size = paddle.cuda.max_memory_reserved("gpu:0") + ''' + return paddle_device.max_memory_reserved(device) + + +def reset_max_memory_allocated(device: DeviceLike | None = None) -> None: + ''' + Reset the peak size of memory that is allocated to tensor of the given device. + + Args: + device(paddle.Place|int|str|None, optional): The device, the id of the device or + the string name of device like 'gpu:x'. If device is None, the device is the current device. 
+ Default: None. + + Examples: + .. code-block:: python + + >>> # doctest: +REQUIRES(env:GPU) + >>> import paddle + >>> paddle.device.set_device('gpu') # or '' + + >>> paddle.cuda.reset_max_memory_allocated(paddle.CUDAPlace(0)) + >>> paddle.cuda.reset_max_memory_allocated(0) + >>> paddle.cuda.reset_max_memory_allocated("gpu:0") + ''' + + return paddle_device.reset_max_memory_allocated(device) + + +def reset_max_memory_reserved(device: DeviceLike | None = None) -> None: + ''' + Reset the peak size of memory that is held by the allocator of the given device. + + Args: + device(paddle.Place|int|str|None, optional): The device, the id of the device or + the string name of device like 'gpu:x'. If device is None, the device is the current device. + Default: None. + + Examples: + .. code-block:: python + + >>> # doctest: +REQUIRES(env:GPU) + >>> import paddle + >>> paddle.device.set_device('gpu') # or '' + + >>> paddle.cuda.reset_max_memory_reserved(paddle.CUDAPlace(0)) + >>> paddle.cuda.reset_max_memory_reserved(0) + >>> paddle.cuda.reset_max_memory_reserved("gpu:0") + ''' + return paddle_device.reset_max_memory_reserved(device) + + def memory_reserved(device: DeviceLike = None) -> int: """ Return the current device memory managed by the caching allocator in bytes for a given device. 
@@ -796,7 +905,14 @@ def get_stream_from_external( "memory_allocated", "memory_reserved", "set_device", + "set_stream", "manual_seed_all", "get_rng_state", "set_rng_state", + "device", + "is_bf16_supported", + "manual_seed", + "max_memory_allocated", + "reset_peak_memory_stats", + "Event", ] diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 5d79c5bd07a81..6450ca62813d9 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -73,6 +73,7 @@ empty_cache, get_device_properties as _get_device_properties, get_rng_state, + manual_seed, max_memory_allocated, max_memory_reserved, memory_allocated, @@ -88,6 +89,7 @@ device_count, empty_cache, get_rng_state, + manual_seed, max_memory_allocated, max_memory_reserved, memory_allocated, @@ -109,6 +111,7 @@ empty_cache, get_device_properties as _get_device_properties, get_rng_state, + manual_seed, max_memory_allocated, max_memory_reserved, memory_allocated, @@ -122,6 +125,11 @@ from .cpu import ( device_count, get_rng_state, + manual_seed, + max_memory_allocated, + max_memory_reserved, + reset_max_memory_allocated, + reset_max_memory_reserved, set_rng_state, ) @@ -165,6 +173,10 @@ 'get_device_capability', 'get_rng_state', 'set_rng_state', + 'device', + 'is_bf16_supported', + 'manual_seed', + 'reset_peak_memory_stats', ] _cudnn_version = None @@ -442,7 +454,72 @@ def _convert_to_place(device: PlaceLike) -> Place: return place -def set_device(device: str) -> PlaceLike: +class device: + r"""Context-manager that changes the selected device. + + Args: + device (paddle.Place, int or str): device index to select. + + Examples: + .. code-block:: python + >>> import paddle + + >>> print(paddle.device.get_device()) # gpu:0 + >>> with paddle.device.device("cpu"): + ... print(paddle.device.get_device()) # cpu + + >>> # paddle.cuda.device is an alias of paddle.device.device + >>> with paddle.cuda.device("cpu"): + ... 
print(paddle.device.get_device()) # cpu + >>> print(paddle.device.get_device()) + """ + + def __init__(self, device: Place | int | str | None = None): + self.place = device_to_place(device) + self.prev_place_str = "-1" + + def __enter__(self): + self.prev_place_str = get_device() + set_device(self.place) + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: types.TracebackType | None, + ) -> bool | None: + set_device(self.prev_place_str) + return False + + +def is_bf16_supported(including_emulation: bool = True) -> bool: + """ + Return a bool indicating if the current CUDA/ROCm device supports dtype bfloat16. + + Args: + including_emulation (bool = True): Whether to treat software-emulated BF16 as supported; if False, only native hardware BF16 support is considered. + + Returns: + bool: A boolean value which indicates whether the current CUDA/ROCm device supports dtype bfloat16. + + Examples: + + .. code-block:: python + + >>> import paddle + + >>> paddle.device.is_bf16_supported() + >>> # paddle.cuda.is_bf16_supported() is an alias of paddle.device.is_bf16_supported() + >>> paddle.cuda.is_bf16_supported() + + """ + # including_emulation is not used here, but kept for compatibility with the original implementation + return core.is_bfloat16_supported( + paddle.framework._current_expected_place() + ) + + +def set_device(device: PlaceLike | int) -> PlaceLike: """ Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU. @@ -450,7 +527,7 @@ def set_device(device: str) -> PlaceLike: which the OP will run. Args: - device(str): This parameter determines the specific running device. + device(str, Place or int): This parameter determines the specific running device. It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``, where ``x`` is the index of the GPUs, XPUs or NPUs. 
@@ -469,12 +546,12 @@ def set_device(device: str) -> PlaceLike: >>> data = paddle.stack([x1,x2], axis=1) """ - place = _convert_to_place(device) + place = device_to_place(device) framework._set_expected_place(place) return place -def get_device() -> str: +def get_device(input: paddle.Tensor = None) -> str | int: """ This function can get the current global device of the program is running. @@ -482,6 +559,18 @@ def get_device() -> str: set, it will return a string which is 'gpu:x' when cuda is available or it will return a string which is 'cpu' when cuda is not available. + Returns: + if input is Tensor, this function will return the device ID where the given Tensor is located. + int: + - -1, if the Tensor is on CPU. + - The device ID (e.g., 0, 1, ...) if the Tensor is on GPU. + + if input is not Tensor, this function will return the device name where the program is running. + str: + - 'cpu': If the program is running on CPU. + - 'gpu:x': If the program is running on GPU, where `x` is the index of the GPU. + - 'xpu:x': If the program is running on XPU, where `x` is the index of the XPU. + - 'npu:x': If the program is running on NPU, where `x` is the index of the NPU. Examples: .. code-block:: python @@ -489,7 +578,16 @@ def get_device() -> str: >>> import paddle >>> device = paddle.device.get_device() + >>> x_cpu = paddle.to_tensor([1, 2, 3], place=paddle.CPUPlace()) + >>> id = paddle.get_device(x_cpu) # -1 + + + """ + if isinstance(input, paddle.Tensor): + if 'cpu' in str(input.place): + return -1 + return input.place.gpu_device_id() device = '' place = framework._current_expected_place_() if isinstance(place, core.CPUPlace): @@ -525,6 +623,25 @@ def get_default_device() -> paddle.device: return paddle.device(get_device().replace("gpu", "cuda")) +def set_default_device(device: PlaceLike | int) -> None: + """ + Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU. 
+ This function can specify the global device which the OP will run. + + Args: + device(str, Place or int): This parameter determines the specific running device. + It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``, + where ``x`` is the index of the GPUs, XPUs or NPUs. + + Examples: + .. code-block:: python + + >>> import paddle + >>> paddle.device.set_default_device("cpu") + """ + set_device(device) + + def get_all_device_type() -> list[str]: """ @@ -1049,13 +1166,14 @@ class Stream: ''' A device stream wrapper around StreamBase. + paddle.cuda.Stream() is equivalent to paddle.device.Stream(). Args: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)|None): Which device the stream run on. If device is None, the device is the current device. Default: None. It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevice, where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). priority(int, optional): priority of the CUDA stream. Can be either - 1 (high priority) or 2 (low priority). By default, streams have + 1 or -1 (high priority) or 0 or 2 (low priority). By default, streams have priority 2. 
Returns: @@ -1076,11 +1194,12 @@ class Stream: ''' stream_base: _InitStreamBase - device: PlaceLike + device: PlaceLike | int + _priority_map: dict[int, int] = {-1: 1, 0: 2, 1: 1, 2: 2} def __init__( self, - device: PlaceLike | None = None, + device: PlaceLike | int | None = None, priority: int = 2, stream_base: _InitStreamBase | None = None, ) -> None: @@ -1096,13 +1215,7 @@ def __init__( "stream_base should be CUDAStream, XPUStream, CustomDeviceStream" ) return - - if device is None: - self.device = paddle.framework._current_expected_place_() - elif isinstance(device, str): - self.device = paddle.device._convert_to_place(device) - else: - self.device = device + self.device = device_to_place(device) device_id = ( self.device.get_device_id() @@ -1114,7 +1227,7 @@ def __init__( if hasattr(self.device, 'get_device_type') else None ) - + priority = self._priority_map.get(priority, 2) self.stream_base = _create_stream_base( device_id=device_id, priority=priority, @@ -1296,40 +1409,6 @@ def _device_to_paddle( return dev -class PaddleStream(Stream): - """Wrapper class for Paddle CUDA/XPU Stream, supporting standard device/priority handling. - - This class inherits from the base `Stream` (renamed to `StreamBase` to avoid naming conflict) - and adds: - 1. Unified device string conversion via `_device_to_paddle` - 2. Priority mapping for user-friendly priority values - 3. Clear parameter validation and error handling - - Attributes: - _priority_map (dict[int, int]): Mapping from user-facing priority values to Paddle internal priority codes. 
- - User input: -1 (high priority), 0/2 (low priority), 1 (high priority) - - Internal code: 1 (high), 2 (low) - """ - - _priority_map: dict[int, int] = {-1: 1, 0: 2, 1: 1, 2: 2} - - def __init__( - self, - device: paddle.CUDAPlace | paddle.CustomPlace | int | str | None = None, - priority: int = 0, - *args, - **kwargs, - ): - paddle_device = _device_to_paddle(device) - paddle_priority = self._priority_map.get(priority, 2) - super().__init__( - device=paddle_device, - priority=paddle_priority, - *args, - **kwargs, - ) - - def current_stream(device: PlaceLike | None = None) -> Stream: ''' @@ -1404,6 +1483,7 @@ def set_stream(stream: Stream) -> Stream: >>> paddle.set_device('custom_cpu') >>> s = paddle.device.Stream() + >>> # paddle.cuda.set_stream(s) is equivalent to paddle.device.set_stream(s) >>> paddle.device.set_stream(s) ''' @@ -1675,6 +1755,27 @@ def manual_seed_all(seed: int) -> None: paddle.seed(seed) +def reset_peak_memory_stats(device: PlaceLike | int | None = None) -> None: + """ + Resets the peak memory statistics of the given device. + + This method resets the peak memory usage recorded for the given device during the execution of the program. + It sets the peak memory usage back to zero for that device (the current device if ``device`` is None). + + Example: + >>> # doctest: +REQUIRES(env:GPU) + >>> import paddle + >>> paddle.device.set_device('gpu') # or '' + + >>> # paddle.cuda.reset_peak_memory_stats() is equivalent to paddle.device.reset_peak_memory_stats() + + >>> paddle.device.reset_peak_memory_stats(paddle.CUDAPlace(0)) + >>> paddle.device.reset_peak_memory_stats(0) + >>> paddle.device.reset_peak_memory_stats("gpu:0") + """ + reset_max_memory_allocated(device) + + class Device(str): """ Paddle computing device. 
diff --git a/python/paddle/device/cpu.py b/python/paddle/device/cpu.py index c9706a812733d..af7914f7fd44a 100644 --- a/python/paddle/device/cpu.py +++ b/python/paddle/device/cpu.py @@ -107,3 +107,75 @@ def set_rng_state( >>> paddle.device.set_rng_state(state) """ core.default_cpu_generator().set_state(new_state) + + +def manual_seed(seed: int) -> None: + r"""Set the seed for generating random numbers for the current Device. + + .. warning:: + If you are working with a multi-Device model, this function is insufficient + to get determinism. To seed all Devices, use :func:`manual_seed_all`. + + Sets the seed for global default generator, which manages the random number generation. + + Args: + seed(int): The random seed to set. + + Returns: + None + + Examples: + .. code-block:: python + + >>> import paddle + >>> paddle.device.manual_seed(102) + >>> # paddle.cuda.manual_seed(102) is equivalent to paddle.device.manual_seed(102) + >>> paddle.cuda.manual_seed(102) + + """ + seed = int(seed) + core.default_cpu_generator().manual_seed(seed) + + +def max_memory_allocated(device: _CPUPlaceLike | None = None) -> int: + r""" + The API max_memory_allocated is not supported in CPU PaddlePaddle. + Please reinstall PaddlePaddle with GPU or XPU support to call this API. + """ + raise ValueError( + "The API paddle.device.max_memory_allocated is not supported in CPU PaddlePaddle. " + "Please reinstall PaddlePaddle with GPU or XPU support to call this API." + ) + + +def max_memory_reserved(device: _CPUPlaceLike | None = None) -> int: + r""" + The API max_memory_reserved is not supported in CPU PaddlePaddle. + Please reinstall PaddlePaddle with GPU or XPU support to call this API. + """ + raise ValueError( + "The API paddle.device.max_memory_reserved is not supported in CPU PaddlePaddle. " + "Please reinstall PaddlePaddle with GPU or XPU support to call this API." 
+ ) + + +def reset_max_memory_allocated(device: _CPUPlaceLike | None = None) -> None: + r""" + The API reset_max_memory_allocated is not supported in CPU PaddlePaddle. + Please reinstall PaddlePaddle with GPU or XPU support to call this API. + """ + raise ValueError( + "The API paddle.device.reset_max_memory_allocated is not supported in CPU PaddlePaddle. " + "Please reinstall PaddlePaddle with GPU or XPU support to call this API." + ) + + +def reset_max_memory_reserved(device: _CPUPlaceLike | None = None) -> None: + r""" + The API reset_max_memory_reserved is not supported in CPU PaddlePaddle. + Please reinstall PaddlePaddle with GPU or XPU support to call this API. + """ + raise ValueError( + "The API paddle.device.reset_max_memory_reserved is not supported in CPU PaddlePaddle. " + "Please reinstall PaddlePaddle with GPU or XPU support to call this API." + ) diff --git a/python/paddle/device/cuda/__init__.py b/python/paddle/device/cuda/__init__.py index 3bc294527f21a..ceaf180451b19 100644 --- a/python/paddle/device/cuda/__init__.py +++ b/python/paddle/device/cuda/__init__.py @@ -776,3 +776,36 @@ def set_rng_state( core.default_cpu_generator().set_state(new_state) else: core.default_cuda_generator(place.get_device_id()).set_state(new_state) + + +def manual_seed(seed: int) -> None: + """Set the seed for generating random numbers for the current Device. + + .. warning:: + If you are working with a multi-Device model, this function is insufficient + to get determinism. To seed all Devices, use :func:`manual_seed_all`. + If current Device is CPU, this function will set the seed of the default CPU generator. + + Sets the seed for global default generator, which manages the random number generation. + + Args: + seed(int): The random seed to set. + + Returns: + None + + Examples: + .. 
code-block:: python + >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE) + >>> import paddle + >>> paddle.device.manual_seed(102) + >>> # paddle.cuda.manual_seed(102) is equivalent to paddle.device.manual_seed(102) + >>> paddle.cuda.manual_seed(102) + + """ + seed = int(seed) + place = paddle.framework._current_expected_place_() + if isinstance(place, core.CPUPlace): + core.default_cpu_generator().manual_seed(seed) + else: + core.default_cuda_generator(place.get_device_id()).manual_seed(seed) diff --git a/python/paddle/device/custom_device.py b/python/paddle/device/custom_device.py index 06b631f48cc1a..7075f60209582 100644 --- a/python/paddle/device/custom_device.py +++ b/python/paddle/device/custom_device.py @@ -570,3 +570,35 @@ def set_rng_state( core.default_cpu_generator().set_state(new_state) else: core.default_custom_device_generator(place).set_state(new_state) + + +def manual_seed(seed: int) -> None: + r"""Set the seed for generating random numbers for the current Device. + + .. warning:: + If you are working with a multi-Device model, this function is insufficient + to get determinism. To seed all Devices, use :func:`manual_seed_all`. + If current Device is CPU, this function will set the seed of the default CPU generator. + + Sets the seed for global default generator, which manages the random number generation. + + Args: + seed(int): The random seed to set. + + Returns: + None + + Examples: + .. 
code-block:: python + >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE) + >>> import paddle + >>> paddle.device.manual_seed(102) + >>> # paddle.cuda.manual_seed(102) is equivalent to paddle.device.manual_seed(102) + + """ + seed = int(seed) + place = paddle.framework._current_expected_place() + if isinstance(place, core.CPUPlace): + core.default_cpu_generator().manual_seed(seed) + else: + core.default_custom_device_generator(place).manual_seed(seed) diff --git a/python/paddle/device/xpu/__init__.py b/python/paddle/device/xpu/__init__.py index 8f585658a3472..15563a640aa97 100644 --- a/python/paddle/device/xpu/__init__.py +++ b/python/paddle/device/xpu/__init__.py @@ -579,3 +579,36 @@ def set_rng_state( core.default_cpu_generator().set_state(new_state) else: core.default_xpu_generator(place.get_device_id()).set_state(new_state) + + +def manual_seed(seed: int) -> None: + r"""Set the seed for generating random numbers for the current Device. + + .. warning:: + If you are working with a multi-Device model, this function is insufficient + to get determinism. To seed all Devices, use :func:`manual_seed_all`. + If current Device is CPU, this function will set the seed of the default CPU generator. + + Sets the seed for global default generator, which manages the random number generation. + + Args: + seed(int): The random seed to set. + + Returns: + None + + Examples: + .. 
code-block:: python + >>> # doctest: +REQUIRES(env:XPU) + >>> import paddle + >>> paddle.device.manual_seed(102) + >>> # paddle.cuda.manual_seed(102) is equivalent to paddle.device.manual_seed(102) + >>> paddle.cuda.manual_seed(102) + + """ + seed = int(seed) + place = paddle.framework._current_expected_place_() + if isinstance(place, core.CPUPlace): + core.default_cpu_generator().manual_seed(seed) + else: + core.default_xpu_generator(place.get_device_id()).manual_seed(seed) diff --git a/test/compat/test_device_apis.py b/test/compat/test_device_apis.py index 04a499aa3173e..894241564edf4 100644 --- a/test/compat/test_device_apis.py +++ b/test/compat/test_device_apis.py @@ -27,6 +27,51 @@ def is_custom_device(): return False +def only_has_cpu(): + return ( + not core.is_compiled_with_cuda() + and not core.is_compiled_with_xpu() + and not is_custom_device() + ) + + +class TestErrorCPU(unittest.TestCase): + def test_max_memory_allocated_raises_on_cpu(self): + if only_has_cpu(): + with self.assertRaisesRegex( + ValueError, "not supported in CPU PaddlePaddle" + ): + paddle.cuda.max_memory_allocated() + with self.assertRaisesRegex( + ValueError, "not supported in CPU PaddlePaddle" + ): + paddle.device.max_memory_allocated() + with self.assertRaisesRegex( + ValueError, "not supported in CPU PaddlePaddle" + ): + paddle.cuda.max_memory_reserved() + with self.assertRaisesRegex( + ValueError, "not supported in CPU PaddlePaddle" + ): + paddle.device.max_memory_reserved() + with self.assertRaisesRegex( + ValueError, "not supported in CPU PaddlePaddle" + ): + paddle.cuda.reset_max_memory_allocated() + with self.assertRaisesRegex( + ValueError, "not supported in CPU PaddlePaddle" + ): + paddle.device.reset_max_memory_allocated() + with self.assertRaisesRegex( + ValueError, "not supported in CPU PaddlePaddle" + ): + paddle.cuda.reset_max_memory_reserved() + with self.assertRaisesRegex( + ValueError, "not supported in CPU PaddlePaddle" + ): + 
paddle.device.reset_max_memory_reserved() + + class TestDeviceAPIs(unittest.TestCase): """Test paddle.device APIs across different hardware types.""" @@ -164,6 +209,23 @@ def test_memory_apis_cuda(self): self.assertIsInstance(mem7, int) self.assertGreaterEqual(mem7, 0) + # Test max_memory_allocated with different input types + mem1 = paddle.cuda.max_memory_allocated() + self.assertIsInstance(mem1, int) + self.assertGreaterEqual(mem1, 0) + + mem2 = paddle.cuda.max_memory_allocated('gpu:0') + self.assertIsInstance(mem2, int) + self.assertGreaterEqual(mem2, 0) + + mem3 = paddle.cuda.max_memory_allocated(0) + self.assertIsInstance(mem3, int) + self.assertGreaterEqual(mem3, 0) + + mem7 = paddle.cuda.max_memory_allocated(paddle.CUDAPlace(0)) + self.assertIsInstance(mem7, int) + self.assertGreaterEqual(mem7, 0) + # Test max_memory_reserved with different input types mem4 = paddle.device.max_memory_reserved() self.assertIsInstance(mem4, int) @@ -173,6 +235,14 @@ def test_memory_apis_cuda(self): self.assertIsInstance(mem8, int) self.assertGreaterEqual(mem8, 0) + mem4 = paddle.cuda.max_memory_reserved() + self.assertIsInstance(mem4, int) + self.assertGreaterEqual(mem4, 0) + + mem8 = paddle.cuda.max_memory_reserved('gpu:0') + self.assertIsInstance(mem8, int) + self.assertGreaterEqual(mem8, 0) + mem9 = paddle.device.max_memory_reserved(0) self.assertIsInstance(mem9, int) self.assertGreaterEqual(mem9, 0) @@ -508,11 +578,38 @@ def test_reset_memory_apis_cuda(self): paddle.device.reset_max_memory_allocated(0) paddle.device.reset_max_memory_allocated(paddle.CUDAPlace(0)) + # Test reset functions with different input types + paddle.device.reset_peak_memory_stats() + paddle.device.reset_peak_memory_stats('gpu:0') + paddle.device.reset_peak_memory_stats('cuda:0') + paddle.device.reset_peak_memory_stats(0) + paddle.device.reset_peak_memory_stats(paddle.CUDAPlace(0)) + + # Test reset functions with different input types + paddle.cuda.reset_peak_memory_stats() + 
paddle.cuda.reset_peak_memory_stats('gpu:0') + paddle.cuda.reset_peak_memory_stats(0) + paddle.cuda.reset_peak_memory_stats(paddle.CUDAPlace(0)) + paddle.device.reset_max_memory_reserved() paddle.device.reset_max_memory_reserved('gpu:0') + paddle.device.reset_max_memory_reserved('cuda:0') paddle.device.reset_max_memory_reserved(0) paddle.device.reset_max_memory_reserved(paddle.CUDAPlace(0)) + # Test reset functions with different input types + paddle.cuda.reset_max_memory_allocated() + paddle.cuda.reset_max_memory_allocated('gpu:0') + paddle.cuda.reset_max_memory_allocated('cuda:0') + paddle.cuda.reset_max_memory_allocated(0) + paddle.cuda.reset_max_memory_allocated(paddle.CUDAPlace(0)) + + paddle.cuda.reset_max_memory_reserved() + paddle.cuda.reset_max_memory_reserved('gpu:0') + paddle.cuda.reset_max_memory_reserved('cuda:0') + paddle.cuda.reset_max_memory_reserved(0) + paddle.cuda.reset_max_memory_reserved(paddle.CUDAPlace(0)) + # Check that max memory has been reset max_allocated_after_reset = paddle.device.max_memory_allocated() max_reserved_after_reset = paddle.device.max_memory_reserved() diff --git a/test/compat/test_event_stream_apis.py b/test/compat/test_event_stream_apis.py index 311bac55b7a1e..926f74fc0ba38 100644 --- a/test/compat/test_event_stream_apis.py +++ b/test/compat/test_event_stream_apis.py @@ -116,6 +116,9 @@ def _test_event_stream_apis_impl(self, device_str): prev_stream = paddle.device.set_stream(stream1) self.assertIsInstance(prev_stream, paddle.device.Stream) + prev_stream = paddle.cuda.set_stream(stream1) + self.assertIsInstance(prev_stream, paddle.cuda.Stream) + # Test Event.record() with default stream event1.record() # Query result may be True immediately for some devices @@ -350,5 +353,54 @@ def test_event_stream_timing_functionality(self): self.assertGreater(elapsed_time, 0) # Should take some time +class TestEventAPIs(unittest.TestCase): + """Unified test for paddle.Event, paddle.device.Event, and paddle.cuda.Event.""" + + def 
setUp(self): + if not paddle.device.is_compiled_with_cuda(): + self.skipTest("This test requires CUDA.") + self.device = "gpu:0" + paddle.device.set_device(self.device) + + self.event_classes = [ + ("paddle.Event", paddle.Event), + ("paddle.cuda.Event", paddle.cuda.Event), + ] + + def test_event_timing_consistency(self): + """Check timing consistency across different Event APIs.""" + for name, EventCls in self.event_classes: + with self.subTest(api=name): + start = EventCls(enable_timing=True) + end = EventCls(enable_timing=True) + + start.record() + + x = paddle.randn([2048, 2048], dtype="float32") + y = paddle.randn([2048, 2048], dtype="float32") + z = paddle.matmul(x, y) + _ = z.mean() + + end.record() + end.synchronize() + + elapsed = start.elapsed_time(end) + self.assertIsInstance(elapsed, (int, float)) + self.assertGreater( + elapsed, + 0.0, + f"{name} should measure positive elapsed time.", + ) + + def test_event_methods_available(self): + """Ensure all Event variants expose expected methods.""" + for name, EventCls in self.event_classes: + with self.subTest(api=name): + e = EventCls(enable_timing=True) + self.assertTrue(hasattr(e, "record")) + self.assertTrue(hasattr(e, "synchronize")) + self.assertTrue(hasattr(e, "elapsed_time")) + + if __name__ == '__main__': unittest.main() diff --git a/test/compat/test_paddle_cuda_apis.py b/test/compat/test_paddle_cuda_apis.py index 7c35079390373..4531a92498023 100644 --- a/test/compat/test_paddle_cuda_apis.py +++ b/test/compat/test_paddle_cuda_apis.py @@ -464,5 +464,58 @@ def test_set_device_invalid_param(self): self.assertIn("Unsupported device type", str(context.exception)) +class TestBf16Supported(unittest.TestCase): + def test_is_bf16_supported(self): + self.assertIsInstance(paddle.cuda.is_bf16_supported(), bool) + self.assertIsInstance(paddle.device.is_bf16_supported(), bool) + self.assertIsInstance(paddle.device.is_bf16_supported(True), bool) + self.assertIsInstance(paddle.cuda.is_bf16_supported(False), bool) + 
if should_skip_tests(): + self.assertFalse(paddle.cuda.is_bf16_supported()) + self.assertFalse(paddle.device.is_bf16_supported()) + + +class TestManualSeed(unittest.TestCase): + def test_device_manual_seed(self): + paddle.device.manual_seed(102) + x1 = paddle.randn([2, 3]) + + paddle.device.manual_seed(999) + x2 = paddle.randn([2, 3]) + + paddle.device.manual_seed(102) + x3 = paddle.randn([2, 3]) + + self.assertTrue( + paddle.equal_all(x1, x3), + "Random outputs should be identical with the same seed", + ) + + self.assertFalse( + paddle.equal_all(x1, x2), + "Random outputs should differ with different seeds", + ) + + def test_cuda_manual_seed(self): + paddle.cuda.manual_seed(102) + x1 = paddle.randn([2, 3], dtype='float32') + + paddle.cuda.manual_seed(999) + x2 = paddle.randn([2, 3], dtype='float32') + + paddle.cuda.manual_seed(102) + x3 = paddle.randn([2, 3], dtype='float32') + + self.assertTrue( + paddle.equal_all(x1, x3), + "Random outputs should be identical with the same seed", + ) + + self.assertFalse( + paddle.equal_all(x1, x2), + "Random outputs should differ with different seeds", + ) + + if __name__ == '__main__': unittest.main() diff --git a/test/legacy_test/test_cuda_unittest.py b/test/legacy_test/test_cuda_unittest.py index 9e82878fe1f14..c0c4bce76ccd0 100644 --- a/test/legacy_test/test_cuda_unittest.py +++ b/test/legacy_test/test_cuda_unittest.py @@ -151,6 +151,22 @@ def test_get_default_device(self): if paddle.is_compiled_with_cuda(): self.assertEqual(paddle.get_default_device(), paddle.device('cuda')) + def test_get_device(self): + x_cpu = paddle.to_tensor([1, 2, 3], place=paddle.CPUPlace()) + self.assertEqual(paddle.get_device(x_cpu), -1) + if paddle.device.is_compiled_with_cuda(): + x_gpu = paddle.to_tensor([1, 2, 3], place=paddle.CUDAPlace(0)) + self.assertEqual(paddle.get_device(x_gpu), 0) + + def test_set_default_device(self): + if paddle.is_compiled_with_cuda(): + paddle.set_default_device("gpu") + self.assertEqual(paddle.get_default_device(), 
paddle.device('cuda')) + + if paddle.is_compiled_with_xpu(): + paddle.set_default_device("xpu") + self.assertEqual(paddle.get_default_device(), paddle.device('xpu')) + @unittest.skipIf( ( not paddle.device.is_compiled_with_cuda() @@ -347,5 +363,21 @@ def test_get_stream_from_external(self): ) +class TestDeviceDevice(unittest.TestCase): + def test_device_device(self): + current = paddle.device.get_device() + with paddle.device.device("cpu"): + self.assertEqual(paddle.device.get_device(), 'cpu') + self.assertEqual(paddle.device.get_device(), current) + +class TestCudaDevice(unittest.TestCase): + def test_device_device(self): + current = paddle.device.get_device() + with paddle.cuda.device("cpu"): + self.assertEqual(paddle.device.get_device(), 'cpu') + self.assertEqual(paddle.device.get_device(), current) + + if __name__ == '__main__': unittest.main()