|
39 | 39 | #include <shared_mutex> |
40 | 40 | #include <tuple> |
41 | 41 |
|
| 42 | +#include "paddle/common/macros.h" |
42 | 43 | #include "paddle/phi/backends/gpu/forwards.h" |
43 | 44 |
|
44 | 45 | namespace c10 { |
@@ -95,40 +96,41 @@ inline int64_t getNumGPUs() { return c10::cuda::device_count(); } |
95 | 96 | inline bool is_available() { return c10::cuda::device_count() > 0; } |
96 | 97 |
|
97 | 98 | #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) |
98 | | -CUDAContextDeviceProp* getCurrentDeviceProperties(); |
| 99 | +PADDLE_API CUDAContextDeviceProp* getCurrentDeviceProperties(); |
99 | 100 |
|
100 | | -int warp_size(); |
| 101 | +PADDLE_API int warp_size(); |
101 | 102 |
|
102 | | -CUDAContextDeviceProp* getDeviceProperties(c10::DeviceIndex device); |
| 103 | +PADDLE_API CUDAContextDeviceProp* getDeviceProperties(c10::DeviceIndex device); |
103 | 104 |
|
104 | | -bool canDeviceAccessPeer(c10::DeviceIndex device, c10::DeviceIndex peer_device); |
| 105 | +PADDLE_API bool canDeviceAccessPeer(c10::DeviceIndex device, |
| 106 | + c10::DeviceIndex peer_device); |
105 | 107 |
|
106 | 108 | /* Handles */ |
107 | | -CUDAContextSparseHandle getCurrentCUDASparseHandle(); |
108 | | -CUDAContextBlasHandle getCurrentCUDABlasHandle(); |
109 | | -CUDAContextBlasLtHandle getCurrentCUDABlasLtHandle(); |
| 109 | +PADDLE_API CUDAContextSparseHandle getCurrentCUDASparseHandle(); |
| 110 | +PADDLE_API CUDAContextBlasHandle getCurrentCUDABlasHandle(); |
| 111 | +PADDLE_API CUDAContextBlasLtHandle getCurrentCUDABlasLtHandle(); |
110 | 112 |
|
111 | | -void clearCublasWorkspaces(); |
| 113 | +PADDLE_API void clearCublasWorkspaces(); |
112 | 114 | struct WorkspaceMapWithMutex { |
113 | 115 | std::map<std::tuple<void*, void*>, at::DataPtr> map; |
114 | 116 | std::shared_mutex mutex; |
115 | 117 | }; |
116 | 118 |
|
117 | | -WorkspaceMapWithMutex& cublas_handle_stream_to_workspace(); |
118 | | -WorkspaceMapWithMutex& cublaslt_handle_stream_to_workspace(); |
119 | | -size_t getChosenWorkspaceSize(); |
120 | | -size_t getCUDABlasLtWorkspaceSize(); |
121 | | -void* getCUDABlasLtWorkspace(); |
| 119 | +PADDLE_API WorkspaceMapWithMutex& cublas_handle_stream_to_workspace(); |
| 120 | +PADDLE_API WorkspaceMapWithMutex& cublaslt_handle_stream_to_workspace(); |
| 121 | +PADDLE_API size_t getChosenWorkspaceSize(); |
| 122 | +PADDLE_API size_t getCUDABlasLtWorkspaceSize(); |
| 123 | +PADDLE_API void* getCUDABlasLtWorkspace(); |
122 | 124 |
|
123 | | -CUDAContextSolverHandle getCurrentCUDASolverDnHandle(); |
| 125 | +PADDLE_API CUDAContextSolverHandle getCurrentCUDASolverDnHandle(); |
124 | 126 |
|
125 | 127 | #if defined(USE_CUDSS) |
126 | | -cudssHandle_t getCurrentCudssHandle(); |
| 128 | +PADDLE_API cudssHandle_t getCurrentCudssHandle(); |
127 | 129 | #endif |
128 | 130 |
|
129 | 131 | // Get the CUDA device allocator for the current device. |
130 | 132 | // Returns a pointer to a c10::Allocator that allocates GPU memory. |
131 | | -c10::Allocator* getCUDADeviceAllocator(); |
| 133 | +PADDLE_API c10::Allocator* getCUDADeviceAllocator(); |
132 | 134 | #endif |
133 | 135 |
|
134 | 136 | } // namespace at::cuda |
0 commit comments