Skip to content

Commit 6aac169

Browse files
committed
[Cpp API Compatibility] Support Compat tests on Windows (PaddlePaddle#78670)
1 parent 7875c6c commit 6aac169

24 files changed

Lines changed: 324 additions & 222 deletions

paddle/phi/api/include/compat/ATen/Utils.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include <algorithm>
2626

27+
#include "paddle/common/macros.h"
2728
#include "paddle/phi/api/include/sparse_api.h"
2829
#include "paddle/phi/api/include/tensor.h"
2930

@@ -71,24 +72,24 @@ Tensor tensor_complex_backend(ArrayRef<T> values,
7172

7273
} // namespace detail
7374

74-
#define TENSOR(T, _1) \
75-
Tensor tensor(ArrayRef<T> values, const TensorOptions& options) { \
76-
if (options.device().type() != c10::DeviceType::CPU) { \
77-
return at::detail::tensor_backend(values, options); \
78-
} else { \
79-
return at::detail::tensor_cpu(values, options); \
80-
} \
75+
// TENSOR(T, _1) expands to the out-of-line definition of
//   tensor(ArrayRef<T> values, const TensorOptions& options).
// The generated function looks at the device type carried by `options`:
// anything other than c10::DeviceType::CPU is forwarded to
// at::detail::tensor_backend, CPU is forwarded to at::detail::tensor_cpu.
// The second macro parameter (_1) is not used in the expansion.
// TENSOR is handed to AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, ...)
// below (expected to invoke it once per scalar type) and then #undef'd so the
// name can be reused.
// NOTE(review): PADDLE_API comes from paddle/common/macros.h; presumably a
// shared-library export annotation needed for the Windows build - confirm.
#define TENSOR(T, _1) \
76+
PADDLE_API Tensor tensor(ArrayRef<T> values, const TensorOptions& options) { \
77+
if (options.device().type() != c10::DeviceType::CPU) { \
78+
return at::detail::tensor_backend(values, options); \
79+
} else { \
80+
return at::detail::tensor_cpu(values, options); \
81+
} \
8182
}
8283
AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TENSOR)
8384
#undef TENSOR
8485

85-
#define TENSOR(T, _1) \
86-
Tensor tensor(ArrayRef<T> values, const TensorOptions& options) { \
87-
if (options.device().type() != c10::DeviceType::CPU) { \
88-
return at::detail::tensor_complex_backend(values, options); \
89-
} else { \
90-
return at::detail::tensor_complex_cpu(values, options); \
91-
} \
86+
// Complex-type counterpart of the TENSOR macro: expands to the out-of-line
// definition of tensor(ArrayRef<T> values, const TensorOptions& options).
// Non-CPU devices are forwarded to at::detail::tensor_complex_backend, CPU is
// forwarded to at::detail::tensor_complex_cpu. The second macro parameter
// (_1) is not used in the expansion.
// TENSOR is handed to AT_FORALL_COMPLEX_TYPES below (expected to invoke it
// once per complex type) and then #undef'd.
// NOTE(review): PADDLE_API comes from paddle/common/macros.h; presumably a
// shared-library export annotation needed for the Windows build - confirm.
#define TENSOR(T, _1) \
87+
PADDLE_API Tensor tensor(ArrayRef<T> values, const TensorOptions& options) { \
88+
if (options.device().type() != c10::DeviceType::CPU) { \
89+
return at::detail::tensor_complex_backend(values, options); \
90+
} else { \
91+
return at::detail::tensor_complex_cpu(values, options); \
92+
} \
9293
}
9394
AT_FORALL_COMPLEX_TYPES(TENSOR)
9495
#undef TENSOR

paddle/phi/api/include/compat/ATen/cuda/CUDABlas.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include <ATen/OpMathType.h>
3232
#include <ATen/cuda/CUDAContext.h>
3333

34+
#include "paddle/common/macros.h"
35+
3436
namespace at::cuda::blas {
3537

3638
/* LEVEL 3 BLAS FUNCTIONS */
@@ -54,16 +56,18 @@ inline void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
5456
}
5557

5658
template <>
57-
void gemm<double>(CUDABLAS_GEMM_ARGTYPES(double));
59+
PADDLE_API void gemm<double>(CUDABLAS_GEMM_ARGTYPES(double));
5860
template <>
59-
void gemm<float>(CUDABLAS_GEMM_ARGTYPES(float));
61+
PADDLE_API void gemm<float>(CUDABLAS_GEMM_ARGTYPES(float));
6062
template <>
61-
void gemm<c10::complex<double>>(CUDABLAS_GEMM_ARGTYPES(c10::complex<double>));
63+
PADDLE_API void gemm<c10::complex<double>>(
64+
CUDABLAS_GEMM_ARGTYPES(c10::complex<double>));
6265
template <>
63-
void gemm<c10::complex<float>>(CUDABLAS_GEMM_ARGTYPES(c10::complex<float>));
66+
PADDLE_API void gemm<c10::complex<float>>(
67+
CUDABLAS_GEMM_ARGTYPES(c10::complex<float>));
6468
template <>
65-
void gemm<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half));
69+
PADDLE_API void gemm<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half));
6670
template <>
67-
void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
71+
PADDLE_API void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
6872

6973
} // namespace at::cuda::blas

paddle/phi/api/include/compat/ATen/cuda/CUDAContextLight.h

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <shared_mutex>
4040
#include <tuple>
4141

42+
#include "paddle/common/macros.h"
4243
#include "paddle/phi/backends/gpu/forwards.h"
4344

4445
namespace c10 {
@@ -95,40 +96,41 @@ inline int64_t getNumGPUs() { return c10::cuda::device_count(); }
9596
inline bool is_available() { return c10::cuda::device_count() > 0; }
9697

9798
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
98-
CUDAContextDeviceProp* getCurrentDeviceProperties();
99+
PADDLE_API CUDAContextDeviceProp* getCurrentDeviceProperties();
99100

100-
int warp_size();
101+
PADDLE_API int warp_size();
101102

102-
CUDAContextDeviceProp* getDeviceProperties(c10::DeviceIndex device);
103+
PADDLE_API CUDAContextDeviceProp* getDeviceProperties(c10::DeviceIndex device);
103104

104-
bool canDeviceAccessPeer(c10::DeviceIndex device, c10::DeviceIndex peer_device);
105+
PADDLE_API bool canDeviceAccessPeer(c10::DeviceIndex device,
106+
c10::DeviceIndex peer_device);
105107

106108
/* Handles */
107-
CUDAContextSparseHandle getCurrentCUDASparseHandle();
108-
CUDAContextBlasHandle getCurrentCUDABlasHandle();
109-
CUDAContextBlasLtHandle getCurrentCUDABlasLtHandle();
109+
PADDLE_API CUDAContextSparseHandle getCurrentCUDASparseHandle();
110+
PADDLE_API CUDAContextBlasHandle getCurrentCUDABlasHandle();
111+
PADDLE_API CUDAContextBlasLtHandle getCurrentCUDABlasLtHandle();
110112

111-
void clearCublasWorkspaces();
113+
PADDLE_API void clearCublasWorkspaces();
112114
// Pairs a map with the std::shared_mutex that travels with it. It is the
// return type of cublas_handle_stream_to_workspace() and
// cublaslt_handle_stream_to_workspace() declared right after this struct.
// NOTE(review): going by those accessor names, the (void*, void*) key is
// presumably (handle, stream) and `mutex` is meant to guard `map` - confirm
// against the implementation; nothing in this header enforces it.
struct WorkspaceMapWithMutex {
113115
// Key: a pair of opaque pointers; value: the at::DataPtr stored for that pair.
std::map<std::tuple<void*, void*>, at::DataPtr> map;
114116
// Reader/writer lock stored alongside the map.
std::shared_mutex mutex;
115117
};
116118

117-
WorkspaceMapWithMutex& cublas_handle_stream_to_workspace();
118-
WorkspaceMapWithMutex& cublaslt_handle_stream_to_workspace();
119-
size_t getChosenWorkspaceSize();
120-
size_t getCUDABlasLtWorkspaceSize();
121-
void* getCUDABlasLtWorkspace();
119+
PADDLE_API WorkspaceMapWithMutex& cublas_handle_stream_to_workspace();
120+
PADDLE_API WorkspaceMapWithMutex& cublaslt_handle_stream_to_workspace();
121+
PADDLE_API size_t getChosenWorkspaceSize();
122+
PADDLE_API size_t getCUDABlasLtWorkspaceSize();
123+
PADDLE_API void* getCUDABlasLtWorkspace();
122124

123-
CUDAContextSolverHandle getCurrentCUDASolverDnHandle();
125+
PADDLE_API CUDAContextSolverHandle getCurrentCUDASolverDnHandle();
124126

125127
#if defined(USE_CUDSS)
126-
cudssHandle_t getCurrentCudssHandle();
128+
PADDLE_API cudssHandle_t getCurrentCudssHandle();
127129
#endif
128130

129131
// Get the CUDA device allocator for the current device.
130132
// Returns a pointer to a c10::Allocator that allocates GPU memory.
131-
c10::Allocator* getCUDADeviceAllocator();
133+
PADDLE_API c10::Allocator* getCUDADeviceAllocator();
132134
#endif
133135

134136
} // namespace at::cuda

paddle/phi/api/include/compat/ATen/cuda/EmptyTensor.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,18 @@
1515
#pragma once
1616
#include <ATen/core/TensorBody.h>
1717

18+
#include "paddle/common/macros.h"
19+
1820
namespace at::detail {
1921

2022
using at::Tensor;
21-
at::Tensor empty_cuda(IntArrayRef size,
22-
ScalarType dtype,
23-
std::optional<Device> device_opt,
24-
std::optional<c10::MemoryFormat> memory_format_opt);
23+
PADDLE_API at::Tensor empty_cuda(
24+
IntArrayRef size,
25+
ScalarType dtype,
26+
std::optional<Device> device_opt,
27+
std::optional<c10::MemoryFormat> memory_format_opt);
2528

26-
at::Tensor empty_cuda(IntArrayRef size, const TensorOptions &options);
29+
PADDLE_API at::Tensor empty_cuda(IntArrayRef size,
30+
const TensorOptions &options);
2731

2832
} // namespace at::detail

paddle/phi/api/include/compat/ATen/ops/tensor.h

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,25 @@
2020
#include <ATen/core/Tensor.h>
2121
#include <c10/core/ScalarType.h>
2222

23+
#include "paddle/common/macros.h"
24+
2325
namespace at {
2426

25-
#define TENSOR(T, S) \
26-
Tensor tensor(ArrayRef<T> values, const TensorOptions& options); \
27-
inline Tensor tensor(std::initializer_list<T> values, \
28-
const TensorOptions& options) { \
29-
return at::tensor(ArrayRef<T>(values), options); \
30-
} \
31-
inline Tensor tensor(T value, const TensorOptions& options) { \
32-
return at::tensor(ArrayRef<T>(value), options); \
33-
} \
34-
inline Tensor tensor(ArrayRef<T> values) { \
35-
return at::tensor(std::move(values), at::dtype(k##S)); \
36-
} \
37-
inline Tensor tensor(std::initializer_list<T> values) { \
38-
return at::tensor(ArrayRef<T>(values)); \
39-
} \
27+
// TENSOR(T, S) declares the at::tensor overload family for element type T,
// where k##S names the matching scalar-type constant:
//   1. tensor(ArrayRef<T>, const TensorOptions&)   - declaration only; the
//      body is defined out of line (ATen/Utils.cpp) and carries PADDLE_API.
//   2. tensor(std::initializer_list<T>, options)   - wraps the list in an
//      ArrayRef<T> and calls (1).
//   3. tensor(T, options)                          - wraps the single value in
//      an ArrayRef<T> and calls (1).
//   4. tensor(ArrayRef<T>)                         - calls (1) with
//      at::dtype(k##S) as the options.
//   5. tensor(std::initializer_list<T>)            - calls (4).
//   6. tensor(T)                                   - calls (4).
// Overloads 2-6 are inline, so only (1) needs a symbol from the library.
// NOTE(review): PADDLE_API comes from paddle/common/macros.h; presumably a
// shared-library export annotation needed for the Windows build - confirm.
#define TENSOR(T, S) \
28+
PADDLE_API Tensor tensor(ArrayRef<T> values, const TensorOptions& options); \
29+
inline Tensor tensor(std::initializer_list<T> values, \
30+
const TensorOptions& options) { \
31+
return at::tensor(ArrayRef<T>(values), options); \
32+
} \
33+
inline Tensor tensor(T value, const TensorOptions& options) { \
34+
return at::tensor(ArrayRef<T>(value), options); \
35+
} \
36+
inline Tensor tensor(ArrayRef<T> values) { \
37+
return at::tensor(std::move(values), at::dtype(k##S)); \
38+
} \
39+
inline Tensor tensor(std::initializer_list<T> values) { \
40+
return at::tensor(ArrayRef<T>(values)); \
41+
} \
4042
inline Tensor tensor(T value) { return at::tensor(ArrayRef<T>(value)); }
4143
AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TENSOR)
4244
AT_FORALL_COMPLEX_TYPES(TENSOR)

paddle/phi/api/include/compat/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
collect_srcs(api_srcs SRCS c10/core/Device.cpp)
2+
collect_srcs(api_srcs SRCS c10/core/DefaultDtype.cpp)
23
collect_srcs(api_srcs SRCS c10/core/Stream.cpp)
34
collect_srcs(api_srcs SRCS c10/cuda/CUDAFunctions.cpp)
45
collect_srcs(api_srcs SRCS c10/cuda/CUDAStream.cpp)
paddle/phi/api/include/compat/c10/core/DefaultDtype.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include <c10/core/DefaultDtype.h>
16+
#include <c10/util/complex.h>
17+
#include <c10/util/typeid.h>
18+
19+
namespace c10 {
20+
// File-local state behind the default-dtype accessors in this file.
// Initial default dtype: float, held as a caffe2::TypeMeta.
static auto default_dtype = caffe2::TypeMeta::Make<float>();
21+
// The same default, cached in ScalarType form via toScalarType().
static auto default_dtype_as_scalartype = default_dtype.toScalarType();
22+
// Initial default complex dtype: c10::complex<float>, held as a TypeMeta.
static auto default_complex_dtype =
23+
caffe2::TypeMeta::Make<c10::complex<float>>();
24+
25+
// Replaces the default dtype and refreshes the two values derived from it.
//
// dtype: the new default, as a caffe2::TypeMeta.
//
// After the call:
//   - default_dtype holds `dtype`;
//   - default_dtype_as_scalartype holds dtype.toScalarType();
//   - default_complex_dtype is ComplexHalf when the new default is Half,
//     ComplexDouble when it is Double, and ComplexFloat for everything else.
//
// NOTE(review): the three statics are written with no locking visible in this
// file; confirm callers do not race with the getters.
void set_default_dtype(caffe2::TypeMeta dtype) {
26+
default_dtype = dtype;
27+
// Keep the cached ScalarType form in step with the new TypeMeta.
default_dtype_as_scalartype = default_dtype.toScalarType();
28+
// default_complex_dtype is a caffe2::TypeMeta (see its initializer), while
// the right-hand sides below are ScalarType values, so these assignments
// depend on TypeMeta accepting a ScalarType on assignment (provided by
// c10/util/typeid.h, not shown here).
switch (default_dtype_as_scalartype) {
29+
case ScalarType::Half:
30+
default_complex_dtype = ScalarType::ComplexHalf;
31+
break;
32+
case ScalarType::Double:
33+
default_complex_dtype = ScalarType::ComplexDouble;
34+
break;
35+
default:
36+
// Every dtype other than Half/Double pairs with ComplexFloat.
default_complex_dtype = ScalarType::ComplexFloat;
37+
break;
38+
}
39+
}
40+
41+
// Returns, by value, the current default dtype as a caffe2::TypeMeta.
const caffe2::TypeMeta get_default_dtype() { return default_dtype; }
42+
43+
// Returns the current default dtype in ScalarType form. The value is the one
// cached by set_default_dtype (or by static initialization), not recomputed
// on each call.
ScalarType get_default_dtype_as_scalartype() {
44+
return default_dtype_as_scalartype;
45+
}
46+
47+
// Returns, by value, the current default complex dtype as a caffe2::TypeMeta.
// The value tracks the default dtype: set_default_dtype updates it.
const caffe2::TypeMeta get_default_complex_dtype() {
48+
return default_complex_dtype;
49+
}
50+
} // namespace c10

paddle/phi/api/include/compat/c10/core/DefaultDtype.h

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,16 @@
1515
#pragma once
1616

1717
#include <c10/core/ScalarType.h>
18-
#include <c10/util/typeid.h>
1918

20-
namespace c10 {
21-
static auto default_dtype = ScalarType::Float;
22-
static auto default_complex_dtype = ScalarType::ComplexFloat;
23-
24-
void inline set_default_dtype(ScalarType dtype) { default_dtype = dtype; }
19+
#include "paddle/common/macros.h"
2520

26-
ScalarType inline get_default_dtype_as_scalartype() { return default_dtype; }
21+
namespace caffe2 {
22+
class TypeMeta;
23+
} // namespace caffe2
2724

28-
ScalarType inline get_default_complex_dtype() { return default_complex_dtype; }
29-
30-
/// Returns default dtype as caffe2::TypeMeta (the canonical form, mirrors
31-
/// PyTorch).
32-
inline caffe2::TypeMeta get_default_dtype() {
33-
return caffe2::TypeMeta::fromScalarType(default_dtype);
34-
}
25+
// Default-dtype accessors. Only declarations live in this header; the
// definitions and the state they read/write are in c10/core/DefaultDtype.cpp.
// caffe2::TypeMeta is only forward-declared in this header, so a translation
// unit that calls a function passing or returning TypeMeta by value must
// include the full definition itself (e.g. c10/util/typeid.h).
// NOTE(review): PADDLE_API comes from paddle/common/macros.h; presumably a
// shared-library export annotation needed for the Windows build - confirm.
namespace c10 {
26+
// Sets the default dtype from a TypeMeta.
PADDLE_API void set_default_dtype(caffe2::TypeMeta dtype);
27+
// Current default dtype as a TypeMeta.
PADDLE_API const caffe2::TypeMeta get_default_dtype();
28+
// Current default dtype as a ScalarType.
PADDLE_API ScalarType get_default_dtype_as_scalartype();
29+
// Current default complex dtype as a TypeMeta.
PADDLE_API const caffe2::TypeMeta get_default_complex_dtype();
3530
} // namespace c10

paddle/phi/api/include/compat/c10/core/Device.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@ using gpuStream_t = hipStream_t;
3333
#include <string>
3434
#include <utility>
3535

36+
#include "paddle/common/macros.h"
3637
#include "paddle/phi/core/platform/device/gpu/gpu_info.h"
3738
#include "paddle/phi/core/platform/device_event_base.h"
3839

3940
namespace c10 {
4041
using DeviceIndex = int8_t;
4142

42-
struct Device final {
43+
struct PADDLE_API Device final {
4344
using Type = DeviceType;
4445
Device() = default;
4546
Device(phi::Place place)
@@ -161,7 +162,7 @@ struct Device final {
161162
}
162163
};
163164

164-
std::ostream& operator<<(std::ostream& stream, const Device& device);
165+
PADDLE_API std::ostream& operator<<(std::ostream& stream, const Device& device);
165166

166167
} // namespace c10
167168

paddle/phi/api/include/compat/c10/core/Stream.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include <functional>
2323
#include <ostream>
2424

25+
#include "paddle/common/macros.h"
26+
2527
namespace c10 {
2628

2729
using StreamId = int64_t;
@@ -32,7 +34,7 @@ struct StreamData3 {
3234
DeviceType device_type;
3335
};
3436

35-
class Stream final {
37+
class PADDLE_API Stream final {
3638
private:
3739
Device device_;
3840
StreamId id_;

0 commit comments

Comments
 (0)