Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
a45e7e0
[Cpp API Compatibility] Delete useless code and rename test files
youge325 Apr 3, 2026
7cee6e8
fix
youge325 Apr 3, 2026
3dc2c05
Revert "fix"
youge325 Apr 5, 2026
53acaba
fix
youge325 Apr 5, 2026
82768bc
Merge branch 'develop' into cNorm
youge325 Apr 5, 2026
a0be3de
complement CPU compiling branch
youge325 Apr 5, 2026
88c0a55
try to compile test files with cpu
youge325 Apr 5, 2026
90a73da
fix
youge325 Apr 5, 2026
aaac6b3
fix
youge325 Apr 5, 2026
d825db2
fix
youge325 Apr 5, 2026
d6e9be4
fix xpu
youge325 Apr 5, 2026
ce84dbe
fix Mac-CPU TensorOptions compilation error
youge325 Apr 5, 2026
003897d
fix torch_compat.h: add missing DispatchKey.h include and fix namespace
youge325 Apr 5, 2026
f8e3a74
fix dcu build with symbol visibility hidden
youge325 Apr 5, 2026
85ae6b8
Revert "fix xpu"
youge325 Apr 5, 2026
7c0fb0f
Fix ATen compat operator[] to return view instead of copy
youge325 Apr 5, 2026
f9a4cb5
Fix as_strided to work with non-contiguous tensors
youge325 Apr 5, 2026
3d9f777
fix dcu again
youge325 Apr 5, 2026
c661ed6
Revert "fix dcu build with symbol visibility hidden"
youge325 Apr 5, 2026
4447f88
Revert "fix dcu again"
youge325 Apr 6, 2026
ad97976
move dcu related tests to nv_test
youge325 Apr 6, 2026
ac7bab4
skip xpu test when FLAGS_use_stride_kernel is disabled
youge325 Apr 6, 2026
283802c
fix xpu build
youge325 Apr 6, 2026
253d526
fix dcu build, which will be removed due to deepep deprecated API
youge325 Apr 6, 2026
89106a5
fix cpu build due to google test version too old
youge325 Apr 6, 2026
1fcc43d
skip dcu for all tests
youge325 Apr 6, 2026
ea0fa1f
try to compile all tests in cpu environment, except dcu environment, …
youge325 Apr 6, 2026
52662f3
fix all cpu build, and move cuda only test to nv_test
youge325 Apr 6, 2026
934b82a
Merge branch 'develop' into cNorm
SigureMo Apr 6, 2026
d283912
replace `torch::DispatchKey` with `c10::DispatchKey`
SigureMo Apr 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions test/cpp/compat/ATen_CUDABlas_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#include "test/cpp/compat/cuda_test_utils.h"

// Helper: allocate three same-sized device buffers, copy host data in,
// invoke a kernel via |fn|, copy results back, synchronize, then free.
Expand Down Expand Up @@ -73,7 +72,6 @@ class GemmTester {
static double toDouble(T val) { return static_cast<double>(val); }

void Run() {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
std::vector<T> h_a = {T(1), T(3), T(2), T(4)};
std::vector<T> h_b = {T(5), T(7), T(6), T(8)};
std::vector<T> h_c(N * N, T(0));
Expand All @@ -95,7 +93,6 @@ class GemmTester {
// transA='T': C = alpha * A^T * B + beta * C
// A^T = [[1,3],[2,4]], A^T * B = [[26,30],[38,44]]
void RunTransA() {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
std::vector<T> h_a = {T(1), T(3), T(2), T(4)};
std::vector<T> h_b = {T(5), T(7), T(6), T(8)};
std::vector<T> h_c(N * N, T(0));
Expand Down Expand Up @@ -136,7 +133,6 @@ TEST(CUDABlasTest, GemmFloatTransA) {
}

TEST(CUDABlasTest, GemmFloatTransALowercase) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
constexpr int64_t N = 2;

std::vector<float> h_a = {1.F, 3.F, 2.F, 4.F};
Expand Down Expand Up @@ -181,7 +177,6 @@ TEST(CUDABlasTest, GemmBFloat16) {
// A stored col-major: col0={1+i,2+2i}, col1={3+3i,4+4i}
// A^H stored col-major: col0={1-i,3-3i}, col1={2-2i,4-4i}
TEST(CUDABlasTest, GemmComplexFloatConjTrans) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
constexpr int64_t N = 2;
using T = c10::complex<float>;

Expand Down Expand Up @@ -209,7 +204,6 @@ TEST(CUDABlasTest, GemmComplexFloatConjTrans) {

// Same as above but uses lowercase 'c'/'n' to exercise that switch-case branch.
TEST(CUDABlasTest, GemmComplexDoubleConjTransLower) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
constexpr int64_t N = 2;
using T = c10::complex<double>;

Expand Down
16 changes: 0 additions & 16 deletions test/cpp/compat/ATen_CUDAContext_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,24 @@

#include "gtest/gtest.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "test/cpp/compat/cuda_test_utils.h"

// ---------------------------------------------------------------------------
// CUDAFunctions.h — covers the 2 missing lines:
// c10::cuda::device_synchronize() and c10::cuda::stream_synchronize()
// ---------------------------------------------------------------------------

// Verifies that c10::cuda::device_synchronize() completes without throwing.
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is a macro defined outside
// this view; presumably it skips the test when no CUDA runtime is usable —
// confirm against its definition.
TEST(CUDAFunctionsTest, DeviceSynchronize) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
// Exercises the PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize()) branch
ASSERT_NO_THROW(c10::cuda::device_synchronize());
}

// Verifies that c10::cuda::stream_synchronize() does not throw when given the
// stream returned by c10::cuda::getCurrentCUDAStream().
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is defined outside this
// view; presumably it skips the test when no CUDA runtime is usable — confirm.
TEST(CUDAFunctionsTest, StreamSynchronize) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
// Exercises phi::backends::gpu::GpuStreamSync()
auto stream = c10::cuda::getCurrentCUDAStream();
ASSERT_NO_THROW(c10::cuda::stream_synchronize(stream));
}

TEST(CUDAFunctionsTest, AtNamespaceAliases) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
// Exercises the using aliases in at::cuda namespace
ASSERT_NO_THROW(at::cuda::device_synchronize());
auto stream = c10::cuda::getCurrentCUDAStream();
Comment on lines 31 to 53
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@youge325 话说这个当初为啥要加,现在又为啥要删?是因为只在 CUDA 上跑吗?其实我觉得如果可以的话,我还是希望这些单测能测就尽可能在 CPU 上测一测的,当初 CMakeLists 里的 `WITH_GPU` 只是早期希望快速跑通而已

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

当初用 Agent 加测试提高 coverage rate 的时候自动加的,按我的理解,如果能编译成功的话,那么测试也不能随便就 skip,所以现在就删了。

有些单测也确实可以在CPU上跑,条件编译一下测试用例就行,比如 `#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)` 就 `ASSERT_NO_THROW`,`#else` 就 `ASSERT_THROW`,我看看有哪些测试用例可以这样改

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ShigureNyako 帮我看看 test/cpp/compat/CMakeLists.txt 是否可以把 if(WITH_GPU) 删掉

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

刚刚 auth 过期了,@ShigureNyako 看看呢!

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

召唤 @ShigureNyako

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ShigureNyako 请给我一个 GPU build 但运行时无可用 device/runtime 的例子,在 CI 当中并没有出现这种情况, GPU build 就是为了在有可用 device/runtime 的场景中运行,我原来的代码写错了所以现在删掉,你可能倒果为因了

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ShigureNyako 如果从严谨性上来看,确实会有编译期 WITH_GPU 但是实际 runtime 没有的场景,比如我们 CI 里所有机器都是 CPU 编包的,即便是 Coverage build 也是,但是虽然 Coverage build 本身在 CPU 上,但其产物一定会在后续 runtime 有 GPU 的 Coverage test 上运行,也就是说目前 CI 上基本上有一个假设就是,编译时带了 GPU,那运行时就一定带 GPU,这其实不应该是考虑的重点

当初早期 C++ 兼容性第一个 PR #74402 为赶 3.2 发版在拉分支前夕合入临时跳过了一些内容,应该是 defd6a3 跳过了一些新硬件(根据上一个 commit 99f7b88 的报错和该 commit message 看起来是 XPU 和 NPU,但日志已经过期看不到了)上的运行时问题,请着眼于该场景分析下具体可能存在的问题,以及是否有推荐的解决方案

注意,我们即便不能推广到所有硬件(比如 XPU 和 NPU),即便跳过部分硬件,也比直接放弃 CPU 测试更好,我们的目标不是一次修复所有硬件上的问题(当然可以更好),只是希望在确保 CPU 上能够有所监控

Expand All @@ -65,14 +61,12 @@ TEST(CUDAContextLightTest, IsAvailable) {

// getNumGPUs() delegates to c10::cuda::device_count()
// Asserts that at::cuda::getNumGPUs() reports at least one device.
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is defined outside this
// view; presumably it skips the test when no CUDA runtime is usable — confirm.
TEST(CUDAContextLightTest, GetNumGPUs) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
int64_t n = at::cuda::getNumGPUs();
// The test requires a visible device; a count of zero is treated as failure.
ASSERT_GE(n, 1);
}

// getCurrentDeviceProperties() / getDeviceProperties()
TEST(CUDAContextLightTest, DeviceProperties) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
ASSERT_NE(prop, nullptr);
// Sanity-check a few well-known fields
Expand All @@ -87,15 +81,13 @@ TEST(CUDAContextLightTest, DeviceProperties) {

// warp_size()
// Asserts that at::cuda::warp_size() returns either 32 or 64.
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is defined outside this
// view; presumably it skips the test when no CUDA runtime is usable — confirm.
TEST(CUDAContextLightTest, WarpSize) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
int ws = at::cuda::warp_size();
// All NVIDIA and AMD GPU architectures have a warp size of 32 or 64
ASSERT_TRUE(ws == 32 || ws == 64);
}

// canDeviceAccessPeer() — a device cannot peer-access itself
TEST(CUDAContextLightTest, CanDeviceAccessPeer) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
int device_id = phi::backends::gpu::GetCurrentDeviceId();
// Self-to-self peer access is always false per CUDA spec
bool self_peer = at::cuda::canDeviceAccessPeer(device_id, device_id);
Expand All @@ -104,26 +96,22 @@ TEST(CUDAContextLightTest, CanDeviceAccessPeer) {

// Handle accessors — all must return non-null handles
// Asserts that at::cuda::getCurrentCUDABlasHandle() returns a non-null
// cublasHandle_t.
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is defined outside this
// view; presumably it skips the test when no CUDA runtime is usable — confirm.
TEST(CUDAContextLightTest, GetCurrentCUDABlasHandle) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
cublasHandle_t h = at::cuda::getCurrentCUDABlasHandle();
ASSERT_NE(h, nullptr);
}

// Asserts that at::cuda::getCurrentCUDABlasLtHandle() returns a non-null
// cublasLtHandle_t.
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is defined outside this
// view; presumably it skips the test when no CUDA runtime is usable — confirm.
TEST(CUDAContextLightTest, GetCurrentCUDABlasLtHandle) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
cublasLtHandle_t h = at::cuda::getCurrentCUDABlasLtHandle();
ASSERT_NE(h, nullptr);
}

// Asserts that at::cuda::getCurrentCUDASparseHandle() returns a non-null
// cusparseHandle_t.
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is defined outside this
// view; presumably it skips the test when no CUDA runtime is usable — confirm.
TEST(CUDAContextLightTest, GetCurrentCUDASparseHandle) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
cusparseHandle_t h = at::cuda::getCurrentCUDASparseHandle();
ASSERT_NE(h, nullptr);
}

#if defined(CUDART_VERSION) || defined(USE_ROCM)
// Asserts that at::cuda::getCurrentCUDASolverDnHandle() returns a non-null
// cusolverDnHandle_t. Only compiled when CUDART_VERSION or USE_ROCM is
// defined (see the enclosing preprocessor guard).
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is defined outside this
// view; presumably it skips the test when no CUDA runtime is usable — confirm.
TEST(CUDAContextLightTest, GetCurrentCUDASolverDnHandle) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
cusolverDnHandle_t h = at::cuda::getCurrentCUDASolverDnHandle();
ASSERT_NE(h, nullptr);
}
Expand Down Expand Up @@ -160,7 +148,6 @@ TEST(CUDAContextLightTest, GetChosenWorkspaceSize) {

// getCUDABlasLtWorkspaceSize() / getCUDABlasLtWorkspace()
TEST(CUDAContextLightTest, CUDABlasLtWorkspace) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
size_t sz = at::cuda::getCUDABlasLtWorkspaceSize();
ASSERT_GT(sz, 0UL);

Expand All @@ -176,7 +163,6 @@ TEST(CUDAContextLightTest, CUDADeviceAllocatorSingleton) {
}

TEST(CUDAContextLightTest, CUDADeviceAllocatorCloneAndCopyData) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
c10::Allocator* alloc = at::cuda::getCUDADeviceAllocator();
ASSERT_NE(alloc, nullptr);

Expand Down Expand Up @@ -207,7 +193,6 @@ TEST(CUDAContextLightTest, CUDADeviceAllocatorCloneAndCopyData) {
}

TEST(CUDAContextLightTest, CUDADeviceAllocatorCloneZeroBytes) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
c10::Allocator* alloc = at::cuda::getCUDADeviceAllocator();
ASSERT_NE(alloc, nullptr);

Expand All @@ -220,7 +205,6 @@ TEST(CUDAContextLightTest, CUDADeviceAllocatorCloneZeroBytes) {
}

TEST(CUDAContextLightTest, AllocatorZeroSizeAndNoopCopyBranches) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
c10::Allocator* alloc = at::cuda::getCUDADeviceAllocator();
ASSERT_NE(alloc, nullptr);

Expand Down
3 changes: 0 additions & 3 deletions test/cpp/compat/ATen_Utils_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
#include "ATen/ATen.h"
#include "gtest/gtest.h"
#include "paddle/phi/common/float16.h"
#include "test/cpp/compat/cuda_test_utils.h"
#include "torch/all.h"

// ============================================================
Expand Down Expand Up @@ -153,7 +152,6 @@ TEST(ATenUtilsTest, TensorBackend_CPUDevice_MatchesTensorCPU) {

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST(ATenUtilsTest, TensorBackend_GPUDevice) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
std::vector<float> data = {7.0f, 8.0f};
at::TensorOptions opts =
at::TensorOptions().dtype(at::kFloat).device(c10::Device(c10::kCUDA, 0));
Expand All @@ -164,7 +162,6 @@ TEST(ATenUtilsTest, TensorBackend_GPUDevice) {
}

TEST(ATenUtilsTest, TensorComplexBackend_GPUDevice) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
std::vector<c10::complex<float>> data = {{1.0f, 0.0f}};
at::TensorOptions opts = at::TensorOptions()
.dtype(at::kComplexFloat)
Expand Down
1 change: 1 addition & 0 deletions test/cpp/compat/ATen_as_strided_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ TEST_F(TensorAsStridedTest, AsStridedInplaceWithOffset) {
t.as_strided_({2, 3}, {3, 1}, 1);

ASSERT_EQ(t.sizes(), c10::IntArrayRef({2, 3}));
ASSERT_NE(t.data_ptr<float>(), original_data_ptr);

float* data = t.data_ptr<float>();
ASSERT_FLOAT_EQ(data[0], 1.0f);
Expand Down
1 change: 1 addition & 0 deletions test/cpp/compat/ATen_clamp_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ TEST_F(TensorOperatorIndexTest, OperatorIndexOutOfBounds) {
}
// Note: Depending on implementation, this may or may not throw
// We accept either behavior (return empty/invalid tensor or throw)
(void)threw_exception; // Silence unused variable warning
}

// ======================= Additional clamp edge case tests
Expand Down
11 changes: 2 additions & 9 deletions test/cpp/compat/ATen_cuda_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@
#include <c10/core/ScalarType.h>
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include <c10/cuda/CUDAFunctions.h>
#endif

#include "ATen/ATen.h"
#include "gtest/gtest.h"
#include "test/cpp/compat/cuda_test_utils.h"
#include "torch/all.h"

// ============================================================
Expand All @@ -33,7 +31,6 @@

// After cuda(), the tensor should reside on a GPU device.
TEST(TensorCudaTest, CpuTensorMovesToCuda) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor cpu_t = at::tensor({1.0f, 2.0f, 3.0f}, at::kFloat);
ASSERT_TRUE(cpu_t.is_cpu());

Expand All @@ -44,7 +41,6 @@ TEST(TensorCudaTest, CpuTensorMovesToCuda) {

// dtype and numel must be preserved.
TEST(TensorCudaTest, DtypeAndNumelPreserved) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor cpu_t = at::tensor({1, 2, 3, 4}, at::kInt);
at::Tensor cuda_t = cpu_t.cuda();

Expand All @@ -54,7 +50,6 @@ TEST(TensorCudaTest, DtypeAndNumelPreserved) {

// Values should round-trip back to CPU intact.
TEST(TensorCudaTest, ValuesPreservedAfterRoundTrip) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
std::vector<float> data = {1.0f, 2.5f, -3.0f, 4.75f};
at::Tensor cpu_t = at::tensor(data, at::kFloat);
at::Tensor cuda_t = cpu_t.cuda();
Expand All @@ -68,7 +63,6 @@ TEST(TensorCudaTest, ValuesPreservedAfterRoundTrip) {

// shape (sizes) should be preserved.
TEST(TensorCudaTest, ShapePreserved) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor cpu_t = at::zeros({2, 3, 4}, at::kFloat);
at::Tensor cuda_t = cpu_t.cuda();

Expand All @@ -80,7 +74,6 @@ TEST(TensorCudaTest, ShapePreserved) {

// An already-CUDA tensor should still be CUDA after another cuda() call.
TEST(TensorCudaTest, AlreadyCudaTensorStaysCuda) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor cpu_t = at::tensor({7.0f}, at::kFloat);
at::Tensor cuda_t = cpu_t.cuda();
at::Tensor cuda_t2 = cuda_t.cuda();
Expand All @@ -91,7 +84,6 @@ TEST(TensorCudaTest, AlreadyCudaTensorStaysCuda) {

// device() should report a CUDA device.
TEST(TensorCudaTest, DeviceIsCuda) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor cpu_t = at::tensor({0.0f}, at::kFloat);
at::Tensor cuda_t = cpu_t.cuda();

Expand All @@ -100,10 +92,11 @@ TEST(TensorCudaTest, DeviceIsCuda) {

// is_cuda() / is_cpu() are mutually exclusive.
// Builds a two-element float tensor, moves it with cuda(), and asserts the
// result reports is_cuda() == true and is_cpu() == false.
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is defined outside this
// view; presumably it skips the test when no CUDA runtime is usable — confirm.
TEST(TensorCudaTest, IsCudaAndIsCpuMutuallyExclusive) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor cpu_t = at::tensor({1.0f, 2.0f}, at::kFloat);
at::Tensor cuda_t = cpu_t.cuda();

// Both predicates are checked on the same tensor so a backend that reports
// both (or neither) fails the test.
ASSERT_TRUE(cuda_t.is_cuda());
ASSERT_FALSE(cuda_t.is_cpu());
}

#endif
6 changes: 0 additions & 6 deletions test/cpp/compat/ATen_empty_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

#include "ATen/ATen.h"
#include "gtest/gtest.h"
#include "test/cpp/compat/cuda_test_utils.h"
#include "torch/all.h"

// ======================== at::empty basic tests ========================
Expand Down Expand Up @@ -57,7 +56,6 @@ TEST(ATenEmptyTest, ExplicitArgsCpu) {

// TensorOptions overload: pin_memory via options
TEST(ATenEmptyTest, PinMemoryViaTensorOptions) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::TensorOptions opts =
at::TensorOptions().dtype(at::kFloat).pinned_memory(true);
at::Tensor t = at::empty({4, 4}, opts);
Expand All @@ -67,7 +65,6 @@ TEST(ATenEmptyTest, PinMemoryViaTensorOptions) {

// 6-argument overload: pin_memory = true (must use CPU device)
TEST(ATenEmptyTest, PinMemoryViaExplicitArgs) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor t =
at::empty({8}, at::kFloat, at::kStrided, at::kCPU, true, std::nullopt);
ASSERT_TRUE(t.is_pinned())
Expand All @@ -76,7 +73,6 @@ TEST(ATenEmptyTest, PinMemoryViaExplicitArgs) {

// pin_memory = false must NOT produce a pinned tensor
TEST(ATenEmptyTest, NoPinMemoryViaExplicitArgs) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor t =
at::empty({8}, at::kFloat, at::kStrided, at::kCUDA, false, std::nullopt);
ASSERT_FALSE(t.is_pinned())
Expand All @@ -85,7 +81,6 @@ TEST(ATenEmptyTest, NoPinMemoryViaExplicitArgs) {

// Pinned tensor lives in pinned (host) memory, not on the GPU device itself
TEST(ATenEmptyTest, PinnedTensorIsNotCuda) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::TensorOptions opts =
at::TensorOptions().dtype(at::kFloat).pinned_memory(true);
at::Tensor t = at::empty({16}, opts);
Expand All @@ -96,7 +91,6 @@ TEST(ATenEmptyTest, PinnedTensorIsNotCuda) {

// Data pointer of a pinned tensor must be non-null
TEST(ATenEmptyTest, PinnedTensorDataPtrNonNull) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::TensorOptions opts =
at::TensorOptions().dtype(at::kFloat).pinned_memory(true);
at::Tensor t = at::empty({32}, opts);
Expand Down
3 changes: 0 additions & 3 deletions test/cpp/compat/ATen_equal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <c10/core/TensorOptions.h>

#include "gtest/gtest.h"
#include "test/cpp/compat/cuda_test_utils.h"

TEST(TensorEqualTest, DifferentShapeReturnsFalse) {
at::Tensor a = at::ones({2, 2}, at::kFloat);
Expand All @@ -41,8 +40,6 @@ TEST(TensorEqualTest, DtypeMismatchCastsOtherTensor) {

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST(TensorEqualTest, DeviceMismatchThrows) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();

at::Tensor cpu = at::ones({2, 2}, at::kFloat);
at::Tensor gpu =
at::ones({2, 2}, at::TensorOptions().dtype(at::kFloat).device(at::kCUDA));
Expand Down
3 changes: 0 additions & 3 deletions test/cpp/compat/ATen_eye_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include "ATen/ATen.h"
#include "gtest/gtest.h"
#include "paddle/phi/common/float16.h"
#include "test/cpp/compat/cuda_test_utils.h"
#include "torch/all.h"

// ============================================================
Expand Down Expand Up @@ -159,15 +158,13 @@ TEST(ATenEyeTest, OneByOne) {

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// Creates a float at::eye(4) with the device option set to at::kCUDA, copies
// it to the CPU with to(at::kCPU), and validates the copy with CheckEye.
// NOTE(review): CheckEye is defined outside this view; presumably it checks
// the identity-matrix contents for the given rows/cols — confirm.
// NOTE(review): SKIP_IF_CUDA_RUNTIME_UNAVAILABLE() is defined outside this
// view; presumably it skips the test when no CUDA runtime is usable — confirm.
TEST(ATenEyeTest, SquareOnGPU) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor t =
at::eye(4, at::TensorOptions().dtype(at::kFloat).device(at::kCUDA));
at::Tensor t_cpu = t.to(at::kCPU);
CheckEye(t_cpu, 4, 4);
}

TEST(ATenEyeTest, RectangularOnGPU) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
at::Tensor t =
at::eye(3, 5, at::TensorOptions().dtype(at::kFloat).device(at::kCUDA));
at::Tensor t_cpu = t.to(at::kCPU);
Expand Down
Loading
Loading