Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,12 @@ set(TORCH_LIBRARIES "")
# Recursively collect every shared (*.so) and static (*.a) library found under
# ${TORCH_DIR}/lib into TORCH_LIBRARIES.
file(GLOB_RECURSE TORCH_LIBRARIES "${TORCH_DIR}/lib/*.so"
"${TORCH_DIR}/lib/*.a")

# The CUDA toolkit is optional here: QUIET suppresses find_package output and
# there is no REQUIRED, so configuration continues when it is not installed.
find_package(CUDAToolkit QUIET)
# Header search paths for the Torch build.
set(TORCH_INCLUDE_DIR "${TORCH_DIR}/include"
"${TORCH_DIR}/include/torch/csrc/api/include/")
# Add the CUDA toolkit headers only when the toolkit was located.
# NOTE(review): presumably needed so that headers pulling in
# cuda_runtime_api.h resolve - confirm against the tests that include them.
if(CUDAToolkit_FOUND)
list(APPEND TORCH_INCLUDE_DIR "${CUDAToolkit_INCLUDE_DIRS}")
endif()
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

find_package(CUDAToolkit QUIET) + conditional include path append can still leave builds failing later with missing cuda_runtime_api.h (e.g., CUDA-enabled libtorch headers included but CUDA toolkit not found), and the failure will be a compiler error with no actionable CMake message. Consider adding a clear message(FATAL_ERROR ...) (or at least message(WARNING ...)) when CUDA-dependent headers/tests are being built but CUDAToolkit is not found, so the configuration fails early and is easier to debug.

Suggested change
endif()
endif()
if(NOT CUDAToolkit_FOUND)
message(
FATAL_ERROR
"CUDAToolkit was not found, but CUDA-dependent Torch tests are enabled. "
"Please install the CUDA toolkit or disable building these Torch tests.")
endif()

Copilot uses AI. Check for mistakes.

# Directory under the build tree used for the Torch-side outputs.
set(TORCH_TARGET_FOLDER ${CMAKE_BINARY_DIR}/torch)
# Name prefix for Torch-side binaries.
# NOTE(review): how the prefix is applied is not visible in this hunk - confirm.
set(BIN_PREFIX "torch_")
Expand Down Expand Up @@ -120,6 +124,9 @@ set(PADDLE_INCLUDE_DIR
"${PADDLE_DIR}/include/paddle/phi/api/include/compat/"
"${PADDLE_DIR}/include/paddle/phi/api/include/compat/torch/csrc/api/include/"
)
# Add the CUDA toolkit headers to the Paddle include path only when the
# toolkit was located; otherwise PADDLE_INCLUDE_DIR is left unchanged.
if(CUDAToolkit_FOUND)
list(APPEND PADDLE_INCLUDE_DIR "${CUDAToolkit_INCLUDE_DIRS}")
endif()
Comment on lines +127 to +130
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Appending ${CUDAToolkit_INCLUDE_DIRS} to PADDLE_INCLUDE_DIR is guarded on CUDAToolkit_FOUND, but PADDLE_LIBRARIES unconditionally includes GPU libraries (e.g. libphi_gpu.so). If the environment is missing CUDA toolkit headers, the build will fail with confusing compile errors. Consider either (1) making CUDA toolkit a required dependency when linking Paddle GPU libs, or (2) gating the GPU libs and CUDA include dirs behind a single explicit option/check so CPU-only environments fail fast or build a CPU-only variant consistently.

Copilot uses AI. Check for mistakes.

set(PADDLE_LIBRARIES
"${PADDLE_DIR}/base/libpaddle.so" "${PADDLE_DIR}/libs/libcommon.so"
Expand Down
262 changes: 262 additions & 0 deletions test/RecordStreamTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
#include <ATen/ATen.h>
#include <ATen/core/Tensor.h>
#include <ATen/ops/zeros.h>
#include <c10/core/Stream.h>
#include <gtest/gtest.h>
#include <torch/all.h>

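// The Paddle compat c10/cuda/CUDAStream.h depends on the PADDLE_WITH_CUDA macro
// and cannot be included in an ordinary build environment, while the libtorch
// version depends on cuda_runtime_api.h. The header is therefore included only
// when USE_PADDLE_API=0 (libtorch build).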
#if !USE_PADDLE_API
#include <c10/cuda/CUDAStream.h>
#endif

#include <string>
#include <vector>

#include "../src/file_manager.h"

extern paddle_api_test::ThreadSafeParam g_custom_param;

namespace at {
namespace test {

using paddle_api_test::FileManerger;
using paddle_api_test::ThreadSafeParam;

class RecordStreamTest : public ::testing::Test {
protected:
void SetUp() override { cpu_tensor = at::zeros({2, 3}, at::kFloat); }
at::Tensor cpu_tensor;
};

// Returns an at::Stream that refers to the default CUDA stream of device 0.
//
// libtorch:      obtained from c10::cuda::getDefaultCUDAStream(); the
//                resulting CUDAStream converts implicitly to at::Stream.
// Paddle compat: CUDAStream offers no implicit conversion, so the stream is
//                constructed by hand with the DEFAULT stream id.
//
// The libtorch branch deliberately asks for the *default* stream rather than
// the *current* one: the current stream can be changed by other code running
// in the same process, which would make this helper disagree with its name
// and with the Paddle branch.
static at::Stream get_default_cuda_stream() {
#if USE_PADDLE_API
  // Paddle: construct directly (DEFAULT = the CUDA null/default stream).
  return at::Stream(at::Stream::DEFAULT, c10::Device(c10::DeviceType::CUDA, 0));
#else
  // libtorch: CUDAStream -> at::Stream via the implicit conversion operator.
  return c10::cuda::getDefaultCUDAStream(0);
#endif
}

// --- Basic functionality tests: CUDA tensor + CUDA stream ---

// kFloat, shape {2, 3} (small tensor).
// First test of the suite: the result file is opened with createFile().
// Writes "CudaFloat2x3 " followed by "no_cuda", "1" or "exception".
TEST_F(RecordStreamTest, CudaFloat2x3) {
  auto output_path = g_custom_param.get();
  FileManerger out(output_path);
  out.createFile();
  out << "CudaFloat2x3 ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor tensor = cpu_tensor.cuda();
      at::Stream cuda_stream = get_default_cuda_stream();
      tensor.record_stream(cuda_stream);
      out << "1";
    } catch (const std::exception&) {
      out << "exception";
    }
  } else {
    // Without a CUDA device only the marker is recorded.
    out << "no_cuda";
  }
  out << "\n";
  out.saveFile();
}

// kDouble, shape {4} (small tensor, different dtype).
// Writes "CudaDouble4 " followed by "no_cuda", "1" or "exception".
TEST_F(RecordStreamTest, CudaDouble4) {
  auto output_path = g_custom_param.get();
  FileManerger out(output_path);
  out.openAppend();
  out << "CudaDouble4 ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor tensor = at::zeros({4}, at::kDouble).cuda();
      at::Stream cuda_stream = get_default_cuda_stream();
      tensor.record_stream(cuda_stream);
      out << "1";
    } catch (const std::exception&) {
      out << "exception";
    }
  } else {
    // Without a CUDA device only the marker is recorded.
    out << "no_cuda";
  }
  out << "\n";
  out.saveFile();
}

// kInt, shape {100, 100} (large tensor, >= 10000 elements).
// Writes "CudaInt100x100 " followed by "no_cuda", "1" or "exception".
TEST_F(RecordStreamTest, CudaInt100x100) {
  auto output_path = g_custom_param.get();
  FileManerger out(output_path);
  out.openAppend();
  out << "CudaInt100x100 ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor tensor = at::zeros({100, 100}, at::kInt).cuda();
      at::Stream cuda_stream = get_default_cuda_stream();
      tensor.record_stream(cuda_stream);
      out << "1";
    } catch (const std::exception&) {
      out << "exception";
    }
  } else {
    // Without a CUDA device only the marker is recorded.
    out << "no_cuda";
  }
  out << "\n";
  out.saveFile();
}

// kLong, shape {} (0-d scalar tensor).
// Writes "CudaLongScalar " followed by "no_cuda", "1" or "exception".
TEST_F(RecordStreamTest, CudaLongScalar) {
  auto output_path = g_custom_param.get();
  FileManerger out(output_path);
  out.openAppend();
  out << "CudaLongScalar ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor tensor = at::zeros({}, at::kLong).cuda();
      at::Stream cuda_stream = get_default_cuda_stream();
      tensor.record_stream(cuda_stream);
      out << "1";
    } catch (const std::exception&) {
      out << "exception";
    }
  } else {
    // Without a CUDA device only the marker is recorded.
    out << "no_cuda";
  }
  out << "\n";
  out.saveFile();
}

// kFloat, shape {0} (empty tensor, boundary shape).
// Writes "CudaEmptyShape " followed by "no_cuda", "1" or "exception".
TEST_F(RecordStreamTest, CudaEmptyShape) {
  auto output_path = g_custom_param.get();
  FileManerger out(output_path);
  out.openAppend();
  out << "CudaEmptyShape ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor tensor = at::zeros({0}, at::kFloat).cuda();
      at::Stream cuda_stream = get_default_cuda_stream();
      tensor.record_stream(cuda_stream);
      out << "1";
    } catch (const std::exception&) {
      out << "exception";
    }
  } else {
    // Without a CUDA device only the marker is recorded.
    out << "no_cuda";
  }
  out << "\n";
  out.saveFile();
}

// kFloat, shape {1, 1, 1} (every dimension is 1, boundary shape).
// Writes "CudaAllOnes " followed by "no_cuda", "1" or "exception".
TEST_F(RecordStreamTest, CudaAllOnes) {
  auto output_path = g_custom_param.get();
  FileManerger out(output_path);
  out.openAppend();
  out << "CudaAllOnes ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor tensor = at::zeros({1, 1, 1}, at::kFloat).cuda();
      at::Stream cuda_stream = get_default_cuda_stream();
      tensor.record_stream(cuda_stream);
      out << "1";
    } catch (const std::exception&) {
      out << "exception";
    }
  } else {
    // Without a CUDA device only the marker is recorded.
    out << "no_cuda";
  }
  out << "\n";
  out.saveFile();
}

// kFloat, non-contiguous tensor (produced by a transpose).
// Writes "CudaNonContiguous " followed by "no_cuda", "1" or "exception".
TEST_F(RecordStreamTest, CudaNonContiguous) {
  auto output_path = g_custom_param.get();
  FileManerger out(output_path);
  out.openAppend();
  out << "CudaNonContiguous ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor source = at::zeros({3, 4}, at::kFloat).cuda();
      // Swapping the two dimensions yields the non-contiguous tensor under test.
      at::Tensor transposed = source.transpose(0, 1);
      at::Stream cuda_stream = get_default_cuda_stream();
      transposed.record_stream(cuda_stream);
      out << "1";
    } catch (const std::exception&) {
      out << "exception";
    }
  } else {
    // Without a CUDA device only the marker is recorded.
    out << "no_cuda";
  }
  out << "\n";
  out.saveFile();
}

// --- Error path: CPU tensor + CUDA stream (only when CUDA is available) ---
// Records how record_stream treats a CPU tensor under each framework.
// Writes "CpuTensorCudaStream " followed by "no_cuda", "1" or "exception".
TEST_F(RecordStreamTest, CpuTensorCudaStream) {
  auto output_path = g_custom_param.get();
  FileManerger out(output_path);
  out.openAppend();
  out << "CpuTensorCudaStream ";
  if (torch::cuda::is_available()) {
    try {
      at::Stream cuda_stream = get_default_cuda_stream();
      cpu_tensor.record_stream(cuda_stream);
      out << "1";
    } catch (const std::exception&) {
      out << "exception";
    }
  } else {
    // Without a CUDA device only the marker is recorded.
    out << "no_cuda";
  }
  out << "\n";
  out.saveFile();
}

// --- Error path: CPU tensor + CPU stream (no CUDA dependency) ---
// record_stream is a CUDA-only API, so a CPU stream is expected to raise.
// Writes "CpuTensorCpuStream " followed by "1" or "exception".
TEST_F(RecordStreamTest, CpuTensorCpuStream) {
  auto output_path = g_custom_param.get();
  FileManerger out(output_path);
  out.openAppend();
  out << "CpuTensorCpuStream ";

  // The stream is built outside the try block, exactly as before, so a failure
  // while constructing it is not reported as "exception".
  const c10::Device cpu_device(c10::DeviceType::CPU, 0);
  c10::Stream cpu_stream(c10::Stream::DEFAULT, cpu_device);

  try {
    cpu_tensor.record_stream(cpu_stream);
    out << "1";
  } catch (const std::exception&) {
    out << "exception";
  }
  out << "\n";
  out.saveFile();
}

} // namespace test
} // namespace at
Loading