Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,12 @@ set(TORCH_LIBRARIES "")
# Collect every Torch shared/static library shipped with the libtorch
# distribution. NOTE(review): file(GLOB_RECURSE) will not pick up newly
# added libraries until the next reconfigure.
file(GLOB_RECURSE TORCH_LIBRARIES "${TORCH_DIR}/lib/*.so"
     "${TORCH_DIR}/lib/*.a")

# CUDA-enabled libtorch headers include <cuda_runtime_api.h>; locate the
# CUDA toolkit so its header directories can be added to the search path.
find_package(CUDAToolkit QUIET)
set(TORCH_INCLUDE_DIR "${TORCH_DIR}/include"
    "${TORCH_DIR}/include/torch/csrc/api/include/")
if(CUDAToolkit_FOUND)
  list(APPEND TORCH_INCLUDE_DIR "${CUDAToolkit_INCLUDE_DIRS}")
else()
  # Fail loudly at configure time instead of with an opaque compiler error
  # later: a CUDA-enabled libtorch/Paddle cannot compile without the CUDA
  # headers. WARNING (not FATAL_ERROR) because the tests themselves guard
  # on torch::cuda::is_available(), so CPU-only builds may be intentional.
  message(
    WARNING
      "CUDAToolkit was not found. If the Torch/Paddle headers being built "
      "require CUDA (e.g. cuda_runtime_api.h), compilation will fail with "
      "a missing-header error. Install the CUDA toolkit or build against "
      "CPU-only libraries.")
endif()
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

find_package(CUDAToolkit QUIET) + conditional include path append can still leave builds failing later with missing cuda_runtime_api.h (e.g., CUDA-enabled libtorch headers included but CUDA toolkit not found), and the failure will be a compiler error with no actionable CMake message. Consider adding a clear message(FATAL_ERROR ...) (or at least message(WARNING ...)) when CUDA-dependent headers/tests are being built but CUDAToolkit is not found, so the configuration fails early and is easier to debug.

Suggested change
endif()
endif()
if(NOT CUDAToolkit_FOUND)
message(
FATAL_ERROR
"CUDAToolkit was not found, but CUDA-dependent Torch tests are enabled. "
"Please install the CUDA toolkit or disable building these Torch tests.")
endif()

Copilot uses AI. Check for mistakes.

# Staging folder for the generated Torch test binaries. BIN_PREFIX is
# presumably prepended to each Torch test target name ("torch_<name>") —
# confirm against cmake/build.cmake, which consumes these variables.
set(TORCH_TARGET_FOLDER ${CMAKE_BINARY_DIR}/torch)
set(BIN_PREFIX "torch_")
Expand Down Expand Up @@ -120,11 +124,18 @@ set(PADDLE_INCLUDE_DIR
"${PADDLE_DIR}/include/paddle/phi/api/include/compat/"
"${PADDLE_DIR}/include/paddle/phi/api/include/compat/torch/csrc/api/include/"
)
# When the CUDA toolkit is available, expose its headers (and the bundled
# CCCL headers under <toolkit>/include/cccl) to Paddle-API targets.
if(CUDAToolkit_FOUND)
  list(APPEND PADDLE_INCLUDE_DIR "${CUDAToolkit_INCLUDE_DIRS}")
  list(APPEND PADDLE_INCLUDE_DIR "${CUDAToolkit_INCLUDE_DIRS}/cccl")
endif()

# Paddle shared libraries linked by the Paddle-API test binaries.
set(PADDLE_LIBRARIES
    "${PADDLE_DIR}/base/libpaddle.so" "${PADDLE_DIR}/libs/libcommon.so"
    "${PADDLE_DIR}/libs/libphi.so" "${PADDLE_DIR}/libs/libphi_core.so"
    "${PADDLE_DIR}/libs/libphi_gpu.so")
if(CUDAToolkit_FOUND)
  # CUDA::cudart is an imported target carrying both the include dirs and
  # the CUDA runtime library.
  list(APPEND PADDLE_LIBRARIES CUDA::cudart)
endif()
# NOTE(review): directory-scoped link_directories leaks into every target
# declared below this point; prefer absolute library paths or imported
# targets if this file is reworked.
link_directories("${PADDLE_DIR}/base")
link_directories("${PADDLE_DIR}/libs")

Expand Down
14 changes: 14 additions & 0 deletions cmake/build.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@ function(
# Tag the test binary with the active API backend so sources can switch
# between Torch and Paddle code paths at compile time.
target_compile_definitions(${_test_name}
  PRIVATE USE_PADDLE_API=${USE_PADDLE_API})
message(STATUS "USE_PADDLE_API: ${USE_PADDLE_API}")
# Paddle's GPU headers need PADDLE_WITH_CUDA to expose CUDA declarations;
# only define it when the CUDA toolkit was actually found at configure time.
if(USE_PADDLE_API AND CUDAToolkit_FOUND)
  target_compile_definitions(${_test_name} PRIVATE PADDLE_WITH_CUDA)
endif()
if(NOT USE_PADDLE_API)
  # libtorch_cuda.so registers CUDA hooks via static initializers. Linux's
  # --as-needed would normally strip it from DT_NEEDED since no symbols are
  # directly referenced; force-load it with --no-as-needed.
  # NOTE(review): this uses the keyword-less target_link_libraries
  # signature; if the same target is linked elsewhere with
  # PRIVATE/PUBLIC, CMake rejects mixing the two forms — confirm against
  # the rest of this function (its header is outside this view).
  foreach(_dep_lib ${DEPS_LIBRARIES})
    if("${_dep_lib}" MATCHES "libtorch_cuda\\.so$")
      target_link_libraries(${_test_name}
        "-Wl,--no-as-needed,${_dep_lib},--as-needed")
    endif()
  endforeach()
endif()
# Register with CTest; every test must finish within 5 seconds.
add_test(NAME ${_test_name} COMMAND ${_test_name})
set_tests_properties(${_test_name} PROPERTIES TIMEOUT 5)
set_target_properties(${_test_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY
Expand Down
247 changes: 247 additions & 0 deletions test/RecordStreamTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
#include <ATen/ATen.h>
#include <ATen/core/Tensor.h>
#include <ATen/ops/zeros.h>
#include <c10/core/Stream.h>
#include <c10/cuda/CUDAStream.h>
#include <gtest/gtest.h>
#include <torch/all.h>

#include <string>
#include <vector>

#include "../src/file_manager.h"

extern paddle_api_test::ThreadSafeParam g_custom_param;

namespace at {
namespace test {

using paddle_api_test::FileManerger;
using paddle_api_test::ThreadSafeParam;

// Fixture shared by all record_stream tests: SetUp() builds a small
// {2, 3} float CPU tensor that individual tests either move to CUDA
// or use directly on the CPU path.
class RecordStreamTest : public ::testing::Test {
 protected:
  void SetUp() override { cpu_tensor = at::zeros({2, 3}, at::kFloat); }
  at::Tensor cpu_tensor;
};

// Returns the current CUDA stream on device 0, converted to the generic
// c10::Stream wrapper accepted by Tensor::record_stream().
static at::Stream get_default_cuda_stream() {
  const c10::cuda::CUDAStream current_stream =
      c10::cuda::getCurrentCUDAStream(0);
  return current_stream;
}

// --- Basic functionality: CUDA tensor + CUDA stream ---

// kFloat, shape {2, 3} (small). First test in the suite, so it creates
// the result file instead of appending to it.
TEST_F(RecordStreamTest, CudaFloat2x3) {
  FileManerger recorder(g_custom_param.get());
  recorder.createFile();
  recorder << "CudaFloat2x3 ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor gpu_tensor = cpu_tensor.cuda();
      gpu_tensor.record_stream(get_default_cuda_stream());
      recorder << "1";
    } catch (const std::exception&) {
      recorder << "exception";
    }
  } else {
    recorder << "no_cuda";
  }
  recorder << "\n";
  recorder.saveFile();
}

// kDouble, shape {4} (small, different dtype).
TEST_F(RecordStreamTest, CudaDouble4) {
  FileManerger recorder(g_custom_param.get());
  recorder.openAppend();
  recorder << "CudaDouble4 ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor gpu_tensor = at::zeros({4}, at::kDouble).cuda();
      gpu_tensor.record_stream(get_default_cuda_stream());
      recorder << "1";
    } catch (const std::exception&) {
      recorder << "exception";
    }
  } else {
    recorder << "no_cuda";
  }
  recorder << "\n";
  recorder.saveFile();
}

// kInt, shape {100, 100} (large: >= 10000 elements).
TEST_F(RecordStreamTest, CudaInt100x100) {
  FileManerger recorder(g_custom_param.get());
  recorder.openAppend();
  recorder << "CudaInt100x100 ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor gpu_tensor = at::zeros({100, 100}, at::kInt).cuda();
      gpu_tensor.record_stream(get_default_cuda_stream());
      recorder << "1";
    } catch (const std::exception&) {
      recorder << "exception";
    }
  } else {
    recorder << "no_cuda";
  }
  recorder << "\n";
  recorder.saveFile();
}

// kLong, shape {} (0-d scalar tensor).
TEST_F(RecordStreamTest, CudaLongScalar) {
  FileManerger recorder(g_custom_param.get());
  recorder.openAppend();
  recorder << "CudaLongScalar ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor gpu_tensor = at::zeros({}, at::kLong).cuda();
      gpu_tensor.record_stream(get_default_cuda_stream());
      recorder << "1";
    } catch (const std::exception&) {
      recorder << "exception";
    }
  } else {
    recorder << "no_cuda";
  }
  recorder << "\n";
  recorder.saveFile();
}

// kFloat, shape {0} (empty tensor, boundary shape).
TEST_F(RecordStreamTest, CudaEmptyShape) {
  FileManerger recorder(g_custom_param.get());
  recorder.openAppend();
  recorder << "CudaEmptyShape ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor gpu_tensor = at::zeros({0}, at::kFloat).cuda();
      gpu_tensor.record_stream(get_default_cuda_stream());
      recorder << "1";
    } catch (const std::exception&) {
      recorder << "exception";
    }
  } else {
    recorder << "no_cuda";
  }
  recorder << "\n";
  recorder.saveFile();
}

// kFloat, shape {1, 1, 1} (all-ones dimensions, boundary shape).
TEST_F(RecordStreamTest, CudaAllOnes) {
  FileManerger recorder(g_custom_param.get());
  recorder.openAppend();
  recorder << "CudaAllOnes ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor gpu_tensor = at::zeros({1, 1, 1}, at::kFloat).cuda();
      gpu_tensor.record_stream(get_default_cuda_stream());
      recorder << "1";
    } catch (const std::exception&) {
      recorder << "exception";
    }
  } else {
    recorder << "no_cuda";
  }
  recorder << "\n";
  recorder.saveFile();
}

// kFloat, non-contiguous tensor (via transpose).
TEST_F(RecordStreamTest, CudaNonContiguous) {
  FileManerger recorder(g_custom_param.get());
  recorder.openAppend();
  recorder << "CudaNonContiguous ";
  if (torch::cuda::is_available()) {
    try {
      at::Tensor contiguous_base = at::zeros({3, 4}, at::kFloat).cuda();
      at::Tensor view = contiguous_base.transpose(0, 1);  // non-contiguous
      view.record_stream(get_default_cuda_stream());
      recorder << "1";
    } catch (const std::exception&) {
      recorder << "exception";
    }
  } else {
    recorder << "no_cuda";
  }
  recorder << "\n";
  recorder.saveFile();
}

// --- Error path: CPU tensor + CUDA stream (when CUDA is present) ---
// Exercises how record_stream handles a CPU tensor under both frameworks.
TEST_F(RecordStreamTest, CpuTensorCudaStream) {
  FileManerger recorder(g_custom_param.get());
  recorder.openAppend();
  recorder << "CpuTensorCudaStream ";
  if (torch::cuda::is_available()) {
    try {
      cpu_tensor.record_stream(get_default_cuda_stream());
      recorder << "1";
    } catch (const std::exception&) {
      recorder << "exception";
    }
  } else {
    recorder << "no_cuda";
  }
  recorder << "\n";
  recorder.saveFile();
}

// --- Error path: CPU tensor + CPU stream (no CUDA dependency) ---
// record_stream is a CUDA-only API, so a CPU stream should trigger an
// exception.
TEST_F(RecordStreamTest, CpuTensorCpuStream) {
  FileManerger recorder(g_custom_param.get());
  recorder.openAppend();
  recorder << "CpuTensorCpuStream ";
  const c10::Stream cpu_stream(c10::Stream::DEFAULT,
                               c10::Device(c10::DeviceType::CPU, 0));
  try {
    cpu_tensor.record_stream(cpu_stream);
    recorder << "1";
  } catch (const std::exception&) {
    recorder << "exception";
  }
  recorder << "\n";
  recorder.saveFile();
}

} // namespace test
} // namespace at