diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1b29c5b..3c479cc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -87,8 +87,12 @@
 set(TORCH_LIBRARIES "")
 file(GLOB_RECURSE TORCH_LIBRARIES "${TORCH_DIR}/lib/*.so"
      "${TORCH_DIR}/lib/*.a")
+find_package(CUDAToolkit QUIET)
 set(TORCH_INCLUDE_DIR "${TORCH_DIR}/include"
     "${TORCH_DIR}/include/torch/csrc/api/include/")
+if(CUDAToolkit_FOUND)
+  list(APPEND TORCH_INCLUDE_DIR "${CUDAToolkit_INCLUDE_DIRS}")
+endif()
 set(TORCH_TARGET_FOLDER ${CMAKE_BINARY_DIR}/torch)
 set(BIN_PREFIX "torch_")
 
@@ -120,11 +124,18 @@
 set(PADDLE_INCLUDE_DIR
     "${PADDLE_DIR}/include/paddle/phi/api/include/compat/"
     "${PADDLE_DIR}/include/paddle/phi/api/include/compat/torch/csrc/api/include/"
 )
+if(CUDAToolkit_FOUND)
+  list(APPEND PADDLE_INCLUDE_DIR "${CUDAToolkit_INCLUDE_DIRS}")
+  list(APPEND PADDLE_INCLUDE_DIR "${CUDAToolkit_INCLUDE_DIRS}/cccl")
+endif()
 set(PADDLE_LIBRARIES "${PADDLE_DIR}/base/libpaddle.so"
     "${PADDLE_DIR}/libs/libcommon.so"
     "${PADDLE_DIR}/libs/libphi.so"
     "${PADDLE_DIR}/libs/libphi_core.so"
     "${PADDLE_DIR}/libs/libphi_gpu.so")
+if(CUDAToolkit_FOUND)
+  list(APPEND PADDLE_LIBRARIES CUDA::cudart)
+endif()
 link_directories("${PADDLE_DIR}/base")
 link_directories("${PADDLE_DIR}/libs")
diff --git a/cmake/build.cmake b/cmake/build.cmake
index 2bcaf4e..e6fe44a 100644
--- a/cmake/build.cmake
+++ b/cmake/build.cmake
@@ -24,6 +24,20 @@ function(
   target_compile_definitions(${_test_name} PRIVATE
                              USE_PADDLE_API=${USE_PADDLE_API})
   message(STATUS "USE_PADDLE_API: ${USE_PADDLE_API}")
+  if(USE_PADDLE_API AND CUDAToolkit_FOUND)
+    target_compile_definitions(${_test_name} PRIVATE PADDLE_WITH_CUDA)
+  endif()
+  if(NOT USE_PADDLE_API)
+    # libtorch_cuda.so registers CUDA hooks via static initializers. Linux's
+    # --as-needed would normally strip it from DT_NEEDED since no symbols are
+    # directly referenced; force-load it with --no-as-needed.
+    foreach(_dep_lib ${DEPS_LIBRARIES})
+      if("${_dep_lib}" MATCHES "libtorch_cuda\\.so$")
+        target_link_libraries(${_test_name}
+          "-Wl,--no-as-needed,${_dep_lib},--as-needed")
+      endif()
+    endforeach()
+  endif()
   add_test(NAME ${_test_name} COMMAND ${_test_name})
   set_tests_properties(${_test_name} PROPERTIES TIMEOUT 5)
   set_target_properties(${_test_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY
diff --git a/test/RecordStreamTest.cpp b/test/RecordStreamTest.cpp
new file mode 100644
index 0000000..c0704c9
--- /dev/null
+++ b/test/RecordStreamTest.cpp
@@ -0,0 +1,247 @@
+#include <gtest/gtest.h>
+#include <exception>
+#include <string>
+#include <ATen/ATen.h>
+#include <c10/core/Device.h>
+#include <c10/core/Stream.h>
+#include <torch/torch.h>
+
+#include <c10/cuda/CUDAStream.h>
+#include <torch/cuda.h>
+
+#include "../src/file_manager.h"
+
+extern paddle_api_test::ThreadSafeParam g_custom_param;
+
+namespace at {
+namespace test {
+
+using paddle_api_test::FileManerger;
+using paddle_api_test::ThreadSafeParam;
+
+class RecordStreamTest : public ::testing::Test {
+ protected:
+  void SetUp() override { cpu_tensor = at::zeros({2, 3}, at::kFloat); }
+  at::Tensor cpu_tensor;
+};
+
+static at::Stream get_default_cuda_stream() {
+  return c10::cuda::getCurrentCUDAStream(0);
+}
+
+// --- Basic functionality: CUDA tensor + CUDA stream ---
+
+// kFloat, shape {2,3} (small)
+TEST_F(RecordStreamTest, CudaFloat2x3) {
+  auto file_name = g_custom_param.get();
+  FileManerger file(file_name);
+  file.createFile();
+  file << "CudaFloat2x3 ";
+  if (!torch::cuda::is_available()) {
+    file << "no_cuda";
+    file << "\n";
+    file.saveFile();
+    return;
+  }
+  try {
+    at::Tensor t = cpu_tensor.cuda();
+    at::Stream stream = get_default_cuda_stream();
+    t.record_stream(stream);
+    file << "1";
+  } catch (const std::exception& e) {
+    file << "exception";
+  }
+  file << "\n";
+  file.saveFile();
+}
+
+// kDouble, shape {4} (small, different dtype)
+TEST_F(RecordStreamTest, CudaDouble4) {
+  auto file_name = g_custom_param.get();
+  FileManerger file(file_name);
+  file.openAppend();
+  file << "CudaDouble4 ";
+  if (!torch::cuda::is_available()) {
+    file << "no_cuda";
+    file << "\n";
+    file.saveFile();
+    return;
+  }
+  try {
+    at::Tensor t = at::zeros({4}, at::kDouble).cuda();
+    at::Stream stream = get_default_cuda_stream();
+    t.record_stream(stream);
+    file << "1";
+  } catch (const std::exception& e) {
+    file << "exception";
+  }
+  file << "\n";
+  file.saveFile();
+}
+
+// kInt, shape {100,100} (large, >= 10000 elements)
+TEST_F(RecordStreamTest, CudaInt100x100) {
+  auto file_name = g_custom_param.get();
+  FileManerger file(file_name);
+  file.openAppend();
+  file << "CudaInt100x100 ";
+  if (!torch::cuda::is_available()) {
+    file << "no_cuda";
+    file << "\n";
+    file.saveFile();
+    return;
+  }
+  try {
+    at::Tensor t = at::zeros({100, 100}, at::kInt).cuda();
+    at::Stream stream = get_default_cuda_stream();
+    t.record_stream(stream);
+    file << "1";
+  } catch (const std::exception& e) {
+    file << "exception";
+  }
+  file << "\n";
+  file.saveFile();
+}
+
+// kLong, shape {} (0-d scalar tensor)
+TEST_F(RecordStreamTest, CudaLongScalar) {
+  auto file_name = g_custom_param.get();
+  FileManerger file(file_name);
+  file.openAppend();
+  file << "CudaLongScalar ";
+  if (!torch::cuda::is_available()) {
+    file << "no_cuda";
+    file << "\n";
+    file.saveFile();
+    return;
+  }
+  try {
+    at::Tensor t = at::zeros({}, at::kLong).cuda();
+    at::Stream stream = get_default_cuda_stream();
+    t.record_stream(stream);
+    file << "1";
+  } catch (const std::exception& e) {
+    file << "exception";
+  }
+  file << "\n";
+  file.saveFile();
+}
+
+// kFloat, shape {0} (empty tensor, boundary shape)
+TEST_F(RecordStreamTest, CudaEmptyShape) {
+  auto file_name = g_custom_param.get();
+  FileManerger file(file_name);
+  file.openAppend();
+  file << "CudaEmptyShape ";
+  if (!torch::cuda::is_available()) {
+    file << "no_cuda";
+    file << "\n";
+    file.saveFile();
+    return;
+  }
+  try {
+    at::Tensor t = at::zeros({0}, at::kFloat).cuda();
+    at::Stream stream = get_default_cuda_stream();
+    t.record_stream(stream);
+    file << "1";
+  } catch (const std::exception& e) {
+    file << "exception";
+  }
+  file << "\n";
+  file.saveFile();
+}
+
+// kFloat, shape {1,1,1} (all-ones dimensions, boundary shape)
+TEST_F(RecordStreamTest, CudaAllOnes) {
+  auto file_name = g_custom_param.get();
+  FileManerger file(file_name);
+  file.openAppend();
+  file << "CudaAllOnes ";
+  if (!torch::cuda::is_available()) {
+    file << "no_cuda";
+    file << "\n";
+    file.saveFile();
+    return;
+  }
+  try {
+    at::Tensor t = at::zeros({1, 1, 1}, at::kFloat).cuda();
+    at::Stream stream = get_default_cuda_stream();
+    t.record_stream(stream);
+    file << "1";
+  } catch (const std::exception& e) {
+    file << "exception";
+  }
+  file << "\n";
+  file.saveFile();
+}
+
+// kFloat, non-contiguous tensor (via transpose)
+TEST_F(RecordStreamTest, CudaNonContiguous) {
+  auto file_name = g_custom_param.get();
+  FileManerger file(file_name);
+  file.openAppend();
+  file << "CudaNonContiguous ";
+  if (!torch::cuda::is_available()) {
+    file << "no_cuda";
+    file << "\n";
+    file.saveFile();
+    return;
+  }
+  try {
+    at::Tensor base = at::zeros({3, 4}, at::kFloat).cuda();
+    at::Tensor t = base.transpose(0, 1);  // non-contiguous
+    at::Stream stream = get_default_cuda_stream();
+    t.record_stream(stream);
+    file << "1";
+  } catch (const std::exception& e) {
+    file << "exception";
+  }
+  file << "\n";
+  file.saveFile();
+}
+
+// --- Error path: CPU tensor + CUDA stream (when CUDA is present) ---
+// How record_stream treats a CPU tensor under both frameworks
+TEST_F(RecordStreamTest, CpuTensorCudaStream) {
+  auto file_name = g_custom_param.get();
+  FileManerger file(file_name);
+  file.openAppend();
+  file << "CpuTensorCudaStream ";
+  if (!torch::cuda::is_available()) {
+    file << "no_cuda";
+    file << "\n";
+    file.saveFile();
+    return;
+  }
+  try {
+    at::Stream stream = get_default_cuda_stream();
+    cpu_tensor.record_stream(stream);
+    file << "1";
+  } catch (const std::exception& e) {
+    file << "exception";
+  }
+  file << "\n";
+  file.saveFile();
+}
+
+// --- Error path: CPU tensor + CPU stream (no CUDA dependency) ---
+// record_stream is a CUDA-only API; a CPU stream should trigger an exception
+TEST_F(RecordStreamTest, CpuTensorCpuStream) {
+  auto file_name = g_custom_param.get();
+  FileManerger file(file_name);
+  file.openAppend();
+  file << "CpuTensorCpuStream ";
+  c10::Stream stream(c10::Stream::DEFAULT,
+                     c10::Device(c10::DeviceType::CPU, 0));
+  try {
+    cpu_tensor.record_stream(stream);
+    file << "1";
+  } catch (const std::exception& e) {
+    file << "exception";
+  }
+  file << "\n";
+  file.saveFile();
+}
+
+}  // namespace test
+}  // namespace at