diff --git a/perception/autoware_tensorrt_plugins/CMakeLists.txt b/perception/autoware_tensorrt_plugins/CMakeLists.txt index da02879375a5d..28689ddc456e0 100644 --- a/perception/autoware_tensorrt_plugins/CMakeLists.txt +++ b/perception/autoware_tensorrt_plugins/CMakeLists.txt @@ -147,6 +147,28 @@ if(TRT_AVAIL AND CUDA_AVAIL AND SPCONV_AVAIL) spconv::spconv ) + if(BUILD_TESTING) + find_package(ament_cmake_gtest REQUIRED) + + ament_add_gtest(reference_kernels_test + test/argsort_kernel_test.cpp + test/unique_kernel_test.cpp + ) + if(TARGET reference_kernels_test) + target_link_libraries(reference_kernels_test + CUDA::cudart + cuda_ops + ) + target_include_directories(reference_kernels_test PRIVATE + include + test + ${autoware_cuda_utils_INCLUDE_DIRS} + ${CUDA_INCLUDE_DIRS} + ) + target_compile_definitions(reference_kernels_test PRIVATE _GLIBCXX_USE_CXX11_ABI=1) + endif() + endif() + install( TARGETS ${PROJECT_NAME} DESTINATION share/${PROJECT_NAME}/plugins diff --git a/perception/autoware_tensorrt_plugins/package.xml b/perception/autoware_tensorrt_plugins/package.xml index f05496a2e993d..be1f7373ea835 100644 --- a/perception/autoware_tensorrt_plugins/package.xml +++ b/perception/autoware_tensorrt_plugins/package.xml @@ -20,6 +20,7 @@ autoware_cuda_dependency_meta autoware_cuda_utils + ament_cmake_gtest ament_cmake_ros ament_lint_auto autoware_lint_common diff --git a/perception/autoware_tensorrt_plugins/src/unique_ops/unique.cu b/perception/autoware_tensorrt_plugins/src/unique_ops/unique.cu index 4c4a15bdcf8b8..933f1c0bda19a 100644 --- a/perception/autoware_tensorrt_plugins/src/unique_ops/unique.cu +++ b/perception/autoware_tensorrt_plugins/src/unique_ops/unique.cu @@ -157,8 +157,7 @@ std::int64_t unique( sorted_input; cudaMemcpyAsync( - range_ptr + num_out * sizeof(int64_t), &num_input_elements, sizeof(std::int64_t), - cudaMemcpyHostToDevice, stream); + range_ptr + num_out, &num_input_elements, sizeof(std::int64_t), cudaMemcpyHostToDevice, stream); thrust::adjacent_difference(policy, range_ptr + 1, range_ptr + num_out + 1, unique_counts); diff --git a/perception/autoware_tensorrt_plugins/test/argsort_kernel_test.cpp b/perception/autoware_tensorrt_plugins/test/argsort_kernel_test.cpp new file mode 100644 index 0000000000000..c6dbd7e095019 --- /dev/null +++ b/perception/autoware_tensorrt_plugins/test/argsort_kernel_test.cpp @@ -0,0 +1,82 @@ +// Copyright 2026 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/argsort_ops/argsort.hpp" +#include "test_utils.hpp" + +#include + +#include +#include + +#include +#include +#include +#include + +namespace +{ + +using autoware::tensorrt_plugins::test::copy_to_device; +using autoware::tensorrt_plugins::test::copy_to_host; +using autoware::tensorrt_plugins::test::CudaStreamGuard; +using autoware::tensorrt_plugins::test::DeviceBuffer; + +std::size_t get_argsort_total_workspace_size(const std::size_t num_elements) +{ + const auto temp_size = get_argsort_workspace_size(num_elements); + const auto scratch_offset = + ((temp_size + alignof(std::int64_t) - 1U) / alignof(std::int64_t)) * alignof(std::int64_t); + return scratch_offset + sizeof(std::int64_t) * 2U * num_elements; +} + +std::vector make_argsort_reference(const std::vector & input) +{ + std::vector indices(input.size()); + for (std::size_t index = 0; index < input.size(); ++index) { + indices[index] = static_cast(index); + } + + std::stable_sort( + indices.begin(), indices.end(), + [&input](const std::int64_t lhs, const std::int64_t rhs) { return input[lhs] < input[rhs]; }); + + return indices; +} + +TEST(ReferenceKernelsTest, ArgsortMatchesCpuReference) +{ + SKIP_TEST_IF_CUDA_UNAVAILABLE(); + + const std::vector input{7, 3, 7, 5, 3, 3, 9, 5, 11, 7}; + const auto reference = make_argsort_reference(input); + + CudaStreamGuard stream; + DeviceBuffer input_d(input.size()); + DeviceBuffer output_d(input.size()); + DeviceBuffer workspace_d(get_argsort_total_workspace_size(input.size())); + + copy_to_device(input_d.get(), input); + + ASSERT_EQ( + argsort( + input_d.get(), output_d.get(), workspace_d.get(), input.size(), + get_argsort_workspace_size(input.size()), stream.get()), + cudaSuccess); + ASSERT_EQ(cudaStreamSynchronize(stream.get()), cudaSuccess); + + EXPECT_EQ(copy_to_host(output_d.get(), input.size()), reference); +} + +} // namespace diff --git a/perception/autoware_tensorrt_plugins/test/test_utils.hpp b/perception/autoware_tensorrt_plugins/test/test_utils.hpp new file mode 100644 index 0000000000000..b134dcfe9a5c8 --- /dev/null +++ b/perception/autoware_tensorrt_plugins/test/test_utils.hpp @@ -0,0 +1,109 @@ +// Copyright 2026 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include +#include +#include + +namespace autoware::tensorrt_plugins::test +{ + +class CudaStreamGuard +{ +public: + CudaStreamGuard() + { + const cudaError_t status = cudaStreamCreate(&stream_); + if (status != cudaSuccess) { + throw std::runtime_error(cudaGetErrorString(status)); + } + } + + ~CudaStreamGuard() + { + if (stream_ != nullptr) { + cudaStreamDestroy(stream_); + } + } + + CudaStreamGuard(const CudaStreamGuard &) = delete; + CudaStreamGuard & operator=(const CudaStreamGuard &) = delete; + CudaStreamGuard(CudaStreamGuard &&) = delete; + CudaStreamGuard & operator=(CudaStreamGuard &&) = delete; + + [[nodiscard]] cudaStream_t get() const { return stream_; } + +private: + cudaStream_t stream_{nullptr}; +}; + +template +class DeviceBuffer +{ +public: + explicit DeviceBuffer(std::size_t element_count) : element_count_(element_count) + { + const cudaError_t status = cudaMalloc(&data_, sizeof(T) * element_count_); + if (status != cudaSuccess) { + throw std::runtime_error(cudaGetErrorString(status)); + } + } + + ~DeviceBuffer() + { + if (data_ != nullptr) { + cudaFree(data_); + } + } + + DeviceBuffer(const DeviceBuffer &) = delete; + DeviceBuffer & operator=(const DeviceBuffer &) = delete; + DeviceBuffer(DeviceBuffer &&) = delete; + DeviceBuffer & operator=(DeviceBuffer &&) = delete; + + [[nodiscard]] T * get() const { return static_cast(data_); } + +private: + void * data_{nullptr}; + std::size_t element_count_{0U}; +}; + +template +void copy_to_device(T * device_ptr, const std::vector & host_values) +{ + const cudaError_t status = cudaMemcpy( + device_ptr, host_values.data(), sizeof(T) * host_values.size(), cudaMemcpyHostToDevice); + if (status != cudaSuccess) { + throw std::runtime_error(cudaGetErrorString(status)); + } +} + +template +std::vector copy_to_host(const T * device_ptr, const std::size_t element_count) +{ + std::vector host_values(element_count); + const cudaError_t status = + cudaMemcpy(host_values.data(), device_ptr, sizeof(T) * element_count, cudaMemcpyDeviceToHost); + if (status != cudaSuccess) { + throw std::runtime_error(cudaGetErrorString(status)); + } + return host_values; +} + +} // namespace autoware::tensorrt_plugins::test diff --git a/perception/autoware_tensorrt_plugins/test/unique_kernel_test.cpp b/perception/autoware_tensorrt_plugins/test/unique_kernel_test.cpp new file mode 100644 index 0000000000000..f707b00b12f23 --- /dev/null +++ b/perception/autoware_tensorrt_plugins/test/unique_kernel_test.cpp @@ -0,0 +1,99 @@ +// Copyright 2026 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/unique_ops/unique.hpp" +#include "test_utils.hpp" + +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace +{ + +using autoware::tensorrt_plugins::test::copy_to_device; +using autoware::tensorrt_plugins::test::copy_to_host; +using autoware::tensorrt_plugins::test::CudaStreamGuard; +using autoware::tensorrt_plugins::test::DeviceBuffer; + +struct UniqueReference +{ + std::vector unique_values; + std::vector inverse_indices; + std::vector counts; +}; + +UniqueReference make_unique_reference(const std::vector & input) +{ + UniqueReference reference; + reference.unique_values = input; + std::sort(reference.unique_values.begin(), reference.unique_values.end()); + reference.unique_values.erase( + std::unique(reference.unique_values.begin(), reference.unique_values.end()), + reference.unique_values.end()); + + std::map value_to_index; + for (std::size_t index = 0; index < reference.unique_values.size(); ++index) { + value_to_index.emplace(reference.unique_values[index], static_cast(index)); + } + reference.counts.resize(reference.unique_values.size(), 0); + + reference.inverse_indices.reserve(input.size()); + for (const auto value : input) { + const auto index = value_to_index.at(value); + reference.inverse_indices.push_back(index); + reference.counts[static_cast(index)] += 1; + } + + return reference; +} + +TEST(ReferenceKernelsTest, UniqueMatchesCpuReference) +{ + SKIP_TEST_IF_CUDA_UNAVAILABLE(); + + const std::vector input{7, 3, 7, 5, 3, 3, 9, 5, 11, 7}; + const UniqueReference reference = make_unique_reference(input); + + CudaStreamGuard stream; + DeviceBuffer input_d(input.size()); + DeviceBuffer unique_d(input.size()); + DeviceBuffer inverse_d(input.size()); + DeviceBuffer counts_d(input.size()); + DeviceBuffer workspace_d(get_unique_workspace_size(input.size())); + + copy_to_device(input_d.get(), input); + + const auto num_unique = unique( + input_d.get(), unique_d.get(), inverse_d.get(), counts_d.get(), workspace_d.get(), input.size(), + get_unique_workspace_size(input.size()), stream.get()); + ASSERT_EQ(cudaStreamSynchronize(stream.get()), cudaSuccess); + + const auto unique_values = copy_to_host(unique_d.get(), static_cast(num_unique)); + const auto inverse_indices = copy_to_host(inverse_d.get(), input.size()); + const auto counts = copy_to_host(counts_d.get(), static_cast(num_unique)); + + EXPECT_EQ(unique_values, reference.unique_values); + EXPECT_EQ(inverse_indices, reference.inverse_indices); + EXPECT_EQ(counts, reference.counts); +} + +} // namespace