diff --git a/perception/autoware_tensorrt_plugins/CMakeLists.txt b/perception/autoware_tensorrt_plugins/CMakeLists.txt
index da02879375a5d..28689ddc456e0 100644
--- a/perception/autoware_tensorrt_plugins/CMakeLists.txt
+++ b/perception/autoware_tensorrt_plugins/CMakeLists.txt
@@ -147,6 +147,28 @@ if(TRT_AVAIL AND CUDA_AVAIL AND SPCONV_AVAIL)
spconv::spconv
)
+ if(BUILD_TESTING)
+ find_package(ament_cmake_gtest REQUIRED)
+
+ ament_add_gtest(reference_kernels_test
+ test/argsort_kernel_test.cpp
+ test/unique_kernel_test.cpp
+ )
+ if(TARGET reference_kernels_test)
+ target_link_libraries(reference_kernels_test
+ CUDA::cudart
+ cuda_ops
+ )
+ target_include_directories(reference_kernels_test PRIVATE
+ include
+ test
+ ${autoware_cuda_utils_INCLUDE_DIRS}
+ ${CUDA_INCLUDE_DIRS}
+ )
+ target_compile_definitions(reference_kernels_test PRIVATE _GLIBCXX_USE_CXX11_ABI=1)
+ endif()
+ endif()
+
install(
TARGETS ${PROJECT_NAME}
DESTINATION share/${PROJECT_NAME}/plugins
diff --git a/perception/autoware_tensorrt_plugins/package.xml b/perception/autoware_tensorrt_plugins/package.xml
index f05496a2e993d..be1f7373ea835 100644
--- a/perception/autoware_tensorrt_plugins/package.xml
+++ b/perception/autoware_tensorrt_plugins/package.xml
@@ -20,6 +20,7 @@
autoware_cuda_dependency_meta
autoware_cuda_utils
+ ament_cmake_gtest
ament_cmake_ros
ament_lint_auto
autoware_lint_common
diff --git a/perception/autoware_tensorrt_plugins/src/unique_ops/unique.cu b/perception/autoware_tensorrt_plugins/src/unique_ops/unique.cu
index 4c4a15bdcf8b8..933f1c0bda19a 100644
--- a/perception/autoware_tensorrt_plugins/src/unique_ops/unique.cu
+++ b/perception/autoware_tensorrt_plugins/src/unique_ops/unique.cu
@@ -157,8 +157,7 @@ std::int64_t unique(
sorted_input;
cudaMemcpyAsync(
- range_ptr + num_out * sizeof(int64_t), &num_input_elements, sizeof(std::int64_t),
- cudaMemcpyHostToDevice, stream);
+ range_ptr + num_out, &num_input_elements, sizeof(std::int64_t), cudaMemcpyHostToDevice, stream);
thrust::adjacent_difference(policy, range_ptr + 1, range_ptr + num_out + 1, unique_counts);
diff --git a/perception/autoware_tensorrt_plugins/test/argsort_kernel_test.cpp b/perception/autoware_tensorrt_plugins/test/argsort_kernel_test.cpp
new file mode 100644
index 0000000000000..c6dbd7e095019
--- /dev/null
+++ b/perception/autoware_tensorrt_plugins/test/argsort_kernel_test.cpp
@@ -0,0 +1,82 @@
+// Copyright 2026 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "autoware/argsort_ops/argsort.hpp"
+#include "test_utils.hpp"
+
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace
+{
+
+using autoware::tensorrt_plugins::test::copy_to_device;
+using autoware::tensorrt_plugins::test::copy_to_host;
+using autoware::tensorrt_plugins::test::CudaStreamGuard;
+using autoware::tensorrt_plugins::test::DeviceBuffer;
+
+std::size_t get_argsort_total_workspace_size(const std::size_t num_elements)
+{
+ const auto temp_size = get_argsort_workspace_size(num_elements);
+ const auto scratch_offset =
+ ((temp_size + alignof(std::int64_t) - 1U) / alignof(std::int64_t)) * alignof(std::int64_t);
+ return scratch_offset + sizeof(std::int64_t) * 2U * num_elements;
+}
+
+std::vector make_argsort_reference(const std::vector & input)
+{
+ std::vector indices(input.size());
+ for (std::size_t index = 0; index < input.size(); ++index) {
+ indices[index] = static_cast(index);
+ }
+
+ std::stable_sort(
+ indices.begin(), indices.end(),
+ [&input](const std::int64_t lhs, const std::int64_t rhs) { return input[lhs] < input[rhs]; });
+
+ return indices;
+}
+
+TEST(ReferenceKernelsTest, ArgsortMatchesCpuReference)
+{
+ SKIP_TEST_IF_CUDA_UNAVAILABLE();
+
+ const std::vector input{7, 3, 7, 5, 3, 3, 9, 5, 11, 7};
+ const auto reference = make_argsort_reference(input);
+
+ CudaStreamGuard stream;
+ DeviceBuffer input_d(input.size());
+ DeviceBuffer output_d(input.size());
+ DeviceBuffer workspace_d(get_argsort_total_workspace_size(input.size()));
+
+ copy_to_device(input_d.get(), input);
+
+ ASSERT_EQ(
+ argsort(
+ input_d.get(), output_d.get(), workspace_d.get(), input.size(),
+ get_argsort_workspace_size(input.size()), stream.get()),
+ cudaSuccess);
+ ASSERT_EQ(cudaStreamSynchronize(stream.get()), cudaSuccess);
+
+ EXPECT_EQ(copy_to_host(output_d.get(), input.size()), reference);
+}
+
+} // namespace
diff --git a/perception/autoware_tensorrt_plugins/test/test_utils.hpp b/perception/autoware_tensorrt_plugins/test/test_utils.hpp
new file mode 100644
index 0000000000000..b134dcfe9a5c8
--- /dev/null
+++ b/perception/autoware_tensorrt_plugins/test/test_utils.hpp
@@ -0,0 +1,109 @@
+// Copyright 2026 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+
+namespace autoware::tensorrt_plugins::test
+{
+
+class CudaStreamGuard
+{
+public:
+ CudaStreamGuard()
+ {
+ const cudaError_t status = cudaStreamCreate(&stream_);
+ if (status != cudaSuccess) {
+ throw std::runtime_error(cudaGetErrorString(status));
+ }
+ }
+
+ ~CudaStreamGuard()
+ {
+ if (stream_ != nullptr) {
+ cudaStreamDestroy(stream_);
+ }
+ }
+
+ CudaStreamGuard(const CudaStreamGuard &) = delete;
+ CudaStreamGuard & operator=(const CudaStreamGuard &) = delete;
+ CudaStreamGuard(CudaStreamGuard &&) = delete;
+ CudaStreamGuard & operator=(CudaStreamGuard &&) = delete;
+
+ [[nodiscard]] cudaStream_t get() const { return stream_; }
+
+private:
+ cudaStream_t stream_{nullptr};
+};
+
+template
+class DeviceBuffer
+{
+public:
+ explicit DeviceBuffer(std::size_t element_count) : element_count_(element_count)
+ {
+ const cudaError_t status = cudaMalloc(&data_, sizeof(T) * element_count_);
+ if (status != cudaSuccess) {
+ throw std::runtime_error(cudaGetErrorString(status));
+ }
+ }
+
+ ~DeviceBuffer()
+ {
+ if (data_ != nullptr) {
+ cudaFree(data_);
+ }
+ }
+
+ DeviceBuffer(const DeviceBuffer &) = delete;
+ DeviceBuffer & operator=(const DeviceBuffer &) = delete;
+ DeviceBuffer(DeviceBuffer &&) = delete;
+ DeviceBuffer & operator=(DeviceBuffer &&) = delete;
+
+ [[nodiscard]] T * get() const { return static_cast(data_); }
+
+private:
+ void * data_{nullptr};
+ std::size_t element_count_{0U};
+};
+
+template
+void copy_to_device(T * device_ptr, const std::vector & host_values)
+{
+ const cudaError_t status = cudaMemcpy(
+ device_ptr, host_values.data(), sizeof(T) * host_values.size(), cudaMemcpyHostToDevice);
+ if (status != cudaSuccess) {
+ throw std::runtime_error(cudaGetErrorString(status));
+ }
+}
+
+template
+std::vector copy_to_host(const T * device_ptr, const std::size_t element_count)
+{
+ std::vector host_values(element_count);
+ const cudaError_t status =
+ cudaMemcpy(host_values.data(), device_ptr, sizeof(T) * element_count, cudaMemcpyDeviceToHost);
+ if (status != cudaSuccess) {
+ throw std::runtime_error(cudaGetErrorString(status));
+ }
+ return host_values;
+}
+
+} // namespace autoware::tensorrt_plugins::test
diff --git a/perception/autoware_tensorrt_plugins/test/unique_kernel_test.cpp b/perception/autoware_tensorrt_plugins/test/unique_kernel_test.cpp
new file mode 100644
index 0000000000000..f707b00b12f23
--- /dev/null
+++ b/perception/autoware_tensorrt_plugins/test/unique_kernel_test.cpp
@@ -0,0 +1,99 @@
+// Copyright 2026 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "autoware/unique_ops/unique.hpp"
+#include "test_utils.hpp"
+
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include