diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
index 1a12b62bdacdb..7b14713085621 100644
--- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
+++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
@@ -119,7 +119,10 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
   layer->setInput(1, *shape);
   PADDLE_ENFORCE_NOT_NULL(
       layer,
-      common::errors::InvalidArgument("TRT shuffle layer building failed."));
+      common::errors::InvalidArgument(
+          "TensorRT returned nullptr while constructing the dynamic shuffle "
+          "layer for input 'input'. Verify that the supplied runtime shape is "
+          "compatible with the network definition."));
   engine_->DeclareOutput(layer, 0, "y");
   engine_->FreezeNetwork();
 #if IS_TRT_VERSION_GE(8600)
@@ -302,7 +305,10 @@ TEST_F(TensorRTDynamicEngineTest, test_spmm) {
   LOG(INFO) << "create weights";
   PADDLE_ENFORCE_NOT_NULL(
       fc_layer,
-      common::errors::InvalidArgument("TRT SPMM layer building failed."));
+      common::errors::InvalidArgument(
+          "TensorRT returned a null layer when constructing the sparse "
+          "matrix-multiply plugin. Ensure the SpMM plugin is registered and "
+          "the weight/bias dimensions are valid."));
   engine_->DeclareOutput(fc_layer, 0, "y");
 
   engine_->FreezeNetwork();
@@ -440,9 +446,12 @@ TEST_F(TensorRTDynamicTestFusedTokenPrune, test_fused_token_prune) {
                                           /*flag_varseqlen*/ false);
   std::vector<nvinfer1::ITensor*> itensors = {attn, x, mask, new_mask};
   auto *layer = engine_->AddDynamicPlugin(itensors.data(), 4, plugin);
-  PADDLE_ENFORCE_NOT_NULL(layer,
-                          common::errors::InvalidArgument(
-                              "TRT fused_token_prune layer building failed."));
+  PADDLE_ENFORCE_NOT_NULL(
+      layer,
+      common::errors::InvalidArgument(
+          "TensorRT could not add the fused_token_prune plugin layer. "
+          "Confirm the dynamic plugin is registered and the input tensor "
+          "shapes (attn/x/mask/new_mask) follow the expected layout."));
   std::vector<std::string> output_tensor_names{"out_slimmed_x", "out_cls_inds"};
   for (size_t i = 0; i < 2; i++) {
     layer->getOutput(i)->setName(output_tensor_names[i].c_str());
@@ -642,9 +651,12 @@ TEST_F(TensorRTDynamicTestFusedTokenPruneHalf, test_fused_token_prune) {
                                           /*flag_varseqlen*/ false);
   std::vector<nvinfer1::ITensor*> itensors = {attn, x, mask, new_mask};
   auto *layer = engine_->AddDynamicPlugin(itensors.data(), 4, plugin);
-  PADDLE_ENFORCE_NOT_NULL(layer,
-                          common::errors::InvalidArgument(
-                              "TRT fused_token_prune layer building failed."));
+  PADDLE_ENFORCE_NOT_NULL(
+      layer,
+      common::errors::InvalidArgument(
+          "TensorRT could not add the fused_token_prune plugin layer. "
+          "Confirm the half-precision plugin registration and input tensor "
+          "layouts (attn/x/mask/new_mask) are valid."));
   std::vector<std::string> output_tensor_names{"out_slimmed_x", "out_cls_inds"};
   for (size_t i = 0; i < 2; i++) {
     layer->getOutput(i)->setName(output_tensor_names[i].c_str());
@@ -987,9 +999,12 @@ TEST_F(TensorRTDynamicShapeGNTest, test_trt_dynamic_shape_groupnorm) {
       TRT_ENGINE_ADD_LAYER(engine_, Dequantize, *gn_tensor, *dqscale_tensor);
   dq_layer->setAxis(1);
 
-  PADDLE_ENFORCE_NOT_NULL(groupnorm_layer,
-                          common::errors::InvalidArgument(
-                              "TRT GN plugin layer building failed."));
+  PADDLE_ENFORCE_NOT_NULL(
+      groupnorm_layer,
+      common::errors::InvalidArgument(
+          "TensorRT failed to create the group-normalization plugin layer. "
+          "Ensure the plugin is registered and the provided scale/bias/"
+          "group parameters are consistent."));
   engine_->DeclareOutput(dq_layer, 0, "y");
   engine_->FreezeNetwork();
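Aside: the hunks above all follow the same guard pattern. TensorRT builder calls return nullptr on failure rather than throwing, so every layer-creation call is immediately followed by a PADDLE_ENFORCE_NOT_NULL whose message names the failing layer and the likely cause. A minimal standalone sketch of that check-and-report pattern; ENFORCE_NOT_NULL, Layer, and AddShuffleLayer are illustrative stand-ins, not Paddle or TensorRT APIs:

#include <cstdio>
#include <cstdlib>

// Toy analogue of PADDLE_ENFORCE_NOT_NULL: a null result aborts with a
// message that identifies the failing builder call.
#define ENFORCE_NOT_NULL(ptr, msg)                          \
  do {                                                      \
    if ((ptr) == nullptr) {                                 \
      std::fprintf(stderr, "InvalidArgument: %s\n", (msg)); \
      std::abort();                                         \
    }                                                       \
  } while (0)

struct Layer {};
Layer* AddShuffleLayer(bool ok) { return ok ? new Layer : nullptr; }

int main() {
  Layer* layer = AddShuffleLayer(true);
  ENFORCE_NOT_NULL(layer,
                   "builder returned nullptr while constructing the dynamic "
                   "shuffle layer; check the supplied runtime shape");
  delete layer;
  return 0;
}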
" + "Ensure the plugin is registered and the provided scale/bias/" + "group parameters are consistent.")); engine_->DeclareOutput(dq_layer, 0, "y"); engine_->FreezeNetwork(); diff --git a/paddle/phi/core/framework/data_type_transform.cc b/paddle/phi/core/framework/data_type_transform.cc index 6ed397d85d378..b52b7bc68c476 100644 --- a/paddle/phi/core/framework/data_type_transform.cc +++ b/paddle/phi/core/framework/data_type_transform.cc @@ -34,6 +34,74 @@ struct CastDataTypeFunctor { } }; +template <> +struct CastDataTypeFunctor<::phi::dtype::float8_e5m2, ::phi::complex64> { + HOSTDEVICE inline ::phi::complex64 operator()( + ::phi::dtype::float8_e5m2 in) const { + return ::phi::complex64(static_cast(in)); + } +}; + +template <> +struct CastDataTypeFunctor<::phi::dtype::float8_e5m2, ::phi::complex128> { + HOSTDEVICE inline ::phi::complex128 operator()( + ::phi::dtype::float8_e5m2 in) const { + return ::phi::complex128(static_cast(in)); + } +}; + +template <> +struct CastDataTypeFunctor<::phi::dtype::float8_e4m3fn, ::phi::complex64> { + HOSTDEVICE inline ::phi::complex64 operator()( + ::phi::dtype::float8_e4m3fn in) const { + return ::phi::complex64(static_cast(in)); + } +}; + +template <> +struct CastDataTypeFunctor<::phi::dtype::float8_e4m3fn, ::phi::complex128> { + HOSTDEVICE inline ::phi::complex128 operator()( + ::phi::dtype::float8_e4m3fn in) const { + return ::phi::complex128(static_cast(in)); + } +}; + +template <> +struct CastDataTypeFunctor<::phi::dtype::bfloat16, + ::phi::dtype::complex> { + HOSTDEVICE inline ::phi::dtype::complex operator()( + ::phi::dtype::bfloat16 in) const { + return ::phi::dtype::complex(static_cast(in)); + } +}; + +template <> +struct CastDataTypeFunctor<::phi::dtype::bfloat16, + ::phi::dtype::complex> { + HOSTDEVICE inline ::phi::dtype::complex operator()( + ::phi::dtype::bfloat16 in) const { + return ::phi::dtype::complex(static_cast(in)); + } +}; + +template <> +struct CastDataTypeFunctor<::phi::dtype::float16, + ::phi::dtype::complex> { + HOSTDEVICE inline ::phi::dtype::complex operator()( + ::phi::dtype::float16 in) const { + return ::phi::dtype::complex(static_cast(in)); + } +}; + +template <> +struct CastDataTypeFunctor<::phi::dtype::float16, + ::phi::dtype::complex> { + HOSTDEVICE inline ::phi::dtype::complex operator()( + ::phi::dtype::float16 in) const { + return ::phi::dtype::complex(static_cast(in)); + } +}; + #if defined(PADDLE_WITH_XPU) template diff --git a/test/cpp/fluid/framework/data_type_transform_test.cc b/test/cpp/fluid/framework/data_type_transform_test.cc index 6a510d21acdca..b92f040f50cdb 100644 --- a/test/cpp/fluid/framework/data_type_transform_test.cc +++ b/test/cpp/fluid/framework/data_type_transform_test.cc @@ -14,7 +14,14 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/data_type_transform.h" +#include + #include "gtest/gtest.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/common/float8_e4m3fn.h" +#include "paddle/phi/common/float8_e5m2.h" TEST(DataTypeTransform, CPUTransform) { auto place = phi::CPUPlace(); @@ -40,6 +47,18 @@ TEST(DataTypeTransform, CPUTransform) { auto kernel_bool = phi::KernelKey(place, phi::DataLayout::ALL_LAYOUT, phi::DataType::BOOL); + auto kernel_fp8_e4m3 = phi::KernelKey( + place, phi::DataLayout::ALL_LAYOUT, phi::DataType::FLOAT8_E4M3FN); + + auto kernel_fp8_e5m2 = phi::KernelKey( + place, phi::DataLayout::ALL_LAYOUT, phi::DataType::FLOAT8_E5M2); + + auto kernel_complex64 = phi::KernelKey( + place, phi::DataLayout::ALL_LAYOUT, phi::DataType::COMPLEX64); + + auto kernel_complex128 = phi::KernelKey( + place, phi::DataLayout::ALL_LAYOUT, phi::DataType::COMPLEX128); + // data type transform from float32 { phi::DenseTensor in; @@ -395,4 +414,134 @@ TEST(DataTypeTransform, CPUTransform) { EXPECT_EQ(ptr[i], static_cast(in_data_bool[i])); } } + + // transform float8 to complex + { + phi::DenseTensor in; + phi::DenseTensor out; + + auto* ptr = in.mutable_data( + common::make_ddim({2, 3}), place); + const int data_number = 2 * 3; + std::vector stored_values(data_number); + + for (int i = 0; i < data_number; ++i) { + ptr[i] = phi::dtype::float8_e5m2(static_cast(i) - 2.5f); + stored_values[i] = static_cast(ptr[i]); + } + + paddle::framework::TransDataType( + kernel_fp8_e5m2, kernel_complex64, in, &out); + auto* out_data_complex64 = out.data>(); + for (int i = 0; i < data_number; ++i) { + EXPECT_FLOAT_EQ(out_data_complex64[i].real, stored_values[i]); + EXPECT_FLOAT_EQ(out_data_complex64[i].imag, 0.0f); + } + + paddle::framework::TransDataType( + kernel_fp8_e5m2, kernel_complex128, in, &out); + auto* out_data_complex128 = out.data>(); + for (int i = 0; i < data_number; ++i) { + EXPECT_DOUBLE_EQ(out_data_complex128[i].real, + static_cast(stored_values[i])); + EXPECT_DOUBLE_EQ(out_data_complex128[i].imag, 0.0); + } + } + + // transform float8_e4m3 to complex + { + phi::DenseTensor in; + phi::DenseTensor out; + + auto* ptr = in.mutable_data( + common::make_ddim({2, 3}), place); + const int data_number = 2 * 3; + std::vector stored_values(data_number); + + for (int i = 0; i < data_number; ++i) { + ptr[i] = phi::dtype::float8_e4m3fn(static_cast(i) - 1.75f); + stored_values[i] = static_cast(ptr[i]); + } + + paddle::framework::TransDataType( + kernel_fp8_e4m3, kernel_complex64, in, &out); + auto* out_data_complex64 = out.data>(); + for (int i = 0; i < data_number; ++i) { + EXPECT_FLOAT_EQ(out_data_complex64[i].real, stored_values[i]); + EXPECT_FLOAT_EQ(out_data_complex64[i].imag, 0.0f); + } + + paddle::framework::TransDataType( + kernel_fp8_e4m3, kernel_complex128, in, &out); + auto* out_data_complex128 = out.data>(); + for (int i = 0; i < data_number; ++i) { + EXPECT_DOUBLE_EQ(out_data_complex128[i].real, + static_cast(stored_values[i])); + EXPECT_DOUBLE_EQ(out_data_complex128[i].imag, 0.0); + } + } + + // transform float16 to complex + { + phi::DenseTensor in; + phi::DenseTensor out; + + auto* ptr = + in.mutable_data(common::make_ddim({2, 3}), place); + const int data_number = 2 * 3; + std::vector stored_values(data_number); + std::vector stored_values_double(data_number); + + for (int i = 0; i < data_number; ++i) { + ptr[i] = static_cast(static_cast(i) - 3.0f); + stored_values[i] = static_cast(ptr[i]); + 
diff --git a/test/cpp/phi/kernels/CMakeLists.txt b/test/cpp/phi/kernels/CMakeLists.txt
index 08e22faf5b517..ec14d65845be7 100644
--- a/test/cpp/phi/kernels/CMakeLists.txt
+++ b/test/cpp/phi/kernels/CMakeLists.txt
@@ -118,6 +118,11 @@ cc_test(
   SRCS strided_memcpy_test.cc
   DEPS phi common)
 
+cc_test(
+  test_isfinite_kernel
+  SRCS test_isfinite_kernel.cc
+  DEPS gtest phi common)
+
 if(WIN32)
   cc_test(
     sequence_padding_test
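Aside: the new test below encodes the classification rules for complex inputs — a value is finite only if both components are finite, while the inf/nan predicates fire if either component is inf/nan, so (inf, nan) is reported as both inf and nan. A standalone model consistent with the expectations in CpuComplexTypes; C64 and the helpers are illustrative, not the phi implementation:

#include <cassert>
#include <cmath>
#include <limits>

struct C64 {  // toy stand-in for phi::complex64
  float real;
  float imag;
};

bool IsFinite(C64 z) { return std::isfinite(z.real) && std::isfinite(z.imag); }
bool IsInf(C64 z) { return std::isinf(z.real) || std::isinf(z.imag); }
bool IsNan(C64 z) { return std::isnan(z.real) || std::isnan(z.imag); }

int main() {
  const float inf = std::numeric_limits<float>::infinity();
  const float qnan = std::numeric_limits<float>::quiet_NaN();
  assert(IsFinite({1.0f, 2.0f}) && !IsInf({1.0f, 2.0f}));
  assert(IsInf({inf, 0.0f}) && !IsNan({inf, 0.0f}));
  assert(IsNan({0.0f, qnan}) && !IsInf({0.0f, qnan}));
  assert(IsInf({inf, qnan}) && IsNan({inf, qnan}));  // both flags at once
  return 0;
}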
diff --git a/test/cpp/phi/kernels/test_isfinite_kernel.cc b/test/cpp/phi/kernels/test_isfinite_kernel.cc
new file mode 100644
index 0000000000000..257623c35a642
--- /dev/null
+++ b/test/cpp/phi/kernels/test_isfinite_kernel.cc
@@ -0,0 +1,265 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "paddle/phi/backends/context_pool.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/common/bfloat16.h"
+#include "paddle/phi/common/complex.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/kernels/isfinite_kernel.h"
+
+namespace phi {
+namespace tests {
+namespace {
+
+const auto kCpuPlace = phi::CPUPlace();
+
+const phi::CPUContext* GetCpuDeviceContext() {
+  return phi::DeviceContextPool::Instance().GetByPlace(kCpuPlace);
+}
+
+template <typename T>
+void FillTensor(const phi::DeviceContext* dev_ctx,
+                phi::DenseTensor* tensor,
+                const std::vector<T>& values) {
+  tensor->Resize({static_cast<int64_t>(values.size())});
+  auto* data = dev_ctx->template Alloc<T>(tensor);
+  for (size_t i = 0; i < values.size(); ++i) {
+    data[i] = values[i];
+  }
+}
+
+template <typename T>
+std::vector<bool> RunIsKernel(void (*kernel)(const phi::CPUContext&,
+                                             const DenseTensor&,
+                                             DenseTensor*),
+                              const DenseTensor& input,
+                              phi::DenseTensor* output,
+                              const phi::CPUContext& ctx) {
+  output->Resize(input.dims());
+  kernel(ctx, input, output);
+  const bool* result = output->data<bool>();
+  return std::vector<bool>(result, result + input.numel());
+}
+
+}  // namespace
+
+TEST(IsfiniteKernels, CpuIntegerTypes) {
+  auto* dev_ctx = GetCpuDeviceContext();
+  const auto& cpu_ctx = *dev_ctx;
+
+  phi::DenseTensor input;
+  FillTensor<int>(dev_ctx, &input, {0, 42, -7, 1024});
+
+  phi::DenseTensor output;
+  auto finite = RunIsKernel<int>(
+      phi::IsfiniteKernel<int, phi::CPUContext>, input, &output, cpu_ctx);
+  auto inf = RunIsKernel<int>(
+      phi::IsinfKernel<int, phi::CPUContext>, input, &output, cpu_ctx);
+  auto nan = RunIsKernel<int>(
+      phi::IsnanKernel<int, phi::CPUContext>, input, &output, cpu_ctx);
+
+  for (bool value : finite) {
+    EXPECT_TRUE(value);
+  }
+  for (bool value : inf) {
+    EXPECT_FALSE(value);
+  }
+  for (bool value : nan) {
+    EXPECT_FALSE(value);
+  }
+}
+
+TEST(IsfiniteKernels, CpuFloatTypes) {
+  auto* dev_ctx = GetCpuDeviceContext();
+  const auto& cpu_ctx = *dev_ctx;
+
+  const float kInf = std::numeric_limits<float>::infinity();
+  const float kNan = std::numeric_limits<float>::quiet_NaN();
+
+  phi::DenseTensor float_input;
+  FillTensor<float>(dev_ctx, &float_input, {0.0f, kInf, -kInf, kNan});
+
+  phi::DenseTensor output;
+  auto finite = RunIsKernel<float>(phi::IsfiniteKernel<float, phi::CPUContext>,
+                                   float_input,
+                                   &output,
+                                   cpu_ctx);
+  auto inf = RunIsKernel<float>(
+      phi::IsinfKernel<float, phi::CPUContext>, float_input, &output, cpu_ctx);
+  auto nan = RunIsKernel<float>(
+      phi::IsnanKernel<float, phi::CPUContext>, float_input, &output, cpu_ctx);
+
+  ASSERT_EQ(finite.size(), 4UL);
+  EXPECT_TRUE(finite[0]);
+  EXPECT_FALSE(finite[1]);
+  EXPECT_FALSE(finite[2]);
+  EXPECT_FALSE(finite[3]);
+
+  ASSERT_EQ(inf.size(), 4UL);
+  EXPECT_FALSE(inf[0]);
+  EXPECT_TRUE(inf[1]);
+  EXPECT_TRUE(inf[2]);
+  EXPECT_FALSE(inf[3]);
+
+  ASSERT_EQ(nan.size(), 4UL);
+  EXPECT_FALSE(nan[0]);
+  EXPECT_FALSE(nan[1]);
+  EXPECT_FALSE(nan[2]);
+  EXPECT_TRUE(nan[3]);
+
+  phi::DenseTensor float16_input;
+  std::vector<phi::dtype::float16> float16_values = {
+      static_cast<phi::dtype::float16>(0.0f),
+      static_cast<phi::dtype::float16>(kInf),
+      static_cast<phi::dtype::float16>(-kInf),
+      static_cast<phi::dtype::float16>(kNan)};
+  FillTensor(dev_ctx, &float16_input, float16_values);
+
+  auto fp16_finite = RunIsKernel<phi::dtype::float16>(
+      phi::IsfiniteKernel<phi::dtype::float16, phi::CPUContext>,
+      float16_input,
+      &output,
+      cpu_ctx);
+  auto fp16_inf = RunIsKernel<phi::dtype::float16>(
+      phi::IsinfKernel<phi::dtype::float16, phi::CPUContext>,
+      float16_input,
+      &output,
+      cpu_ctx);
+  auto fp16_nan = RunIsKernel<phi::dtype::float16>(
+      phi::IsnanKernel<phi::dtype::float16, phi::CPUContext>,
+      float16_input,
+      &output,
+      cpu_ctx);
+
+  ASSERT_EQ(fp16_finite.size(), 4UL);
+  EXPECT_TRUE(fp16_finite[0]);
+  EXPECT_FALSE(fp16_finite[1]);
+  EXPECT_FALSE(fp16_finite[2]);
+  EXPECT_FALSE(fp16_finite[3]);
+
+  ASSERT_EQ(fp16_inf.size(), 4UL);
+  EXPECT_FALSE(fp16_inf[0]);
+  EXPECT_TRUE(fp16_inf[1]);
+  EXPECT_TRUE(fp16_inf[2]);
+  EXPECT_FALSE(fp16_inf[3]);
+
+  ASSERT_EQ(fp16_nan.size(), 4UL);
+  EXPECT_FALSE(fp16_nan[0]);
+  EXPECT_FALSE(fp16_nan[1]);
+  EXPECT_FALSE(fp16_nan[2]);
+  EXPECT_TRUE(fp16_nan[3]);
+
+  phi::DenseTensor bfloat16_input;
+  std::vector<phi::dtype::bfloat16> bfloat16_values = {
+      static_cast<phi::dtype::bfloat16>(0.0f),
+      static_cast<phi::dtype::bfloat16>(kInf),
+      static_cast<phi::dtype::bfloat16>(-kInf),
+      static_cast<phi::dtype::bfloat16>(kNan)};
+  FillTensor(dev_ctx, &bfloat16_input, bfloat16_values);
+
+  auto bf16_finite = RunIsKernel<phi::dtype::bfloat16>(
+      phi::IsfiniteKernel<phi::dtype::bfloat16, phi::CPUContext>,
+      bfloat16_input,
+      &output,
+      cpu_ctx);
+  auto bf16_inf = RunIsKernel<phi::dtype::bfloat16>(
+      phi::IsinfKernel<phi::dtype::bfloat16, phi::CPUContext>,
+      bfloat16_input,
+      &output,
+      cpu_ctx);
+  auto bf16_nan = RunIsKernel<phi::dtype::bfloat16>(
+      phi::IsnanKernel<phi::dtype::bfloat16, phi::CPUContext>,
+      bfloat16_input,
+      &output,
+      cpu_ctx);
+
+  ASSERT_EQ(bf16_finite.size(), 4UL);
+  EXPECT_TRUE(bf16_finite[0]);
+  EXPECT_FALSE(bf16_finite[1]);
+  EXPECT_FALSE(bf16_finite[2]);
+  EXPECT_FALSE(bf16_finite[3]);
+
+  ASSERT_EQ(bf16_inf.size(), 4UL);
+  EXPECT_FALSE(bf16_inf[0]);
+  EXPECT_TRUE(bf16_inf[1]);
+  EXPECT_TRUE(bf16_inf[2]);
+  EXPECT_FALSE(bf16_inf[3]);
+
+  ASSERT_EQ(bf16_nan.size(), 4UL);
+  EXPECT_FALSE(bf16_nan[0]);
+  EXPECT_FALSE(bf16_nan[1]);
+  EXPECT_FALSE(bf16_nan[2]);
+  EXPECT_TRUE(bf16_nan[3]);
+}
+
+TEST(IsfiniteKernels, CpuComplexTypes) {
+  auto* dev_ctx = GetCpuDeviceContext();
+  const auto& cpu_ctx = *dev_ctx;
+
+  const float kInf = std::numeric_limits<float>::infinity();
+  const float kNan = std::numeric_limits<float>::quiet_NaN();
+
+  phi::DenseTensor complex_input;
+  std::vector<phi::complex64> values = {phi::complex64(1.0f, 2.0f),
+                                        phi::complex64(kInf, 0.0f),
+                                        phi::complex64(0.0f, kNan),
+                                        phi::complex64(kInf, kNan)};
+  FillTensor(dev_ctx, &complex_input, values);
+
+  phi::DenseTensor output;
+  auto finite = RunIsKernel<phi::complex64>(
+      phi::IsfiniteKernel<phi::complex64, phi::CPUContext>,
+      complex_input,
+      &output,
+      cpu_ctx);
+  auto inf = RunIsKernel<phi::complex64>(
+      phi::IsinfKernel<phi::complex64, phi::CPUContext>,
+      complex_input,
+      &output,
+      cpu_ctx);
+  auto nan = RunIsKernel<phi::complex64>(
+      phi::IsnanKernel<phi::complex64, phi::CPUContext>,
+      complex_input,
+      &output,
+      cpu_ctx);
+
+  ASSERT_EQ(finite.size(), 4UL);
+  EXPECT_TRUE(finite[0]);
+  EXPECT_FALSE(finite[1]);
+  EXPECT_FALSE(finite[2]);
+  EXPECT_FALSE(finite[3]);
+
+  ASSERT_EQ(inf.size(), 4UL);
+  EXPECT_FALSE(inf[0]);
+  EXPECT_TRUE(inf[1]);
+  EXPECT_FALSE(inf[2]);
+  EXPECT_TRUE(inf[3]);
+
+  ASSERT_EQ(nan.size(), 4UL);
+  EXPECT_FALSE(nan[0]);
+  EXPECT_FALSE(nan[1]);
+  EXPECT_TRUE(nan[2]);
+  EXPECT_TRUE(nan[3]);
+}
+
+}  // namespace tests
+}  // namespace phi
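Aside: RunIsKernel above takes the kernel as a plain function pointer, which is why every call site spells out the full IsfiniteKernel<T, phi::CPUContext> — a function template has no address until it is instantiated, so only a fully instantiated kernel can bind to the pointer parameter. A standalone sketch of that idiom; Kernel and Run are illustrative names:

#include <cstdio>

template <typename T>
void Kernel(const T& x, bool* out) { *out = (x == x); }  // toy predicate

// Accepts an already-instantiated kernel as an ordinary function pointer.
void Run(void (*kernel)(const float&, bool*), const float& x) {
  bool out = false;
  kernel(x, &out);
  std::printf("%d\n", static_cast<int>(out));
}

int main() {
  Run(Kernel<float>, 1.0f);  // passing `Kernel` alone would not compile
  return 0;
}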
diff --git a/test/legacy_test/test_exponential_op.py b/test/legacy_test/test_exponential_op.py
index d798b1bee7913..9e525884da6b7 100644
--- a/test/legacy_test/test_exponential_op.py
+++ b/test/legacy_test/test_exponential_op.py
@@ -437,13 +437,13 @@ def test_check_output(self):
 
     def verify_output(self, outs):
         hist1, _ = np.histogram(outs[0], range=(0, 5))
-        hist1 = hist1.astype(np.float16)
         hist1 = hist1 / float(outs[0].size)
+        hist1 = hist1.astype(np.float16)
 
         data_np = np.random.exponential(1.0 / self.lam, [1024, 1024])
         hist2, _ = np.histogram(data_np, range=(0, 5))
-        hist2 = hist2.astype(np.float16)
         hist2 = hist2 / float(data_np.size)
+        hist2 = hist2.astype(np.float16)
 
         np.testing.assert_allclose(hist1, hist2, rtol=0.05)
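Aside on the final hunk: a histogram over a 1024x1024 sample can put far more than 65504 items (float16's largest finite value) into a single bin, so casting the raw counts to float16 before normalizing can overflow to infinity; the reordered code divides first, leaving frequencies in [0, 1] that convert safely. The same effect, modeled in C++ with a toy saturating half-precision cast — to_fp16_like only mimics the overflow boundary, not float16's mantissa rounding:

#include <cstdio>
#include <limits>

float to_fp16_like(float x) {  // toy model: saturate past float16's range
  const float kMaxHalf = 65504.0f;
  if (x > kMaxHalf) return std::numeric_limits<float>::infinity();
  if (x < -kMaxHalf) return -std::numeric_limits<float>::infinity();
  return x;
}

int main() {
  const float count = 300000.0f;          // one histogram bin's count
  const float total = 1024.0f * 1024.0f;  // sample size
  float cast_then_divide = to_fp16_like(count) / total;  // inf / total = inf
  float divide_then_cast = to_fp16_like(count / total);  // ~0.286, finite
  std::printf("%f vs %f\n", cast_then_divide, divide_then_cast);
  return 0;
}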