Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
edbce95
[API compatibility] fix bugs when compile with VS2022
youge325 Sep 29, 2025
dd827f0
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 1, 2025
4585950
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 9, 2025
c2d150f
fix overload function
youge325 Oct 9, 2025
a8a1af9
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 9, 2025
0a27ded
fix overload problem in another way
youge325 Oct 9, 2025
9fa2a2c
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 10, 2025
d352f44
fix isfinite kernel test
youge325 Oct 10, 2025
2b08db7
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 10, 2025
8329a2f
Merge branch 'vs2022' of https://github.com/youge325/Paddle into vs2022
youge325 Oct 10, 2025
7ddaefa
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 10, 2025
5e1e891
is_same_v
youge325 Oct 10, 2025
5ee4203
add new test cases to resolve coverage CI
youge325 Oct 11, 2025
6cd24cf
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 11, 2025
774e1b6
Merge branch 'vs2022' of https://github.com/youge325/Paddle into vs2022
youge325 Oct 11, 2025
4b80ec9
fix TensorRTDynamicShapeValueEngineTest
youge325 Oct 11, 2025
dc84a25
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 11, 2025
6d432fb
Merge branch 'vs2022' of https://github.com/youge325/Paddle into vs2022
youge325 Oct 11, 2025
b6fcda1
fix bfloat16 isinf implementation
youge325 Oct 11, 2025
6df8531
fix exponential op and ci for xpu device
youge325 Oct 12, 2025
de20c00
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 12, 2025
6e8104d
Merge branch 'vs2022' of https://github.com/youge325/Paddle into vs2022
youge325 Oct 12, 2025
f845c25
revert changes to index_elementwise_get_kernel.cc and index_elementwi…
youge325 Oct 13, 2025
2faa335
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 13, 2025
4285a19
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 21, 2025
3f8ccfa
Merge branch 'PaddlePaddle:develop' into vs2022
youge325 Oct 27, 2025
72573e2
Merge branch 'develop' into vs2022
youge325 Oct 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 26 additions & 11 deletions paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,10 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
layer->setInput(1, *shape);
PADDLE_ENFORCE_NOT_NULL(
layer,
common::errors::InvalidArgument("TRT shuffle layer building failed."));
common::errors::InvalidArgument(
"TensorRT returned nullptr while constructing the dynamic shuffle "
"layer for input 'input'. Verify that the supplied runtime shape is "
"compatible with the network definition."));
engine_->DeclareOutput(layer, 0, "y");
engine_->FreezeNetwork();
#if IS_TRT_VERSION_GE(8600)
Expand Down Expand Up @@ -302,7 +305,10 @@ TEST_F(TensorRTDynamicEngineTest, test_spmm) {
LOG(INFO) << "create weights";
PADDLE_ENFORCE_NOT_NULL(
fc_layer,
common::errors::InvalidArgument("TRT SPMM layer building failed."));
common::errors::InvalidArgument(
"TensorRT returned a null layer when constructing the sparse "
"matrix-multiply plugin. Ensure the SpMM plugin is registered and "
"the weight/bias dimensions are valid."));

engine_->DeclareOutput(fc_layer, 0, "y");
engine_->FreezeNetwork();
Expand Down Expand Up @@ -440,9 +446,12 @@ TEST_F(TensorRTDynamicTestFusedTokenPrune, test_fused_token_prune) {
/*flag_varseqlen*/ false);
std::vector<nvinfer1::ITensor *> itensors = {attn, x, mask, new_mask};
auto *layer = engine_->AddDynamicPlugin(itensors.data(), 4, plugin);
PADDLE_ENFORCE_NOT_NULL(layer,
common::errors::InvalidArgument(
"TRT fused_token_prune layer building failed."));
PADDLE_ENFORCE_NOT_NULL(
layer,
common::errors::InvalidArgument(
"TensorRT could not add the fused_token_prune plugin layer. "
"Confirm the dynamic plugin is registered and the input tensor "
"shapes (attn/x/mask/new_mask) follow the expected layout."));
std::vector<std::string> output_tensor_names{"out_slimmed_x", "out_cls_inds"};
for (size_t i = 0; i < 2; i++) {
layer->getOutput(i)->setName(output_tensor_names[i].c_str());
Expand Down Expand Up @@ -642,9 +651,12 @@ TEST_F(TensorRTDynamicTestFusedTokenPruneHalf, test_fused_token_prune) {
/*flag_varseqlen*/ false);
std::vector<nvinfer1::ITensor *> itensors = {attn, x, mask, new_mask};
auto *layer = engine_->AddDynamicPlugin(itensors.data(), 4, plugin);
PADDLE_ENFORCE_NOT_NULL(layer,
common::errors::InvalidArgument(
"TRT fused_token_prune layer building failed."));
PADDLE_ENFORCE_NOT_NULL(
layer,
common::errors::InvalidArgument(
"TensorRT could not add the fused_token_prune plugin layer. "
"Confirm the half-precision plugin registration and input tensor "
"layouts (attn/x/mask/new_mask) are valid."));
std::vector<std::string> output_tensor_names{"out_slimmed_x", "out_cls_inds"};
for (size_t i = 0; i < 2; i++) {
layer->getOutput(i)->setName(output_tensor_names[i].c_str());
Expand Down Expand Up @@ -987,9 +999,12 @@ TEST_F(TensorRTDynamicShapeGNTest, test_trt_dynamic_shape_groupnorm) {
TRT_ENGINE_ADD_LAYER(engine_, Dequantize, *gn_tensor, *dqscale_tensor);
dq_layer->setAxis(1);

PADDLE_ENFORCE_NOT_NULL(groupnorm_layer,
common::errors::InvalidArgument(
"TRT GN plugin layer building failed."));
PADDLE_ENFORCE_NOT_NULL(
groupnorm_layer,
common::errors::InvalidArgument(
"TensorRT failed to create the group-normalization plugin layer. "
"Ensure the plugin is registered and the provided scale/bias/"
"group parameters are consistent."));

engine_->DeclareOutput(dq_layer, 0, "y");
engine_->FreezeNetwork();
Expand Down
68 changes: 68 additions & 0 deletions paddle/phi/core/framework/data_type_transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,74 @@ struct CastDataTypeFunctor {
}
};

// Explicit cast specialization: float8_e5m2 -> complex<float>.
// Goes through an intermediate float so the conversion does not depend on an
// implicit float8 -> complex constructor (presumably ambiguous/unsupported on
// MSVC (VS2022) — NOTE(review): confirm against the compiler error this fixed).
// Uses the canonical ::phi::dtype::complex<float> spelling (same type as
// ::phi::complex64) for consistency with the float16/bfloat16 specializations.
template <>
struct CastDataTypeFunctor<::phi::dtype::float8_e5m2,
                           ::phi::dtype::complex<float>> {
  HOSTDEVICE inline ::phi::dtype::complex<float> operator()(
      ::phi::dtype::float8_e5m2 in) const {
    // Real part carries the value; imaginary part is zero-initialized.
    return ::phi::dtype::complex<float>(static_cast<float>(in));
  }
};

// Explicit cast specialization: float8_e5m2 -> complex<double>.
// Widens through double before constructing the complex value so no implicit
// float8 -> complex conversion is required (presumably added for MSVC/VS2022
// conformance — NOTE(review): confirm).
// Uses the canonical ::phi::dtype::complex<double> spelling (same type as
// ::phi::complex128) for consistency with the float16/bfloat16 specializations.
template <>
struct CastDataTypeFunctor<::phi::dtype::float8_e5m2,
                           ::phi::dtype::complex<double>> {
  HOSTDEVICE inline ::phi::dtype::complex<double> operator()(
      ::phi::dtype::float8_e5m2 in) const {
    // Real part carries the value; imaginary part is zero-initialized.
    return ::phi::dtype::complex<double>(static_cast<double>(in));
  }
};

// Explicit cast specialization: float8_e4m3fn -> complex<float>.
// Goes through an intermediate float so the conversion does not depend on an
// implicit float8 -> complex constructor (presumably ambiguous/unsupported on
// MSVC (VS2022) — NOTE(review): confirm).
// Uses the canonical ::phi::dtype::complex<float> spelling (same type as
// ::phi::complex64) for consistency with the float16/bfloat16 specializations.
template <>
struct CastDataTypeFunctor<::phi::dtype::float8_e4m3fn,
                           ::phi::dtype::complex<float>> {
  HOSTDEVICE inline ::phi::dtype::complex<float> operator()(
      ::phi::dtype::float8_e4m3fn in) const {
    // Real part carries the value; imaginary part is zero-initialized.
    return ::phi::dtype::complex<float>(static_cast<float>(in));
  }
};

// Explicit cast specialization: float8_e4m3fn -> complex<double>.
// Widens through double before constructing the complex value so no implicit
// float8 -> complex conversion is required (presumably added for MSVC/VS2022
// conformance — NOTE(review): confirm).
// Uses the canonical ::phi::dtype::complex<double> spelling (same type as
// ::phi::complex128) for consistency with the float16/bfloat16 specializations.
template <>
struct CastDataTypeFunctor<::phi::dtype::float8_e4m3fn,
                           ::phi::dtype::complex<double>> {
  HOSTDEVICE inline ::phi::dtype::complex<double> operator()(
      ::phi::dtype::float8_e4m3fn in) const {
    // Real part carries the value; imaginary part is zero-initialized.
    return ::phi::dtype::complex<double>(static_cast<double>(in));
  }
};

// Explicit cast specialization: bfloat16 -> complex<float>.
// Routes through an intermediate float so the conversion does not rely on an
// implicit bfloat16 -> complex constructor (NOTE(review): presumably added so
// this compiles unambiguously under MSVC/VS2022 — confirm). Imaginary part is
// zero-initialized by the single-argument complex constructor.
template <>
struct CastDataTypeFunctor<::phi::dtype::bfloat16,
::phi::dtype::complex<float>> {
HOSTDEVICE inline ::phi::dtype::complex<float> operator()(
::phi::dtype::bfloat16 in) const {
return ::phi::dtype::complex<float>(static_cast<float>(in));
}
};

// Explicit cast specialization: bfloat16 -> complex<double>.
// Widens through double before constructing the complex value (NOTE(review):
// presumably added for MSVC/VS2022 overload-resolution compatibility —
// confirm). Imaginary part is zero-initialized.
template <>
struct CastDataTypeFunctor<::phi::dtype::bfloat16,
::phi::dtype::complex<double>> {
HOSTDEVICE inline ::phi::dtype::complex<double> operator()(
::phi::dtype::bfloat16 in) const {
return ::phi::dtype::complex<double>(static_cast<double>(in));
}
};

// Explicit cast specialization: float16 -> complex<float>.
// Routes through an intermediate float so the conversion does not rely on an
// implicit float16 -> complex constructor (NOTE(review): presumably added so
// this compiles unambiguously under MSVC/VS2022 — confirm). Imaginary part is
// zero-initialized.
template <>
struct CastDataTypeFunctor<::phi::dtype::float16,
::phi::dtype::complex<float>> {
HOSTDEVICE inline ::phi::dtype::complex<float> operator()(
::phi::dtype::float16 in) const {
return ::phi::dtype::complex<float>(static_cast<float>(in));
}
};

// Explicit cast specialization: float16 -> complex<double>.
// Widens through double before constructing the complex value (NOTE(review):
// presumably added for MSVC/VS2022 overload-resolution compatibility —
// confirm). Imaginary part is zero-initialized.
template <>
struct CastDataTypeFunctor<::phi::dtype::float16,
::phi::dtype::complex<double>> {
HOSTDEVICE inline ::phi::dtype::complex<double> operator()(
::phi::dtype::float16 in) const {
return ::phi::dtype::complex<double>(static_cast<double>(in));
}
};

#if defined(PADDLE_WITH_XPU)

template <typename InType, typename OutType>
Expand Down
149 changes: 149 additions & 0 deletions test/cpp/fluid/framework/data_type_transform_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,14 @@ limitations under the License. */

#include "paddle/fluid/framework/data_type_transform.h"

#include <vector>

#include "gtest/gtest.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/float8_e4m3fn.h"
#include "paddle/phi/common/float8_e5m2.h"

TEST(DataTypeTransform, CPUTransform) {
auto place = phi::CPUPlace();
Expand All @@ -40,6 +47,18 @@ TEST(DataTypeTransform, CPUTransform) {
auto kernel_bool =
phi::KernelKey(place, phi::DataLayout::ALL_LAYOUT, phi::DataType::BOOL);

auto kernel_fp8_e4m3 = phi::KernelKey(
place, phi::DataLayout::ALL_LAYOUT, phi::DataType::FLOAT8_E4M3FN);

auto kernel_fp8_e5m2 = phi::KernelKey(
place, phi::DataLayout::ALL_LAYOUT, phi::DataType::FLOAT8_E5M2);

auto kernel_complex64 = phi::KernelKey(
place, phi::DataLayout::ALL_LAYOUT, phi::DataType::COMPLEX64);

auto kernel_complex128 = phi::KernelKey(
place, phi::DataLayout::ALL_LAYOUT, phi::DataType::COMPLEX128);

// data type transform from float32
{
phi::DenseTensor in;
Expand Down Expand Up @@ -395,4 +414,134 @@ TEST(DataTypeTransform, CPUTransform) {
EXPECT_EQ(ptr[i], static_cast<int32_t>(in_data_bool[i]));
}
}

// transform float8 to complex
{
phi::DenseTensor in;
phi::DenseTensor out;

auto* ptr = in.mutable_data<phi::dtype::float8_e5m2>(
common::make_ddim({2, 3}), place);
const int data_number = 2 * 3;
std::vector<float> stored_values(data_number);

for (int i = 0; i < data_number; ++i) {
ptr[i] = phi::dtype::float8_e5m2(static_cast<float>(i) - 2.5f);
stored_values[i] = static_cast<float>(ptr[i]);
}

paddle::framework::TransDataType(
kernel_fp8_e5m2, kernel_complex64, in, &out);
auto* out_data_complex64 = out.data<phi::dtype::complex<float>>();
for (int i = 0; i < data_number; ++i) {
EXPECT_FLOAT_EQ(out_data_complex64[i].real, stored_values[i]);
EXPECT_FLOAT_EQ(out_data_complex64[i].imag, 0.0f);
}

paddle::framework::TransDataType(
kernel_fp8_e5m2, kernel_complex128, in, &out);
auto* out_data_complex128 = out.data<phi::dtype::complex<double>>();
for (int i = 0; i < data_number; ++i) {
EXPECT_DOUBLE_EQ(out_data_complex128[i].real,
static_cast<double>(stored_values[i]));
EXPECT_DOUBLE_EQ(out_data_complex128[i].imag, 0.0);
}
}

// transform float8_e4m3 to complex
{
phi::DenseTensor in;
phi::DenseTensor out;

auto* ptr = in.mutable_data<phi::dtype::float8_e4m3fn>(
common::make_ddim({2, 3}), place);
const int data_number = 2 * 3;
std::vector<float> stored_values(data_number);

for (int i = 0; i < data_number; ++i) {
ptr[i] = phi::dtype::float8_e4m3fn(static_cast<float>(i) - 1.75f);
stored_values[i] = static_cast<float>(ptr[i]);
}

paddle::framework::TransDataType(
kernel_fp8_e4m3, kernel_complex64, in, &out);
auto* out_data_complex64 = out.data<phi::dtype::complex<float>>();
for (int i = 0; i < data_number; ++i) {
EXPECT_FLOAT_EQ(out_data_complex64[i].real, stored_values[i]);
EXPECT_FLOAT_EQ(out_data_complex64[i].imag, 0.0f);
}

paddle::framework::TransDataType(
kernel_fp8_e4m3, kernel_complex128, in, &out);
auto* out_data_complex128 = out.data<phi::dtype::complex<double>>();
for (int i = 0; i < data_number; ++i) {
EXPECT_DOUBLE_EQ(out_data_complex128[i].real,
static_cast<double>(stored_values[i]));
EXPECT_DOUBLE_EQ(out_data_complex128[i].imag, 0.0);
}
}

// transform float16 to complex
{
phi::DenseTensor in;
phi::DenseTensor out;

auto* ptr =
in.mutable_data<phi::dtype::float16>(common::make_ddim({2, 3}), place);
const int data_number = 2 * 3;
std::vector<float> stored_values(data_number);
std::vector<double> stored_values_double(data_number);

for (int i = 0; i < data_number; ++i) {
ptr[i] = static_cast<phi::dtype::float16>(static_cast<float>(i) - 3.0f);
stored_values[i] = static_cast<float>(ptr[i]);
stored_values_double[i] = static_cast<double>(ptr[i]);
}

paddle::framework::TransDataType(kernel_fp16, kernel_complex64, in, &out);
auto* out_data_complex64 = out.data<phi::dtype::complex<float>>();
for (int i = 0; i < data_number; ++i) {
EXPECT_FLOAT_EQ(out_data_complex64[i].real, stored_values[i]);
EXPECT_FLOAT_EQ(out_data_complex64[i].imag, 0.0f);
}

paddle::framework::TransDataType(kernel_fp16, kernel_complex128, in, &out);
auto* out_data_complex128 = out.data<phi::dtype::complex<double>>();
for (int i = 0; i < data_number; ++i) {
EXPECT_DOUBLE_EQ(out_data_complex128[i].real, stored_values_double[i]);
EXPECT_DOUBLE_EQ(out_data_complex128[i].imag, 0.0);
}
}

// transform bfloat16 to complex
{
phi::DenseTensor in;
phi::DenseTensor out;

auto* ptr =
in.mutable_data<phi::dtype::bfloat16>(common::make_ddim({2, 3}), place);
const int data_number = 2 * 3;
std::vector<float> stored_values(data_number);
std::vector<double> stored_values_double(data_number);

for (int i = 0; i < data_number; ++i) {
ptr[i] = static_cast<phi::dtype::bfloat16>(static_cast<float>(i) - 1.5f);
stored_values[i] = static_cast<float>(ptr[i]);
stored_values_double[i] = static_cast<double>(ptr[i]);
}

paddle::framework::TransDataType(kernel_bf16, kernel_complex64, in, &out);
auto* out_data_complex64 = out.data<phi::dtype::complex<float>>();
for (int i = 0; i < data_number; ++i) {
EXPECT_FLOAT_EQ(out_data_complex64[i].real, stored_values[i]);
EXPECT_FLOAT_EQ(out_data_complex64[i].imag, 0.0f);
}

paddle::framework::TransDataType(kernel_bf16, kernel_complex128, in, &out);
auto* out_data_complex128 = out.data<phi::dtype::complex<double>>();
for (int i = 0; i < data_number; ++i) {
EXPECT_DOUBLE_EQ(out_data_complex128[i].real, stored_values_double[i]);
EXPECT_DOUBLE_EQ(out_data_complex128[i].imag, 0.0);
}
}
}
5 changes: 5 additions & 0 deletions test/cpp/phi/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ cc_test(
SRCS strided_memcpy_test.cc
DEPS phi common)

# Standalone unit test for the isfinite-family kernels
# (test_isfinite_kernel.cc).
# NOTE(review): gtest is listed explicitly here while the cc_test targets
# above depend only on `phi common` — confirm whether cc_test already links
# gtest implicitly and the explicit dep is needed.
cc_test(
test_isfinite_kernel
SRCS test_isfinite_kernel.cc
DEPS gtest phi common)

if(WIN32)
cc_test(
sequence_padding_test
Expand Down
Loading
Loading