Skip to content

Commit 0de7982

Browse files
committed
Merge branch 'develop' into n277
2 parents 4cf4736 + 778244e commit 0de7982

19 files changed

+176
-119
lines changed

paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -435,14 +435,14 @@ void BatchNormDoubleGradKernel(const Context& dev_ctx,
435435
DenseTensor transformed_ddy(ddY->type());
436436
if (data_layout == DataLayout::NCHW && x_dims.size() > 2) {
437437
VLOG(3) << "Transform batchnorm output from NCHW to NHWC";
438-
// Input Tensor
438+
// Input DenseTensor
439439
ResizeToChannelLast<Context, T>(dev_ctx, X, &transformed_x);
440440
TransToChannelLast<Context, T>(dev_ctx, X, &transformed_x);
441441
ResizeToChannelLast<Context, T>(dev_ctx, dY, &transformed_dy);
442442
TransToChannelLast<Context, T>(dev_ctx, dY, &transformed_dy);
443443
ResizeToChannelLast<Context, T>(dev_ctx, ddX, &transformed_ddx);
444444
TransToChannelLast<Context, T>(dev_ctx, ddX, &transformed_ddx);
445-
// Output Tensor
445+
// Output DenseTensor
446446
ResizeToChannelLast<Context, T>(dev_ctx, dX, &transformed_dx);
447447
ResizeToChannelLast<Context, T>(dev_ctx, ddY, &transformed_ddy);
448448
} else {
@@ -458,7 +458,7 @@ void BatchNormDoubleGradKernel(const Context& dev_ctx,
458458
ConstEigenVectorArrayMap<T> mean_arr(mean_data, C);
459459
ConstEigenVectorArrayMap<T> inv_var_arr(inv_var_data, C);
460460

461-
Tensor mean_tile;
461+
DenseTensor mean_tile;
462462
mean_tile.Resize({C, sample_size});
463463
EigenArrayMap<T> mean_tile_data(
464464
dev_ctx.template Alloc<T>(&mean_tile), C, sample_size);
@@ -480,7 +480,7 @@ void BatchNormDoubleGradKernel(const Context& dev_ctx,
480480
ConstEigenVectorArrayMap<T> scale_arr(
481481
Scale ? Scale->data<T>() : Scale_data.data<T>(), C);
482482

483-
Tensor scale_tile;
483+
DenseTensor scale_tile;
484484
scale_tile.Resize({C, sample_size});
485485
EigenArrayMap<T> scale_tile_data(
486486
dev_ctx.template Alloc<T>(&scale_tile), C, sample_size);
@@ -505,7 +505,7 @@ void BatchNormDoubleGradKernel(const Context& dev_ctx,
505505
// math: dx = (ddscale * dy) * inv_var
506506
if (ddScale) {
507507
ConstEigenVectorArrayMap<T> ddscale_arr(ddScale->data<T>(), C);
508-
Tensor ddscale_tile;
508+
DenseTensor ddscale_tile;
509509
ddscale_tile.Resize({C, sample_size});
510510
EigenArrayMap<T> ddscale_tile_data(
511511
dev_ctx.template Alloc<T>(&ddscale_tile), C, sample_size);
@@ -557,7 +557,7 @@ void BatchNormDoubleGradKernel(const Context& dev_ctx,
557557
}
558558
if (ddScale) {
559559
ConstEigenVectorArrayMap<T> ddscale_arr(ddScale->data<T>(), C);
560-
Tensor ddscale_tile;
560+
DenseTensor ddscale_tile;
561561
ddscale_tile.Resize({C, sample_size});
562562
EigenArrayMap<T> ddscale_tile_data(
563563
dev_ctx.template Alloc<T>(&ddscale_tile), C, sample_size);
@@ -594,7 +594,7 @@ void BatchNormDoubleGradKernel(const Context& dev_ctx,
594594
// inv_var.pow(2) * np.mean(dy * (x-mean), axis=(n,h,w)))) *
595595
// ddx
596596
if (ddX) {
597-
Tensor first_grad;
597+
DenseTensor first_grad;
598598
first_grad.Resize({C, sample_size});
599599
EigenArrayMap<T> first_grad_arr(
600600
dev_ctx.template Alloc<T>(&first_grad), C, sample_size);
@@ -645,7 +645,7 @@ void BatchNormDoubleGradKernel(const Context& dev_ctx,
645645
}
646646
if (ddScale) {
647647
ConstEigenVectorArrayMap<T> ddscale_arr(ddScale->data<T>(), C);
648-
Tensor ddscale_tile;
648+
DenseTensor ddscale_tile;
649649
ddscale_tile.Resize({C, sample_size});
650650
EigenArrayMap<T> ddscale_tile_data(
651651
dev_ctx.template Alloc<T>(&ddscale_tile), C, sample_size);
@@ -656,7 +656,7 @@ void BatchNormDoubleGradKernel(const Context& dev_ctx,
656656

657657
if (ddBias) {
658658
ConstEigenVectorArrayMap<T> ddbias_arr(ddBias->data<T>(), C);
659-
Tensor ddbias_tile;
659+
DenseTensor ddbias_tile;
660660
ddbias_tile.Resize({C, sample_size});
661661
EigenArrayMap<T> ddbias_tile_data(
662662
dev_ctx.template Alloc<T>(&ddbias_tile), C, sample_size);

paddle/phi/kernels/funcs/batch_norm_utils.h

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,10 @@
2020

2121
namespace phi {
2222

23-
using Tensor = DenseTensor;
24-
2523
template <typename DeviceContext, typename T>
2624
inline void ResizeToChannelFirst(const DeviceContext& dev_ctx,
27-
const Tensor* input,
28-
Tensor* transformed_input) {
25+
const DenseTensor* input,
26+
DenseTensor* transformed_input) {
2927
int dim = input->dims().size() - 2;
3028
if (dim == 3) {
3129
// input
@@ -61,8 +59,8 @@ inline void ResizeToChannelFirst(const DeviceContext& dev_ctx,
6159

6260
template <typename DeviceContext, typename T>
6361
inline void ResizeToChannelLast(const DeviceContext& dev_ctx,
64-
const Tensor* input,
65-
Tensor* transformed_input) {
62+
const DenseTensor* input,
63+
DenseTensor* transformed_input) {
6664
int dim = input->dims().size() - 2;
6765
if (dim == 3) {
6866
// input
@@ -99,8 +97,8 @@ inline void ResizeToChannelLast(const DeviceContext& dev_ctx,
9997

10098
template <typename DeviceContext, typename T>
10199
inline void TransToChannelFirst(const DeviceContext& dev_ctx,
102-
const Tensor* input,
103-
Tensor* transformed_input) {
100+
const DenseTensor* input,
101+
DenseTensor* transformed_input) {
104102
VLOG(5) << "Why am I called?";
105103
int dim = input->dims().size() - 2;
106104
if (dim == 3) {
@@ -121,8 +119,8 @@ inline void TransToChannelFirst(const DeviceContext& dev_ctx,
121119

122120
template <typename DeviceContext, typename T>
123121
inline void TransToChannelLast(const DeviceContext& dev_ctx,
124-
const Tensor* input,
125-
Tensor* transformed_input) {
122+
const DenseTensor* input,
123+
DenseTensor* transformed_input) {
126124
int dim = input->dims().size() - 2;
127125
if (dim == 3) {
128126
std::vector<int> axis{0, 2, 3, 4, 1};

paddle/phi/kernels/funcs/cross_entropy.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ limitations under the License. */
2020
namespace phi {
2121
namespace funcs {
2222

23-
using Tensor = DenseTensor;
2423
template <typename T,
2524
int MajorType = Eigen::RowMajor,
2625
typename IndexType = Eigen::DenseIndex>

paddle/phi/kernels/funcs/segment_pooling.cc

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ limitations under the License. */
2121

2222
namespace phi::funcs {
2323

24-
using Tensor = DenseTensor;
25-
2624
template <typename T, typename IndexT>
2725
class SegmentPoolFunctor<phi::CPUContext, T, IndexT> {
2826
public:
@@ -51,8 +49,8 @@ class SegmentPoolFunctor<phi::CPUContext, T, IndexT> {
5149
segment_ids[idx]));
5250
}
5351

54-
Tensor out_t = output->Slice(current_id, current_id + 1);
55-
Tensor in_t = input.Slice(last_idx, idx);
52+
DenseTensor out_t = output->Slice(current_id, current_id + 1);
53+
DenseTensor in_t = input.Slice(last_idx, idx);
5654

5755
int64_t h = idx - last_idx;
5856
auto in_e = EigenMatrix<T>::From(in_t, make_ddim({h, w}));
@@ -110,8 +108,8 @@ class SegmentPoolGradFunctor<phi::CPUContext, T, IndexT> {
110108
segment_ids[idx]));
111109
}
112110

113-
Tensor out_g_t = out_grad.Slice(current_id, current_id + 1);
114-
Tensor in_g_t = in_grad->Slice(last_idx, idx);
111+
DenseTensor out_g_t = out_grad.Slice(current_id, current_id + 1);
112+
DenseTensor in_g_t = in_grad->Slice(last_idx, idx);
115113

116114
int64_t h = idx - last_idx;
117115
auto in_g_e = EigenMatrix<T>::From(in_g_t, {h, w});
@@ -123,8 +121,8 @@ class SegmentPoolGradFunctor<phi::CPUContext, T, IndexT> {
123121
} else if (pooltype == "SUM") {
124122
in_g_e.device(place) = out_g_e.broadcast(bcast);
125123
} else if (pooltype == "MAX" || pooltype == "MIN") {
126-
Tensor out_t = output.Slice(current_id, current_id + 1);
127-
Tensor in_t = input.Slice(last_idx, idx);
124+
DenseTensor out_t = output.Slice(current_id, current_id + 1);
125+
DenseTensor in_t = input.Slice(last_idx, idx);
128126
auto in_e = EigenMatrix<T>::From(in_t, {h, w});
129127
auto out_e = EigenMatrix<T>::From(out_t, {1, w});
130128
in_g_e.device(place) =

paddle/phi/kernels/funcs/segment_pooling.cu

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ limitations under the License. */
2525
namespace phi {
2626
namespace funcs {
2727

28-
using Tensor = DenseTensor;
29-
3028
template <typename T, typename Index, int DimTileSize>
3129
__global__ void SegmentSumIdsKernel(const Index* segment_ids,
3230
T* summed_ids,

paddle/phi/kernels/funcs/top_k_function_cuda.h

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,6 @@ struct NumericTraits<phi::bfloat16>
8484
namespace phi {
8585
namespace funcs {
8686

87-
using Tensor = DenseTensor;
88-
8987
inline void GetDims(
9088
const DDim& dim, int axis, int64_t* pre, int64_t* n, int64_t* post) {
9189
*pre = 1;
@@ -1089,7 +1087,7 @@ bool SortTopk(const GPUContext& dev_ctx,
10891087
bool largest = true) {
10901088
auto cu_stream = dev_ctx.stream();
10911089

1092-
Tensor input_indices;
1090+
DenseTensor input_indices;
10931091
const std::vector<int64_t> dims = {num_rows, num_cols};
10941092
auto dim = make_ddim(dims);
10951093
input_indices.Resize(dim);
@@ -1130,8 +1128,8 @@ bool SortTopk(const GPUContext& dev_ctx,
11301128
T* sorted_values_ptr;
11311129
int64_t* sorted_indices_ptr;
11321130

1133-
Tensor temp_values;
1134-
Tensor temp_indices;
1131+
DenseTensor temp_values;
1132+
DenseTensor temp_indices;
11351133

11361134
const T* input = input_tensor->data<T>();
11371135
T* values = out_tensor->data<T>();
@@ -1217,7 +1215,7 @@ bool SortTopk(const GPUContext& dev_ctx,
12171215
}
12181216
#endif
12191217
}
1220-
Tensor temp_storage;
1218+
DenseTensor temp_storage;
12211219
dev_ctx.template Alloc<uint8_t>(&temp_storage, temp_storage_bytes);
12221220

12231221
if (largest) {
@@ -1299,14 +1297,14 @@ bool SortTopk(const GPUContext& dev_ctx,
12991297
const Eigen::DSizes<Eigen::DenseIndex, 2> slice_indices{0, 0};
13001298
const Eigen::DSizes<Eigen::DenseIndex, 2> slice_sizes{num_rows, k};
13011299
auto e_indices = EigenMatrix<int64_t>::From(*indices_tensor, dim);
1302-
auto e_tmp_indices =
1303-
EigenMatrix<int64_t>::From(static_cast<const Tensor>(temp_indices));
1300+
auto e_tmp_indices = EigenMatrix<int64_t>::From(
1301+
static_cast<const DenseTensor>(temp_indices));
13041302

13051303
std::vector<int> odims = {static_cast<int>(num_rows), static_cast<int>(k)};
13061304
auto dim = make_ddim(odims);
13071305
auto e_values = EigenMatrix<T>::From(*out_tensor, dim);
13081306
auto e_tmp_values =
1309-
EigenMatrix<T>::From(static_cast<const Tensor>(temp_values));
1307+
EigenMatrix<T>::From(static_cast<const DenseTensor>(temp_values));
13101308

13111309
funcs::EigenSlice<std::decay_t<decltype(dev)>, int64_t, 2>::Eval(
13121310
dev, e_indices, e_tmp_indices, slice_indices, slice_sizes);

paddle/phi/kernels/funcs/transpose_function.cu.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ limitations under the License. */
2727
namespace phi {
2828
namespace funcs {
2929

30-
using Tensor = DenseTensor;
31-
3230
struct EqualTo {
3331
constexpr bool operator()(int a, int b) const { return a == b; }
3432
};

paddle/phi/kernels/gpudnn/conv_grad_kernel.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,8 +554,8 @@ void ConvCudnnGradKernel(const Context& dev_ctx,
554554
// So we create a new padded input tensor.
555555
int data_dim = strides.size(); // 2d or 3d
556556
bool is_sys_pad = funcs::IsSymmetricPadding(paddings, data_dim);
557-
Tensor transformed_input(input.type());
558-
Tensor transformed_input_grad(input.type());
557+
DenseTensor transformed_input(input.type());
558+
DenseTensor transformed_input_grad(input.type());
559559
std::vector<int> padding_common(data_dim, 0);
560560
std::vector<int> input_pad(transformed_input_channel.dims().size() * 2, 0);
561561

paddle/phi/kernels/impl/conv_grad_kernel_impl.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ void ConvGradKernel(const Context& dev_ctx,
207207

208208
if (filter_grad) {
209209
dev_ctx.template Alloc<T>(filter_grad);
210-
Tensor filter_grad_ = *filter_grad;
210+
DenseTensor filter_grad_ = *filter_grad;
211211
filter_grad_.Resize(filter_matrix_shape);
212212
set_zero(dev_ctx, filter_grad, static_cast<T>(0));
213213
funcs::Im2ColFunctor<funcs::ColFormat::CFO, Context, T> im2col;
@@ -369,7 +369,7 @@ void ConvGradGradKernel(const Context& dev_ctx,
369369
// dx = ddw * dy ==> dx(N, Cin, H, W), ddw(Cout, Cin, kh, kw), dy(N, Cout,
370370
// oH, oW)
371371
if (dX && ddW_in) {
372-
Tensor ddW;
372+
DenseTensor ddW;
373373
ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
374374
dev_ctx.template Alloc<T>(dX);
375375

@@ -436,7 +436,8 @@ void ConvGradGradKernel(const Context& dev_ctx,
436436
for (int i = 0; i < batch_size; ++i) {
437437
DenseTensor dy_batch =
438438
transformed_dY.Slice(i, i + 1).Resize(output_matrix_shape);
439-
Tensor ddx_batch = transformed_ddX.Slice(i, i + 1).Resize(input_shape);
439+
DenseTensor ddx_batch =
440+
transformed_ddX.Slice(i, i + 1).Resize(input_shape);
440441
for (int g = 0; g < groups; ++g) {
441442
// im2col
442443
DenseTensor dy_slice = dy_batch.Slice(g * out_step, (g + 1) * out_step);

paddle/phi/kernels/impl/expand_kernel_impl.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#define MAX_RANK_SUPPORTED 8
2323

2424
namespace phi {
25-
using Tensor = DenseTensor;
2625

2726
template <typename Context, typename T, int Rank>
2827
void Expand(const Context& dev_ctx,

0 commit comments

Comments (0)