Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions paddle/phi/api/include/compat/ATen/native/RangeUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <ATen/AccumulateType.h>
#include <c10/core/Scalar.h>

#include <cmath>
#include <limits>

namespace at::native {

// Validates the (start, end, step) triple of an arange call.
// Fails via TORCH_CHECK when the step is zero (or NaN), when either bound
// is non-finite, or when the step's sign cannot reach `end` from `start`.
inline void arange_check_bounds(const c10::Scalar& start,
                                const c10::Scalar& end,
                                const c10::Scalar& step) {
  // Do all validation in double precision so every Scalar payload type is
  // handled uniformly and cannot flip a comparison due to narrowing.
  const double lo = start.to<double>();
  const double hi = end.to<double>();
  const double delta = step.to<double>();

  // Written as (>0 || <0) rather than (!= 0) so a NaN step is rejected too
  // (both comparisons are false for NaN).
  TORCH_CHECK(delta > 0 || delta < 0, "step must be nonzero");
  TORCH_CHECK(std::isfinite(lo) && std::isfinite(hi),
              "unsupported range: ",
              lo,
              " -> ",
              hi);
  // A positive step must walk upward (hi >= lo); a negative one downward.
  const bool ascending = (delta > 0) && (hi >= lo);
  const bool descending = (delta < 0) && (hi <= lo);
  TORCH_CHECK(ascending || descending,
              "upper bound and lower bound inconsistent with step sign");
}

// Computes the number of elements produced by arange(start, end, step),
// i.e. ceil((end - start) / step), after validating the bounds.
//
// scalar_t selects the arithmetic path:
//   * int64_t       -> exact integer arithmetic (a double cannot represent
//                      every int64_t value)
//   * anything else -> double-precision arithmetic
//
// Returns the element count; fails via TORCH_CHECK (through
// arange_check_bounds, or the final overflow check) on invalid input.
template <typename scalar_t>
int64_t compute_arange_size(const Scalar& start,
                            const Scalar& end,
                            const Scalar& step) {
  // Rejects zero/NaN step, non-finite bounds, and inconsistent step sign.
  arange_check_bounds(start, end, step);

  // we use double precision for (start - end) / step
  // to compute size_d for consistency across devices.
  // The problem with using accscalar_t is that accscalar_t might be float32 on
  // gpu for a float32 scalar_t, but double on cpu for the same, and the
  // effective output size starts differing on CPU vs GPU because of precision
  // issues, which we dont want. the corner-case we do want to take into account
  // is int64_t, which has higher precision than double
  double size_d;
  if constexpr (std::is_same_v<scalar_t, int64_t>) {
    // Presumably at::acc_type<int64_t, false> maps to int64_t (per
    // AccumulateType.h), making this branch exact integer math — confirm.
    using accscalar_t = at::acc_type<scalar_t, false>;
    auto xstart = start.to<accscalar_t>();
    auto xend = end.to<accscalar_t>();
    auto xstep = step.to<accscalar_t>();
    // sgn is +1 or -1 (step is known nonzero); adding (xstep - sgn) before
    // the truncating integer division yields ceil((xend - xstart) / xstep)
    // for either step sign. std::ceil then only converts the already
    // integral quotient to double.
    int64_t sgn = (xstep > 0) - (xstep < 0);
    size_d = std::ceil((xend - xstart + xstep - sgn) / xstep);
  } else {
    size_d =
        std::ceil((end.to<double>() - start.to<double>()) / step.to<double>());
  }

  // NOTE(review): static_cast<double>(INT64_MAX) rounds up to 2^63, so a
  // size_d of exactly 2^63 passes this check and would overflow the cast
  // below; this appears to mirror the equivalent upstream check — confirm
  // against PyTorch's RangeFactories if exactness at the boundary matters.
  TORCH_CHECK(size_d >= 0 && size_d <= static_cast<double>(
                                           std::numeric_limits<int64_t>::max()),
              "invalid size, possible overflow?");

  return static_cast<int64_t>(size_d);
}

} // namespace at::native
31 changes: 17 additions & 14 deletions paddle/phi/api/include/compat/ATen/ops/arange.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#pragma once

#include <ATen/core/Tensor.h>
#include <ATen/native/RangeUtils.h>
#include <c10/core/TensorOptions.h>
#include <optional>

Expand All @@ -26,7 +27,7 @@ inline at::Tensor arange(const at::Scalar& end,
at::TensorOptions options = {}) {
return paddle::experimental::arange(
paddle::experimental::full({}, 0, phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<int64_t>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
options._PD_GetPlace());
Expand All @@ -39,7 +40,7 @@ inline at::Tensor arange(const at::Scalar& end,
::std::optional<bool> pin_memory) {
return paddle::experimental::arange(
paddle::experimental::full({}, 0, phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<int64_t>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(
dtype.value_or(c10::get_default_dtype())),
Expand All @@ -51,8 +52,8 @@ inline at::Tensor arange(const at::Scalar& start,
at::TensorOptions options = {}) {
return paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<int64_t>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<int64_t>(), phi::DataType::FLOAT64),
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
options._PD_GetPlace());
Expand All @@ -66,8 +67,8 @@ inline at::Tensor arange(const at::Scalar& start,
::std::optional<bool> pin_memory) {
return paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<int64_t>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<int64_t>(), phi::DataType::FLOAT64),
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(
dtype.value_or(c10::get_default_dtype())),
Expand All @@ -78,12 +79,13 @@ inline at::Tensor arange(const at::Scalar& start,
const at::Scalar& end,
const at::Scalar& step,
at::TensorOptions options = {}) {
// Match PyTorch: step must be non-zero and consistent with (end - start).
at::native::arange_check_bounds(start, end, step);
return paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<int64_t>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<int64_t>(), phi::DataType::FLOAT64),
paddle::experimental::full(
{}, step.to<int64_t>(), phi::DataType::FLOAT64),
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
options._PD_GetPlace());
}
Expand All @@ -95,12 +97,13 @@ inline at::Tensor arange(const at::Scalar& start,
::std::optional<at::Layout> layout,
::std::optional<at::Device> device,
::std::optional<bool> pin_memory) {
// Match PyTorch: step must be non-zero and consistent with (end - start).
at::native::arange_check_bounds(start, end, step);
return paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<int64_t>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<int64_t>(), phi::DataType::FLOAT64),
paddle::experimental::full(
{}, step.to<int64_t>(), phi::DataType::FLOAT64),
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(
dtype.value_or(c10::get_default_dtype())),
device.value_or(at::kCPU)._PD_GetInner());
Expand Down
31 changes: 31 additions & 0 deletions paddle/phi/api/include/compat/ATen/ops/narrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,37 @@ inline at::Tensor narrow(const at::Tensor& self,
int64_t dim,
int64_t start,
int64_t length) {
// Bounds checks matching PyTorch behavior
PD_CHECK(self.dim() > 0, "narrow() cannot be applied to a 0-dim tensor.");
PD_CHECK(length >= 0, "narrow(): length must be non-negative.");

// Normalize negative dim
int64_t ndim = self.dim();
if (dim < 0) dim += ndim;
PD_CHECK(dim >= 0 && dim < ndim,
"start out of range (expected to be in range of [",
-ndim,
", ",
ndim - 1,
"], but got ",
dim,
")");

int64_t cur_size = self.sizes()[dim];

// Wrap negative start (matching PyTorch: only wrap when start != cur_size)
if (start < 0) {
start = start + cur_size;
}
PD_CHECK(start <= cur_size - length,
"start (",
start,
") + length (",
length,
") exceeds dimension size (",
cur_size,
").");

// Use slice to implement narrow: narrow(dim, start, length) is equivalent
// to slice(dim, start, start + length)
return Tensor(paddle::experimental::slice(
Expand Down
28 changes: 24 additions & 4 deletions paddle/phi/api/include/compat/ATen/ops/sum.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,44 @@ namespace at {

inline at::Tensor sum(const at::Tensor& self,
::std::optional<at::ScalarType> dtype = ::std::nullopt) {
// Match PyTorch promotion: integer inputs -> int64; others -> keep input
// dtype.
at::ScalarType resolved_dtype;
if (dtype.has_value()) {
resolved_dtype = dtype.value();
} else {
at::ScalarType input_dtype = self.scalar_type();
resolved_dtype = at::isIntegralType(input_dtype, /*includeBool=*/true)
? at::kLong
: input_dtype;
}
Comment on lines +29 to +39
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change alters sum's implicit dtype promotion rules (integral -> int64, otherwise keep input dtype). There are existing C++ compat tests for sum, but none cover integral inputs or bool promotion. Adding/adjusting tests to assert the new promotion behavior (e.g., int32/bool inputs sum to int64 when dtype is not provided) would help prevent regressions.

Copilot uses AI. Check for mistakes.
Comment on lines +29 to +39
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dtype-resolution logic is duplicated across both sum overloads. Consider factoring it into a small helper (e.g., a local lambda) to keep the promotion rules in one place and reduce the chance of the overloads diverging in future edits.

Copilot uses AI. Check for mistakes.
return paddle::experimental::sum(
self._PD_GetInner(),
{},
compat::_PD_AtenScalarTypeToPhiDataType(
dtype.value_or(c10::get_default_dtype())),
compat::_PD_AtenScalarTypeToPhiDataType(resolved_dtype),
/*keepdim=*/false);
}

// Reduces `self` by summation over `dim` (all axes when dim is empty),
// delegating to paddle::experimental::sum.
//
// dim:     axes to reduce; an unset optional reduces over every axis.
// keepdim: when true, reduced axes are kept with size 1.
// dtype:   explicit accumulation/output dtype; when unset, integral inputs
//          (including bool) are promoted to int64 and all other inputs keep
//          their own dtype.
inline at::Tensor sum(const at::Tensor& self,
                      at::OptionalIntArrayRef dim,
                      bool keepdim = false,
                      ::std::optional<at::ScalarType> dtype = ::std::nullopt) {
  // Match PyTorch promotion: integer inputs -> int64; others -> keep input
  // dtype.
  at::ScalarType resolved_dtype;
  if (dtype.has_value()) {
    // Caller-specified dtype always wins over implicit promotion.
    resolved_dtype = dtype.value();
  } else {
    at::ScalarType input_dtype = self.scalar_type();
    // includeBool=true so bool tensors also promote to int64.
    resolved_dtype = at::isIntegralType(input_dtype, /*includeBool=*/true)
                         ? at::kLong
                         : input_dtype;
  }
  return paddle::experimental::sum(
      self._PD_GetInner(),
      // Empty IntArray signals a full reduction over all axes.
      dim.has_value() ? dim.value()._PD_ToPaddleIntArray()
                      : paddle::experimental::IntArray(),
      compat::_PD_AtenScalarTypeToPhiDataType(resolved_dtype),
      keepdim);
}

Expand Down
Loading