Skip to content

Commit 861991b

Browse files
SigureMo and Codex committed
Fix compat pinned fill semantics
Co-authored-by: Codex <codex@openai.com>
1 parent 3751716 commit 861991b

File tree

10 files changed

+146
-169
lines changed

10 files changed

+146
-169
lines changed

paddle/phi/api/include/compat/ATen/core/TensorBase.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include <c10/core/SymIntArrayRef.h>
2626
#include <c10/core/TensorOptions.h>
2727
#include <utils/int_array_ref_conversion.h>
28-
#include <utils/mapped_pinned_tensor.h>
28+
#include <utils/pinned_tensor_ops.h>
2929
#include <utils/scalar_type_conversion.h>
3030
#include <algorithm>
3131
#include <iostream>
@@ -113,10 +113,11 @@ class PADDLE_API TensorBase {
113113
return backend_str + scalar_type_str + "Type";
114114
}
115115

116-
// Returns the pointer kernels should use. For CUDA-pinned tensors this is
117-
// the mapped device-visible alias rather than the raw host address.
118116
void* data_ptr() const {
119-
return compat::_PD_GetKernelVisibleDataPtr(tensor_);
117+
if (!tensor_.defined()) {
118+
return nullptr;
119+
}
120+
return const_cast<void*>(tensor_.data());
120121
}
121122
template <typename T>
122123
T* data_ptr() const {
@@ -267,12 +268,12 @@ class PADDLE_API TensorBase {
267268
}
268269

269270
const TensorBase& fill_(const at::Scalar& scalar) const {
270-
paddle::experimental::fill_(const_cast<PaddleTensor&>(tensor_), scalar);
271+
compat::_PD_FillTensorInplace(&const_cast<PaddleTensor&>(tensor_), scalar);
271272
return *this;
272273
}
273274

274275
const TensorBase& zero_() const {
275-
paddle::experimental::fill_(const_cast<PaddleTensor&>(tensor_), 0.0);
276+
compat::_PD_FillTensorInplace(&const_cast<PaddleTensor&>(tensor_), 0.0);
276277
return *this;
277278
}
278279

paddle/phi/api/include/compat/ATen/core/TensorBody.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#include <c10/core/Stream.h>
2424
#include <c10/core/SymIntArrayRef.h>
2525
#include <c10/util/OptionalArrayRef.h>
26-
#include <utils/mapped_pinned_tensor.h>
26+
#include <utils/pinned_tensor_ops.h>
2727
#include "paddle/phi/api/include/api.h"
2828
#include "paddle/phi/api/include/tensor.h"
2929
#include "paddle/phi/common/int_array.h"
@@ -128,7 +128,10 @@ class Tensor : public TensorBase {
128128
}
129129

130130
void* data_ptr() const {
131-
return compat::_PD_GetKernelVisibleDataPtr(tensor_);
131+
if (!tensor_.defined()) {
132+
return nullptr;
133+
}
134+
return const_cast<void*>(tensor_.data());
132135
}
133136
template <typename T>
134137
T* data_ptr() const {
@@ -408,12 +411,12 @@ class Tensor : public TensorBase {
408411
at::Tensor unflatten_symint(int64_t dim, c10::SymIntArrayRef sizes) const;
409412

410413
Tensor& fill_(const at::Scalar& value) const {
411-
paddle::experimental::fill_(const_cast<PaddleTensor&>(tensor_), value);
414+
compat::_PD_FillTensorInplace(&const_cast<PaddleTensor&>(tensor_), value);
412415
return const_cast<at::Tensor&>(*this);
413416
}
414417

415418
Tensor& zero_() const {
416-
paddle::experimental::fill_(const_cast<PaddleTensor&>(tensor_), 0.0);
419+
compat::_PD_FillTensorInplace(&const_cast<PaddleTensor&>(tensor_), 0.0);
417420
return const_cast<at::Tensor&>(*this);
418421
}
419422

@@ -490,7 +493,7 @@ class Tensor : public TensorBase {
490493
#endif
491494
}
492495

493-
return compat::_PD_CopyTensorToPinnedPlace(tensor_, pinned_place);
496+
return tensor_.copy_to(pinned_place, /*blocking=*/true);
494497
}
495498

496499
at::Tensor narrow_copy(int64_t dim, int64_t start, int64_t length) const;

paddle/phi/api/include/compat/ATen/ops/empty.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include <ATen/core/Tensor.h>
1818
#include <c10/core/TensorOptions.h>
1919
#include <utils/dense_sparse_conversion.h>
20-
#include <utils/mapped_pinned_tensor.h>
2120
#include <utils/pinned_place.h>
2221
#include <optional>
2322
#include <string_view>
@@ -42,10 +41,11 @@ inline at::Tensor empty(
4241
}
4342
phi::Place pinned_place =
4443
compat::_PD_GetCreatePinnedPlace(options._PD_GetPlace());
45-
auto dense = compat::_PD_EmptyPinnedTensor(
44+
auto dense = paddle::experimental::empty(
4645
size._PD_ToPaddleIntArray(),
4746
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
48-
pinned_place);
47+
phi::CPUPlace());
48+
dense = dense.copy_to(pinned_place, /*blocking=*/true);
4949
return compat::_PD_ConvertToSparseIfNeeded(dense, options.layout());
5050
}
5151
auto dense = paddle::experimental::empty(

paddle/phi/api/include/compat/ATen/ops/empty_like.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include <ATen/core/Tensor.h>
1818
#include <c10/core/TensorOptions.h>
1919
#include <utils/dense_sparse_conversion.h>
20-
#include <utils/mapped_pinned_tensor.h>
2120
#include <utils/pinned_place.h>
2221

2322
#include <optional>
@@ -50,7 +49,7 @@ inline at::Tensor empty_like(
5049
phi::CPUPlace());
5150
phi::Place base_place = options._PD_GetPlace();
5251
phi::Place pinned_place = compat::_PD_GetCreatePinnedPlace(base_place);
53-
dense = compat::_PD_CopyTensorToPinnedPlace(dense_cpu, pinned_place);
52+
dense = dense_cpu.copy_to(pinned_place, /*blocking=*/true);
5453
} else {
5554
auto place = options.device_opt().value_or(self.device());
5655
dense = paddle::experimental::empty_like(

paddle/phi/api/include/compat/ATen/ops/new_empty.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
#include <ATen/core/Tensor.h>
1818
#include <c10/core/TensorOptions.h>
19-
#include <utils/mapped_pinned_tensor.h>
2019
#include <utils/pinned_place.h>
2120
#include <optional>
2221
#include <string_view>
@@ -44,8 +43,9 @@ inline Tensor Tensor::new_empty(at::IntArrayRef size,
4443
"pin_memory=true requires device to be CPU, but got non-CPU device");
4544
}
4645
phi::Place pinned_place = compat::_PD_GetCreatePinnedPlace(pd_place);
47-
result = compat::_PD_EmptyPinnedTensor(
48-
size._PD_ToPaddleIntArray(), pd_dtype, pinned_place);
46+
result = paddle::experimental::empty(
47+
size._PD_ToPaddleIntArray(), pd_dtype, phi::CPUPlace())
48+
.copy_to(pinned_place, /*blocking=*/true);
4949
} else {
5050
result = paddle::experimental::empty(
5151
size._PD_ToPaddleIntArray(), pd_dtype, pd_place);

paddle/phi/api/include/compat/utils/mapped_pinned_tensor.h

Lines changed: 0 additions & 150 deletions
This file was deleted.
paddle/phi/api/include/compat/utils/pinned_tensor_ops.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <c10/core/Scalar.h>
18+
19+
#include "paddle/common/ddim.h"
20+
#include "paddle/phi/api/include/api.h"
21+
#include "paddle/phi/api/include/tensor.h"
22+
#include "paddle/phi/common/int_array.h"
23+
#include "paddle/phi/common/place.h"
24+
25+
namespace compat {
26+
27+
inline bool _PD_IsHostPinnedTensor(const paddle::Tensor& tensor) {
28+
const auto& place = tensor.place();
29+
return phi::is_cuda_pinned_place(place) || phi::is_xpu_pinned_place(place);
30+
}
31+
32+
inline void _PD_FillTensorInplace(paddle::Tensor* tensor,
33+
const c10::Scalar& value) {
34+
if (!_PD_IsHostPinnedTensor(*tensor)) {
35+
paddle::experimental::fill_(*tensor, value);
36+
return;
37+
}
38+
39+
auto cpu_src = paddle::experimental::full(
40+
phi::IntArray(common::vectorize<int64_t>(tensor->dims())),
41+
value,
42+
tensor->dtype(),
43+
phi::CPUPlace());
44+
tensor->copy_(cpu_src, tensor->place(), /*blocking=*/true);
45+
}
46+
47+
} // namespace compat

python/paddle/tensor/manipulation.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,26 @@ def _attr_offsets_check(offset_val):
11341134
return out
11351135

11361136

1137+
@dygraph_only
def _is_host_pinned_tensor(x: Tensor) -> bool:
    """Return True when ``x`` resides in host-pinned (page-locked) memory.

    Probes both the CUDA-pinned and XPU-pinned place predicates; the
    ``hasattr`` checks keep this safe on builds whose Place type exposes
    only one (or neither) of them.
    """
    tensor_place = x.place
    for probe in ("is_cuda_pinned_place", "is_xpu_pinned_place"):
        if hasattr(tensor_place, probe) and getattr(tensor_place, probe)():
            return True
    return False
1143+
1144+
1145+
@dygraph_only
def _fill_host_pinned_tensor_inplace(x: Tensor, value: float) -> Tensor:
    """Fill the host-pinned tensor ``x`` with ``value`` in place.

    The fill kernel cannot write into pinned memory directly, so a CPU
    staging tensor of matching shape and dtype is created first and then
    blocking-copied into ``x``'s pinned buffer. Returns ``x``.
    """
    # NOTE(review): assumes ``paddle.full`` accepts a ``device`` keyword in
    # this branch — confirm against the paddle.full signature in this repo.
    staging = paddle.full(
        shape=x.shape,
        fill_value=value,
        dtype=x.dtype,
        device=paddle.CPUPlace(),
    )
    # Second positional argument is ``blocking``: wait for the copy to finish.
    x.copy_(staging, True)
    return x
1155+
1156+
11371157
@dygraph_only
11381158
def fill_(x: Tensor, value: float) -> Tensor:
11391159
"""
@@ -1165,6 +1185,8 @@ def fill_(x: Tensor, value: float) -> Tensor:
11651185
raise TypeError(
11661186
f"The type of 'value' must be int or float, but received {type(value)}."
11671187
)
1188+
if _is_host_pinned_tensor(x):
1189+
return _fill_host_pinned_tensor_inplace(x, value)
11681190
return _C_ops.fill_(x, value)
11691191

11701192

@@ -1194,6 +1216,8 @@ def zero_(x: Tensor) -> Tensor:
11941216
[0, 0, 0, 0, 0]
11951217
11961218
"""
1219+
if _is_host_pinned_tensor(x):
1220+
return _fill_host_pinned_tensor_inplace(x, 0.0)
11971221
return _C_ops.fill_(x, 0.0)
11981222

11991223

0 commit comments

Comments
 (0)