nv-legate · ipdemes · Oct 19, 2022 · Sep 9, 2022 · Sep 12, 2022 · Sep 12, 2022
diff --git a/cunumeric/array.py b/cunumeric/array.py
@@ -919,12 +919,8 @@ def _convert_key(self, key: Any, first: bool = True) -> Any:
             key = convert_to_cunumeric_ndarray(key)
             if key.dtype != bool and not np.issubdtype(key.dtype, np.integer):
                 raise TypeError("index arrays should be int or bool type")
-            if key.dtype != bool and key.dtype != np.int64:
-                runtime.warn(
-                    "converting index array to int64 type",
-                    category=RuntimeWarning,
-                )
-                key = key.astype(np.int64)
+            if key.dtype != bool:
+                key = key._warn_and_convert(np.dtype(np.int64))
 
             return key._thunk
 
@@ -2092,12 +2088,8 @@ def compress(
             raise ValueError(
                 "Dimension mismatch: condition must be a 1D array"
             )
-        if condition.dtype != bool:
-            runtime.warn(
-                "converting condition to bool type",
-                category=RuntimeWarning,
-            )
-            condition = condition.astype(bool)
+
+        condition = condition._warn_and_convert(np.dtype(bool))
 
         if axis is None:
             axis = 0
@@ -2464,6 +2456,59 @@ def diagonal(
                 raise ValueError("Either axis1/axis2 or axes must be supplied")
         return self._diag_helper(offset=offset, axes=axes, extract=extract)
 
+    @add_boilerplate("indices", "values")
+    def put(
+        self, indices: ndarray, values: ndarray, mode: str = "raise"
+    ) -> None:
+        """
+        Replaces specified elements of the array with given values.
+
+        Refer to :func:`cunumeric.put` for full documentation.
+
+        See Also
+        --------
+        cunumeric.put : equivalent function
+
+        Availability
+        --------
+        Multiple GPUs, Multiple CPUs
+
+        """
+        # Reject an invalid mode first, even if there is nothing to write.
+        if mode not in ("raise", "wrap", "clip"):
+            raise ValueError(
+                "mode must be one of 'clip', 'raise', or 'wrap' "
+                f"(got {mode})"
+            )
+
+        if values.size == 0 or indices.size == 0 or self.size == 0:
+            return
+
+        if mode == "wrap":
+            indices = indices % self.size
+        elif mode == "clip":
+            indices = indices.clip(0, self.size - 1)
+
+        indices = indices._warn_and_convert(np.dtype(np.int64))
+        values = values._warn_and_convert(self.dtype)
+
+        if indices.ndim > 1:
+            indices = indices.ravel()
+
+        if self.shape == ():  # 0-d target: only one element can be written
+            if values.shape == ():
+                v = values
+            else:
+                v = values[0]
+            self._thunk.copy(v._thunk, deep=False)
+            return
+
+        # call _wrap on the values if they need to be wrapped
+        if values.ndim != indices.ndim or values.size != indices.size:
+            values = values._wrap(indices.size)
+
+        self._thunk.put(indices._thunk, values._thunk)
+
     @add_boilerplate()
     def trace(
         self,
@@ -3810,6 +3855,16 @@ def _maybe_convert(self, dtype: np.dtype[Any], hints: Any) -> ndarray:
         copy._thunk.convert(self._thunk)
         return copy
 
+    def _warn_and_convert(self, dtype: np.dtype[Any]) -> ndarray:
+        """Return ``self`` cast to ``dtype``, warning if a cast is needed."""
+        if self.dtype == dtype:
+            # Already the requested type: no warning, no copy.
+            return self
+        runtime.warn(
+            f"converting array to {dtype} type", category=RuntimeWarning
+        )
+        return self.astype(dtype)
+
     # For performing normal/broadcast unary operations
     @classmethod
     def _perform_unary_op(

diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py
@@ -781,10 +781,16 @@ def _broadcast(self, shape: NdShape) -> Any:
 
         return result
 
-    def _convert_future_to_regionfield(self) -> DeferredArray:
+    def _convert_future_to_regionfield(
+        self, change_shape: bool = False
+    ) -> DeferredArray:
+        if change_shape and self.shape == ():
+            shape: NdShape = (1,)
+        else:
+            shape = self.shape
         store = self.context.create_store(
             self.dtype,
-            shape=self.shape,
+            shape=shape,
             optimize_scalar=False,
         )
         thunk_copy = DeferredArray(
@@ -1660,6 +1666,60 @@ def _diag_helper(
 
         task.execute()
 
+    @auto_convert([1, 2])
+    def put(self, indices: Any, values: Any) -> None:
+        """Write ``values`` into ``self`` at the positions in ``indices``."""
+        if indices.base.kind == Future or indices.base.transformed:
+            change_shape = indices.base.kind == Future  # () -> (1,)
+            indices = indices._convert_future_to_regionfield(change_shape)
+        if values.base.kind == Future or values.base.transformed:
+            change_shape = values.base.kind == Future
+            values = values._convert_future_to_regionfield(change_shape)
+
+        if self.base.kind == Future or self.base.transformed:
+            change_shape = self.base.kind == Future
+            self_tmp = self._convert_future_to_regionfield(change_shape)
+        else:
+            self_tmp = self
+
+        assert indices.size == values.size  # one value per index
+
+        # first, we create an indirect array of PointN type that has the
+        # same shape as indices and is used to copy data from values
+        # to the target ND array (self)
+        N = self_tmp.ndim
+        pointN_dtype = self.runtime.get_point_type(N)
+        indirect = cast(
+            DeferredArray,
+            self.runtime.create_empty_thunk(
+                shape=indices.shape,
+                dtype=pointN_dtype,
+                inputs=[indices],
+            ),
+        )
+
+        shape = self_tmp.shape
+        task = self.context.create_task(CuNumericOpCode.WRAP)
+        task.add_output(indirect.base)
+        task.add_scalar_arg(shape, (ty.int64,))
+        task.add_scalar_arg(True, bool)  # has_input
+        task.add_input(indices.base)
+        task.add_alignment(indices.base, indirect.base)
+        task.throws_exception(IndexError)
+        task.execute()
+        if indirect.base.kind == Future:
+            indirect = indirect._convert_future_to_regionfield()
+
+        copy = self.context.create_copy()
+        copy.set_target_indirect_out_of_range(False)
+        copy.add_input(values.base)
+        copy.add_target_indirect(indirect.base)
+        copy.add_output(self_tmp.base)
+        copy.execute()
+
+        if self_tmp is not self:  # wrote into a temporary: copy it back
+            self.copy(self_tmp, deep=True)
+
     # Create an identity array with the ones offset from the diagonal by k
     def eye(self, k: int) -> None:
         assert self.ndim == 2  # Only 2-D arrays should be here
@@ -2877,8 +2937,13 @@ def unary_op(
         args: Any,
         multiout: Optional[Any] = None,
     ) -> None:
-        lhs = self.base
-        rhs = src._broadcast(lhs.shape)
+
+        if self.shape == () and self.size == src.size:
+            lhs = self._broadcast(src.shape)
+            rhs = src.base
+        else:
+            lhs = self.base
+            rhs = src._broadcast(lhs.shape)
 
         task = self.context.create_auto_task(CuNumericOpCode.UNARY_OP)
         task.add_output(lhs)
@@ -3334,9 +3399,11 @@ def unpackbits(
         task.execute()
 
     @auto_convert([1])
-    def _wrap(self, src: Any, new_len: int) -> None:
+    def _wrap(self, src: DeferredArray, new_len: int) -> None:
+        src = self.runtime.to_deferred_array(src)
         if src.base.kind == Future or src.base.transformed:
-            src = src._convert_future_to_regionfield()
+            change_shape = src.base.kind == Future
+            src = src._convert_future_to_regionfield(change_shape)
 
         # first, we create indirect array with PointN type that
         # (len,) shape and is used to copy data from original array
@@ -3355,6 +3422,7 @@ def _wrap(self, src: Any, new_len: int) -> None:
         task = self.context.create_task(CuNumericOpCode.WRAP)
         task.add_output(indirect.base)
         task.add_scalar_arg(src.shape, (ty.int64,))
+        task.add_scalar_arg(False, bool)  # has_input
         task.execute()
 
         copy = self.context.create_copy()

diff --git a/cunumeric/eager.py b/cunumeric/eager.py
@@ -619,6 +619,13 @@ def _diag_helper(
                 axes = tuple(range(ndims - naxes, ndims))
                 self.array = diagonal_reference(rhs.array, axes)
 
+    def put(self, indices: Any, values: Any) -> None:
+        self.check_eager_args(indices, values)
+        if self.deferred is None:  # no deferred counterpart: use NumPy
+            np.put(self.array, indices.array, values.array)
+        else:
+            self.deferred.put(indices, values)
+
     def eye(self, k: int) -> None:
         if self.deferred is not None:
             self.deferred.eye(k)

diff --git a/cunumeric/module.py b/cunumeric/module.py
@@ -2386,12 +2386,7 @@ def repeat(a: ndarray, repeats: Any, axis: Optional[int] = None) -> ndarray:
     # repeats is an array
     else:
         # repeats should be integer type
-        if repeats.dtype != np.int64:
-            runtime.warn(
-                "converting repeats to an integer type",
-                category=RuntimeWarning,
-            )
-        repeats = repeats.astype(np.int64)
+        repeats = repeats._warn_and_convert(np.dtype(np.int64))
         if repeats.shape[0] != array.shape[axis]:
             raise ValueError("incorrect shape of repeats array")
         result = array._thunk.repeat(
@@ -3449,6 +3444,44 @@ def diagonal(
     )
 
 
+@add_boilerplate("a", "indices", "values")
+def put(
+    a: ndarray, indices: ndarray, values: ndarray, mode: str = "raise"
+) -> None:
+    """
+    Replaces specified elements of an array with given values.
+
+    The indexing works as if the target array is first flattened.
+
+    Parameters
+    ----------
+    a : array_like
+        Array to put data into
+    indices : array_like
+        Target indices, interpreted as integers.
+        WARNING: if there are repeated entries in the indices array,
+        Legate doesn't guarantee the order in which values are updated.
+    values : array_like
+        Values to place in `a` at target indices. If values array is shorter
+        than indices, it will be repeated as necessary.
+    mode : {'raise', 'wrap', 'clip'}, optional
+        Specifies how out-of-bounds indices will behave.
+
+        * 'raise' -- raise an error (default)
+        * 'wrap' -- wrap around
+        * 'clip' -- clip to the range
+
+    See Also
+    --------
+    numpy.put
+
+    Availability
+    --------
+    Multiple GPUs, Multiple CPUs
+    """
+    a.put(indices=indices, values=values, mode=mode)
+
+
 @add_boilerplate("a", "val")
 def fill_diagonal(a: ndarray, val: ndarray, wrap: bool = False) -> None:
     """

diff --git a/cunumeric/thunk.py b/cunumeric/thunk.py
@@ -196,6 +196,10 @@ def _diag_helper(
     ) -> None:
         ...
 
+    @abstractmethod
+    def put(self, indices: Any, values: Any) -> None:
+        ...  # concrete versions live in eager.py and deferred.py
+
     @abstractmethod
     def eye(self, k: int) -> None:
         ...

diff --git a/docs/cunumeric/source/api/indexing.rst b/docs/cunumeric/source/api/indexing.rst
@@ -43,5 +43,6 @@ Inserting data into arrays
    :toctree: generated/
 
    fill_diagonal
+   put
    put_along_axis
    place
diff --git a/src/cunumeric/index/wrap.cc b/src/cunumeric/index/wrap.cc
@@ -24,28 +24,30 @@ using namespace legate;
 
 template <int DIM>
 struct WrapImplBody<VariantKind::CPU, DIM> {
+  template <typename IND>
   void operator()(const AccessorWO<Point<DIM>, 1>& out,
                   const Pitches<0>& pitches_out,
                   const Rect<1>& out_rect,
                   const Pitches<DIM - 1>& pitches_in,
                   const Rect<DIM>& in_rect,
-                  const bool dense) const
+                  const bool dense,
+                  const IND& indices) const
   {
     const int64_t start  = out_rect.lo[0];
     const int64_t end    = out_rect.hi[0];
     const auto in_volume = in_rect.volume();
     if (dense) {
-      int64_t out_idx = 0;
-      auto outptr     = out.ptr(out_rect);
+      auto outptr = out.ptr(out_rect);
       for (int64_t i = start; i <= end; i++) {
-        const int64_t input_idx = i % in_volume;
+        check_idx(i, in_volume, indices);
+        const int64_t input_idx = compute_idx(i, in_volume, indices);
         auto point              = pitches_in.unflatten(input_idx, in_rect.lo);
-        outptr[out_idx]         = point;
-        out_idx++;
+        outptr[i - start]       = point;
       }
     } else {
       for (int64_t i = start; i <= end; i++) {
-        const int64_t input_idx = i % in_volume;
+        check_idx(i, in_volume, indices);
+        const int64_t input_idx = compute_idx(i, in_volume, indices);
         auto point              = pitches_in.unflatten(input_idx, in_rect.lo);
         out[i]                  = point;
       }