add argsort operator (#956)

simonbyrne · web-flow · commit 8a6d39927570 · 2025-05-05T13:32:00.000-07:00
* add argsort operator
diff --git a/docs_input/api/searchsort/argsort.rst b/docs_input/api/searchsort/argsort.rst
@@ -0,0 +1,27 @@
+.. _argsort_func:
+
+argsort
+#######
+
+Compute the indices that would sort the elements of a tensor in either ascending or descending order
+
+.. doxygenfunction:: argsort(const InputOperator &a, const SortDirection_t dir)
+
+Examples
+~~~~~~~~
+
+.. literalinclude:: ../../../test/00_tensor/CUBTests.cu
+   :language: cpp
+   :start-after: example-begin argsort-test-1
+   :end-before: example-end argsort-test-1
+   :dedent:
+
+
+.. literalinclude:: ../../../test/00_tensor/CUBTests.cu
+   :language: cpp
+   :start-after: example-begin argsort-test-2
+   :end-before: example-end argsort-test-2
+   :dedent:
+
+
+
diff --git a/include/matx/core/iterator.h b/include/matx/core/iterator.h
@@ -291,7 +291,23 @@ struct RandomOperatorOutputIterator {
   __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ friend bool operator==(const self_type &a, const self_type &b)
   {
     return a.offset_ == b.offset_;
-  }    
+  }
+
+  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ friend bool operator<(const self_type &a, const self_type &b) {
+    return a.offset_ < b.offset_; // ordering is decided by offset_ alone, as in operator== above
+  }
+
+  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ friend bool operator>(const self_type &a, const self_type &b) {
+    return a.offset_ > b.offset_; // true when a's offset_ is strictly greater; no other member is compared
+  }
+
+  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ friend bool operator<=(const self_type &a, const self_type &b) {
+    return a.offset_ <= b.offset_; // offset_-only comparison; equal offsets compare as true
+  }
+
+  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ friend bool operator>=(const self_type &a, const self_type &b) {
+    return a.offset_ >= b.offset_; // offset_-only comparison; equal offsets compare as true
+  }
 
   static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank() {
     return OperatorType::Rank();
diff --git a/include/matx/operators/argsort.h b/include/matx/operators/argsort.h
@@ -0,0 +1,145 @@
+////////////////////////////////////////////////////////////////////////////////
+// BSD 3-Clause License
+//
+// Copyright (c) 2025, NVIDIA Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+//    list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+
+#include "matx/core/type_utils.h"
+#include "matx/operators/base_operator.h"
+#include "matx/transforms/cub.h"
+
+namespace matx {
+
+
+
+namespace detail {
+  template<typename OpA> // OpA: type of the input operator whose sort indices are produced
+  class ArgsortOp : public BaseOp<ArgsortOp<OpA>> // operator whose elements come from argsort_impl applied to the input
+  {
+    private:
+      typename detail::base_type_t<OpA> a_; // input operator, stored as detail::base_type_t<OpA>
+      SortDirection_t dir_; // sort direction handed to argsort_impl in Exec
+      cuda::std::array<index_t, OpA::Rank()> out_dims_; // output extents, filled from a_.Size(r) in the constructor
+      mutable detail::tensor_impl_t<index_t, OpA::Rank()> tmp_out_; // tensor read by operator(); set up in PreRun
+      mutable index_t *ptr = nullptr; // passed by address to AllocateTempTensor in PreRun, given to matxFree in PostRun
+      // tmp_out_ and ptr are mutable because PreRun/PostRun are const member functions that modify them.
+    public:
+      using matxop = bool; // presumably a tag detected by MatX type traits — TODO confirm in type_utils.h
+      using value_type = index_t; // element type of tmp_out_, i.e. what operator() yields
+      using matx_transform_op = bool; // presumably marks this as a transform operator — TODO confirm
+      using sort_xform_op = bool; // presumably marks this as a sort transform — TODO confirm
+      // str(): short textual name of this operator. The constructor follows on the next line.
+      __MATX_INLINE__ std::string str() const { return "argsort()"; }
+      __MATX_INLINE__ ArgsortOp(const OpA &a, const SortDirection_t dir) : a_(a), dir_(dir) { // stores input and direction
+        for (int r = 0; r < Rank(); r++) {
+          out_dims_[r] = a_.Size(r); // output has the same extent as the input in every dimension
+        }
+      }
+      // Data(): returns ptr as-is; it is nullptr until PreRun has passed &ptr to AllocateTempTensor.
+      __MATX_HOST__ __MATX_INLINE__ auto Data() const noexcept { return ptr; }
+      // operator(): element access, forwarded unchanged to tmp_out_ (which PreRun sets up).
+      template <typename... Is>
+      __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const {
+        return tmp_out_(indices...);
+      }
+      // Exec(): calls argsort_impl with element 0 of the `out` tuple, the input a_, the direction and the executor.
+      template <typename Out, typename Executor>
+      void Exec(Out &&out, Executor &&ex) const {
+        argsort_impl(cuda::std::get<0>(out), a_, dir_, ex); // PreRun passes tmp_out_ as element 0
+      }
+      // Rank(): the output rank is the rank of the input operator type.
+      static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
+      {
+        return OpA::Rank();
+      }
+      // InnerPreRun(): forwards PreRun to the input, but only when OpA satisfies is_matx_op.
+      template <typename ShapeType, typename Executor>
+      __MATX_INLINE__ void InnerPreRun([[maybe_unused]] ShapeType &&shape, Executor &&ex) const noexcept
+      {
+        if constexpr (is_matx_op<OpA>()) {
+          a_.PreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
+        }          
+      }      
+      // PreRun(): runs the input's PreRun, then AllocateTempTensor for tmp_out_, then Exec into tmp_out_ — in that order.
+      template <typename ShapeType, typename Executor>
+      __MATX_INLINE__ void PreRun([[maybe_unused]] ShapeType &&shape, Executor &&ex) const noexcept
+      {
+        InnerPreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));     
+        // AllocateTempTensor receives tmp_out_, the output extents and &ptr (presumably sets both — TODO confirm).
+        detail::AllocateTempTensor(tmp_out_, std::forward<Executor>(ex), out_dims_, &ptr);
+        // Run the sort now so that operator() can read the result from tmp_out_ afterwards.
+        Exec(cuda::std::make_tuple(tmp_out_), std::forward<Executor>(ex));
+      }
+      // PostRun(): forwards PostRun to the input when it is a MatX operator, then passes ptr to matxFree.
+      template <typename ShapeType, typename Executor>
+      __MATX_INLINE__ void PostRun(ShapeType &&shape, Executor &&ex) const noexcept
+      {
+        if constexpr (is_matx_op<OpA>()) {
+          a_.PostRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
+        }
+        // NOTE(review): ptr is not reset after this call, so Data() keeps returning the old value.
+        matxFree(ptr);
+      }      
+      // Size(): extent of the output along `dim`, as recorded in the constructor.
+      constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t Size(int dim) const
+      {
+        return out_dims_[dim];
+      }
+
+  };
+}
+
+/**
+ * Compute the indices that would sort the rows of an operator
+ *
+ * Builds an operator whose elements are the indices that put each row of the
+ * input into sorted order. Currently supported types are float, double, ints,
+ * and long ints (both signed and unsigned). A 1D operator is sorted linearly;
+ * for 2D and above, each row is sorted separately as part of a batch.
+ *
+ * @note Temporary memory may be used during the sorting process, and about 4N will
+ * be allocated, where N is the length of the tensor.
+ *
+ * @tparam InputOperator
+ *   Type of the input operator
+ * @param a
+ *   Input operator
+ * @param dir
+ *   Direction to sort (either SORT_DIR_ASC or SORT_DIR_DESC); defaults to SORT_DIR_ASC
+ * @returns Operator containing indices that would sort the tensor
+ */
+template <typename InputOperator>
+__MATX_INLINE__ auto argsort(const InputOperator &a, const SortDirection_t dir = SORT_DIR_ASC) {
+  return detail::ArgsortOp<InputOperator>(a, dir);
+}
+
+}
diff --git a/include/matx/operators/operators.h b/include/matx/operators/operators.h
@@ -129,3 +129,4 @@
 #include "matx/operators/argminmax.h"
 #include "matx/operators/all.h"
 #include "matx/operators/any.h"
+#include "matx/operators/argsort.h"
diff --git a/include/matx/transforms/cub.h b/include/matx/transforms/cub.h
diff --git a/test/00_tensor/CUBTests.cu b/test/00_tensor/CUBTests.cu