Support onnxruntime 1.17.1 #2755

Open. Wants to merge 3 commits into base: main.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -12,6 +12,7 @@ cmake_minimum_required(VERSION 3.14)
project(MMDeploy VERSION 1.3.1)

set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(MMDEPLOY_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(MMDEPLOY_VERSION_MINOR ${PROJECT_VERSION_MINOR})
2 changes: 1 addition & 1 deletion csrc/mmdeploy/backend_ops/CMakeLists.txt
@@ -1,5 +1,5 @@
if (NOT MSVC)
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
endif ()

8 changes: 0 additions & 8 deletions csrc/mmdeploy/backend_ops/onnxruntime/common/ort_utils.h
@@ -10,14 +10,6 @@ namespace mmdeploy {

typedef std::unordered_map<std::string, std::vector<OrtCustomOp*>> CustomOpsTable;

-struct OrtTensorDimensions : std::vector<int64_t> {
-OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
-OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
-std::vector<int64_t>::operator=(ort.GetTensorShape(info));
-ort.ReleaseTensorTypeAndShapeInfo(info);
-}
-};

CustomOpsTable& get_mmdeploy_custom_ops();

template <char const* domain, typename T>
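The deleted OrtTensorDimensions helper existed only to pull a tensor's shape out through Ort::CustomOpApi, which onnxruntime has since removed. With the 1.14+ C++ wrappers the same query is a one-liner on the value itself, which is what the kernels below now do inline. A minimal sketch of the replacement, assuming the onnxruntime >= 1.14 headers (the free function is illustrative, not MMDeploy code):

#include <onnxruntime_cxx_api.h>
#include <vector>

// Equivalent of the removed OrtTensorDimensions(ort, value) helper.
// The wrapper object releases the OrtTensorTypeAndShapeInfo automatically,
// so the manual ReleaseTensorTypeAndShapeInfo call also disappears.
std::vector<int64_t> GetTensorShape(const Ort::ConstValue& value) {
  return value.GetTensorTypeAndShapeInfo().GetShape();
}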
48 changes: 35 additions & 13 deletions csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.cpp
@@ -14,10 +14,19 @@ namespace mmdeploy {
#define CLIP_COORDINATES(in, out, clip_limit) out = MIN((clip_limit - 1), MAX(in, 0))

GridSampleKernel::GridSampleKernel(const OrtApi &api, const OrtKernelInfo *info)
-: ort_(api), info_(info) {
-align_corners_ = ort_.KernelInfoGetAttribute<int64_t>(info, "align_corners");
-interpolation_mode_ = ort_.KernelInfoGetAttribute<int64_t>(info, "interpolation_mode");
-padding_mode_ = ort_.KernelInfoGetAttribute<int64_t>(info, "padding_mode");
+: ort_(api), info_(info) {

+#if ORT_API_VERSION >= 14
+const auto kernel_info = Ort::ConstKernelInfo(info);
+align_corners_ = kernel_info.GetAttribute<int64_t>("align_corners");
+interpolation_mode_ = kernel_info.GetAttribute<int64_t>("interpolation_mode");
+padding_mode_ = kernel_info.GetAttribute<int64_t>("padding_mode");
+#else
+Ort::CustomOpApi custom_api{api};
+align_corners_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "align_corners");
+interpolation_mode_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "interpolation_mode");
+padding_mode_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "padding_mode");
+#endif

allocator_ = Ort::AllocatorWithDefaultOptions();
}
@@ -144,14 +153,22 @@ void GridSampleKernel::Compute(OrtKernelContext *context) {
const int64_t padding_mode = padding_mode_;
const int64_t interpolation_mode = interpolation_mode_;

-const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
-const float *input_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));
+#if ORT_API_VERSION >= 14
+const Ort::KernelContext ctx(context);
+const auto input = ctx.GetInput(0);
+const auto grid = ctx.GetInput(1);
+#else
+Ort::CustomOpApi api{ort_};
+const Ort::Unowned<Ort::Value> input = const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0));
+const Ort::Unowned<Ort::Value> grid = const_cast<OrtValue*>(api.KernelContext_GetInput(context, 1));
+#endif

-const OrtValue *grid = ort_.KernelContext_GetInput(context, 1);
-const float *grid_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(grid));
+const auto* input_data = input.GetTensorData<float>();
+const auto* grid_data = grid.GetTensorData<float>();

+std::vector<int64_t> input_dims = input.GetTensorTypeAndShapeInfo().GetShape();
+std::vector<int64_t> grid_dims = grid.GetTensorTypeAndShapeInfo().GetShape();

-OrtTensorDimensions input_dims(ort_, input);
-OrtTensorDimensions grid_dims(ort_, grid);
int64_t N = input_dims[0];
int64_t C = input_dims[1];
int64_t inp_H = input_dims[2];
@@ -160,9 +177,14 @@
int64_t out_W = grid_dims[2];

std::vector<int64_t> output_dims = {N, C, out_H, out_W};
-OrtValue *output =
-ort_.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
-float *out_ptr = ort_.GetTensorMutableData<float>(output);

+#if ORT_API_VERSION >= 14
+auto output = ctx.GetOutput(0, output_dims.data(), output_dims.size());
+#else
+Ort::Unowned<Ort::Value> output = api.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
+#endif

+auto* out_ptr = output.GetTensorMutableData<float>();

int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3];
int64_t inp_sC = input_dims[2] * input_dims[3];
@@ -12,7 +12,7 @@ struct GridSampleKernel {
void Compute(OrtKernelContext *context);

protected:
-Ort::CustomOpApi ort_;
+const OrtApi& ort_;
const OrtKernelInfo *info_;
Ort::AllocatorWithDefaultOptions allocator_;

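Taken together with the header change above (Ort::CustomOpApi ort_ becomes const OrtApi& ort_), every kernel in this PR follows the same version-gated shape: read attributes through Ort::ConstKernelInfo on 1.14+ or Ort::CustomOpApi before that, then access tensors through Ort::KernelContext or Ort::Unowned<Ort::Value>. A condensed sketch of that pattern with a hypothetical kernel and attribute name (not MMDeploy code):

#include <onnxruntime_cxx_api.h>

struct ExampleKernel {
  ExampleKernel(const OrtApi& api, const OrtKernelInfo* info) : ort_(api), info_(info) {
#if ORT_API_VERSION >= 14
    scale_ = Ort::ConstKernelInfo(info).GetAttribute<float>("scale");
#else
    scale_ = Ort::CustomOpApi(api).KernelInfoGetAttribute<float>(info, "scale");
#endif
  }

  void Compute(OrtKernelContext* context) {
#if ORT_API_VERSION >= 14
    Ort::KernelContext ctx(context);
    auto x = ctx.GetInput(0);  // Ort::ConstValue, a non-owning view
#else
    Ort::CustomOpApi api{ort_};
    Ort::Unowned<Ort::Value> x{const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0))};
#endif
    const float* data = x.GetTensorData<float>();  // identical accessor on both paths
    (void)data;
  }

  const OrtApi& ort_;
  const OrtKernelInfo* info_;
  float scale_ = 1.f;
};

Because both wrapper types expose the same GetTensorData/GetTensorTypeAndShapeInfo accessors, only the acquisition of the values needs guarding, not the math below it.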
@@ -20,8 +20,9 @@ void parallel_unroll_gemm(const float *A, const float *B, const float *V, const
tmp[n] = 0;
}
{
-int32_t remainder = K % 8; // unroll
-for (int32_t k = 0; k < K; k += 8) {
+const int32_t num_unroll = 8;
+const int32_t remainder = K % num_unroll; // unroll
+for (int32_t k = 0; k < K - remainder; k += num_unroll) {
for (int32_t n = 0; n < N; n++) {
tmp[n] += A[m * K + k] * B[k * N + n];
tmp[n] += A[m * K + k + 1] * B[k * N + N + n];
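The renamed num_unroll constant makes the loop structure explicit: the main loop consumes K in whole blocks of eight and must stop at the last full block, leaving exactly K % num_unroll elements for a scalar tail. A minimal sketch of that unroll-plus-remainder shape, reduced to a dot product for brevity (the real kernel accumulates into tmp[n] across N columns):

#include <cstdint>

float dot_unrolled(const float* a, const float* b, int32_t K) {
  const int32_t num_unroll = 8;
  const int32_t remainder = K % num_unroll;
  float acc = 0.f;
  int32_t k = 0;
  // Whole blocks only: stops at K - remainder, never reads past the end.
  for (; k < K - remainder; k += num_unroll) {
    for (int32_t u = 0; u < num_unroll; ++u) acc += a[k + u] * b[k + u];
  }
  // Scalar tail handles the K % num_unroll leftover elements.
  for (; k < K; ++k) acc += a[k] * b[k];
  return acc;
}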
@@ -113,19 +114,32 @@ void deformable_conv2d_ref_fp32(const float *src, const float *offset, const flo
MMCVModulatedDeformConvKernel::MMCVModulatedDeformConvKernel(const OrtApi &api,
const OrtKernelInfo *info)
: ort_(api), info_(info) {
-std::vector<int64_t> stride = ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "stride");
+#if ORT_API_VERSION >= 14
+const auto kernel_info = Ort::ConstKernelInfo(info);
+std::vector<int64_t> stride = kernel_info.GetAttributes<int64_t>("stride");
+std::vector<int64_t> padding = kernel_info.GetAttributes<int64_t>("padding");
+std::vector<int64_t> dilation = kernel_info.GetAttributes<int64_t>("dilation");

+deformable_group_ = kernel_info.GetAttribute<int64_t>("deform_groups");
+group_ = kernel_info.GetAttribute<int64_t>("groups");
+#else
+Ort::CustomOpApi custom_api{api};
+auto stride = custom_api.KernelInfoGetAttribute<std::vector<int64_t> >(info, "stride");
+auto padding = custom_api.KernelInfoGetAttribute<std::vector<int64_t> >(info, "padding");
+auto dilation = custom_api.KernelInfoGetAttribute<std::vector<int64_t> >(info, "dilation");

+deformable_group_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "deform_groups");
+group_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "groups");
+#endif

stride_height_ = stride[0];
stride_width_ = stride[1];
-std::vector<int64_t> padding = ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "padding");

padding_height_ = padding[0];
padding_width_ = padding[1];
-std::vector<int64_t> dilation =
-ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "dilation");

dilation_height_ = dilation[0];
dilation_width_ = dilation[1];
-deformable_group_ = ort_.KernelInfoGetAttribute<int64_t>(info, "deform_groups");
-group_ = ort_.KernelInfoGetAttribute<int64_t>(info, "groups");

// create allocator
allocator_ = Ort::AllocatorWithDefaultOptions();
}
@@ -140,26 +154,42 @@ void MMCVModulatedDeformConvKernel::Compute(OrtKernelContext *context) {
const int64_t deformable_group = deformable_group_;
const int64_t group = group_;

-const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
-const float *input_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));

-const OrtValue *offset = ort_.KernelContext_GetInput(context, 1);
-const float *offset_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(offset));

-const OrtValue *mask = ort_.KernelContext_GetInput(context, 2);
-const float *mask_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(mask));

-const OrtValue *filter = ort_.KernelContext_GetInput(context, 3);
-const float *filter_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(filter));
+#if ORT_API_VERSION >= 14
+const Ort::KernelContext ctx(context);
+const auto input = ctx.GetInput(0);
+const auto offset = ctx.GetInput(1);
+const auto mask = ctx.GetInput(2);
+const auto filter = ctx.GetInput(3);
+const auto bias = ctx.GetInput(4);

+const float *bias_data = bias ? bias.GetTensorData<float>() : nullptr;
+#else
+Ort::CustomOpApi api{ort_};
+const Ort::Unowned<Ort::Value> input =
+const_cast<OrtValue *>(api.KernelContext_GetInput(context, 0));
+const Ort::Unowned<Ort::Value> offset =
+const_cast<OrtValue *>(api.KernelContext_GetInput(context, 1));
+const Ort::Unowned<Ort::Value> mask =
+const_cast<OrtValue *>(api.KernelContext_GetInput(context, 2));
+const Ort::Unowned<Ort::Value> filter =
+const_cast<OrtValue *>(api.KernelContext_GetInput(context, 3));
+const float *bias_data = [&context, &api]() -> const float * {
+const OrtValue *bias_val = api.KernelContext_GetInput(context, 4);
+if (bias_val) {
+const Ort::Unowned<Ort::Value> bias{const_cast<OrtValue *>(bias_val)};
+return bias.GetTensorData<float>();
+}
+return nullptr;
+}();
+#endif

-const OrtValue *bias = ort_.KernelContext_GetInput(context, 4);
-const float *bias_data = (bias != nullptr)
-? reinterpret_cast<const float *>(ort_.GetTensorData<float>(bias))
-: nullptr;
-// const float *bias_data = nullptr;
+const float *input_data = input.GetTensorData<float>();
+const float *offset_data = offset.GetTensorData<float>();
+const float *mask_data = mask.GetTensorData<float>();
+const float *filter_data = filter.GetTensorData<float>();

-OrtTensorDimensions input_dims(ort_, input);
-OrtTensorDimensions filter_dims(ort_, filter);
+std::vector<int64_t> input_dims = input.GetTensorTypeAndShapeInfo().GetShape();
+std::vector<int64_t> filter_dims = filter.GetTensorTypeAndShapeInfo().GetShape();

int64_t batch = input_dims[0];
int64_t channels = input_dims[1];
@@ -177,9 +207,15 @@
(in_width + 2 * padding_width - dilation_width * (kernel_width - 1) - 1) / stride_width + 1);

std::vector<int64_t> output_dims = {batch, num_output, out_height, out_width};
-OrtValue *output =
-ort_.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
-float *out_ptr = ort_.GetTensorMutableData<float>(output);

+#if ORT_API_VERSION >= 14
+auto output = ctx.GetOutput(0, output_dims.data(), output_dims.size());
+#else
+Ort::Unowned<Ort::Value> output =
+api.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
+#endif

+float *out_ptr = output.GetTensorMutableData<float>();

// allocate tmp memory
int64_t column_len = (channels / group) * kernel_height * kernel_width * out_height * out_width;
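The bias input at index 4 is optional, which is why the two branches differ in shape: on 1.14+ the Ort::ConstValue returned by ctx.GetInput(4) can be tested directly, while the legacy path has to null-check the raw OrtValue* before wrapping it, hence the immediately-invoked lambda. The same idea as a standalone sketch (the helper name is illustrative, not MMDeploy code):

#include <onnxruntime_cxx_api.h>

// Returns the data pointer of an optional float input, or nullptr if absent.
const float* OptionalFloatInput(OrtKernelContext* context, size_t index, const OrtApi& api) {
#if ORT_API_VERSION >= 14
  (void)api;  // the C++ KernelContext wrapper suffices on this path
  Ort::KernelContext ctx(context);
  auto value = ctx.GetInput(index);
  return value ? value.GetTensorData<float>() : nullptr;
#else
  Ort::CustomOpApi custom_api{api};
  const OrtValue* raw = custom_api.KernelContext_GetInput(context, index);
  if (!raw) return nullptr;  // optional input not provided
  Ort::Unowned<Ort::Value> value{const_cast<OrtValue*>(raw)};
  return value.GetTensorData<float>();
#endif
}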
@@ -12,7 +12,7 @@ struct MMCVModulatedDeformConvKernel {
void Compute(OrtKernelContext *context);

protected:
-Ort::CustomOpApi ort_;
+const OrtApi& ort_;
const OrtKernelInfo *info_;
Ort::AllocatorWithDefaultOptions allocator_;

47 changes: 34 additions & 13 deletions csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.cpp
@@ -42,17 +42,32 @@ NMSMatchKernel::NMSMatchKernel(const OrtApi& api, const OrtKernelInfo* info)
}

void NMSMatchKernel::Compute(OrtKernelContext* context) {
-const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
-const float* boxes_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
-const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
-const float* scores_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
-const OrtValue* iou_threshold_ = ort_.KernelContext_GetInput(context, 2);
-const float iou_threshold_data = ort_.GetTensorData<float>(iou_threshold_)[0];
-const OrtValue* score_threshold_ = ort_.KernelContext_GetInput(context, 3);
-const float score_threshold_data = ort_.GetTensorData<float>(score_threshold_)[0];

-OrtTensorDimensions boxes_dim(ort_, boxes);
-OrtTensorDimensions scores_dim(ort_, scores);
+#if ORT_API_VERSION >= 14
+const Ort::KernelContext ctx(context);
+const auto boxes = ctx.GetInput(0);
+const auto scores = ctx.GetInput(1);
+const auto iou_threshold = ctx.GetInput(2);
+const auto score_threshold = ctx.GetInput(3);
+#else
+Ort::CustomOpApi api{ort_};
+const Ort::Unowned<Ort::Value> boxes =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0));
+const Ort::Unowned<Ort::Value> scores =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 1));
+const Ort::Unowned<Ort::Value> iou_threshold =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 2));
+const Ort::Unowned<Ort::Value> score_threshold =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 3));
+#endif

+const float* boxes_data = boxes.GetTensorData<float>();
+const float* scores_data = scores.GetTensorData<float>();
+const float iou_threshold_data = iou_threshold.GetTensorData<float>()[0];
+const float score_threshold_data = score_threshold.GetTensorData<float>()[0];

+std::vector<int64_t> boxes_dim = boxes.GetTensorTypeAndShapeInfo().GetShape();
+std::vector<int64_t> scores_dim = scores.GetTensorTypeAndShapeInfo().GetShape();

// loop over batch
int64_t nbatch = boxes_dim[0];
int64_t nboxes = boxes_dim[1];
@@ -118,8 +133,14 @@ void NMSMatchKernel::Compute(OrtKernelContext* context) {
}
std::vector<int64_t> inds_dims({(int64_t)res_order.size() / 4, 4});

-OrtValue* res = ort_.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
-int64_t* res_data = ort_.GetTensorMutableData<int64_t>(res);
+#if ORT_API_VERSION >= 14
+auto res = ctx.GetOutput(0, inds_dims.data(), inds_dims.size());
+#else
+Ort::Unowned<Ort::Value> res =
+api.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
+#endif

+int64_t* res_data = res.GetTensorMutableData<int64_t>();

memcpy(res_data, res_order.data(), sizeof(int64_t) * res_order.size());

@@ -17,7 +17,7 @@ struct NMSMatchKernel {
void Compute(OrtKernelContext* context);

private:
-Ort::CustomOpApi ort_;
+const OrtApi& ort_;
const OrtKernelInfo* info_;
Ort::AllocatorWithDefaultOptions allocator_;
};
43 changes: 32 additions & 11 deletions csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.cpp
@@ -6,7 +6,6 @@
#include <algorithm>
#include <cassert>
#include <cmath>
-#include <iostream>
#include <iterator>
#include <numeric> // std::iota
#include <vector>
@@ -263,8 +262,15 @@ float rotated_boxes_intersection(const RotatedBox& box1, const RotatedBox& box2)

NMSRotatedKernel::NMSRotatedKernel(const OrtApi& api, const OrtKernelInfo* info)
: ort_(api), info_(info) {
-iou_threshold_ = ort_.KernelInfoGetAttribute<float>(info, "iou_threshold");
-score_threshold_ = ort_.KernelInfoGetAttribute<float>(info, "score_threshold");
+#if ORT_API_VERSION >= 14
+const auto kernel_info = Ort::ConstKernelInfo(info);
+iou_threshold_ = kernel_info.GetAttribute<float>("iou_threshold");
+score_threshold_ = kernel_info.GetAttribute<float>("score_threshold");
+#else
+Ort::CustomOpApi custom_api{api};
+iou_threshold_ = custom_api.KernelInfoGetAttribute<float>(info, "iou_threshold");
+score_threshold_ = custom_api.KernelInfoGetAttribute<float>(info, "score_threshold");
+#endif

// create allocator
allocator_ = Ort::AllocatorWithDefaultOptions();
@@ -274,13 +280,23 @@ void NMSRotatedKernel::Compute(OrtKernelContext* context) {
const float iou_threshold = iou_threshold_;
const float score_threshold = score_threshold_;

-const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
-const float* boxes_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
-const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
-const float* scores_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
+#if ORT_API_VERSION >= 14
+const Ort::KernelContext ctx(context);
+const auto boxes = ctx.GetInput(0);
+const auto scores = ctx.GetInput(1);
+#else
+Ort::CustomOpApi api{ort_};
+const Ort::Unowned<Ort::Value> boxes =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0));
+const Ort::Unowned<Ort::Value> scores =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 1));
+#endif

-OrtTensorDimensions boxes_dim(ort_, boxes);
-OrtTensorDimensions scores_dim(ort_, scores);
+const float* boxes_data = boxes.GetTensorData<float>();
+const float* scores_data = scores.GetTensorData<float>();

+std::vector<int64_t> boxes_dim = boxes.GetTensorTypeAndShapeInfo().GetShape();
+std::vector<int64_t> scores_dim = scores.GetTensorTypeAndShapeInfo().GetShape();

// loop over batch
int64_t nbatch = boxes_dim[0];
@@ -354,8 +370,13 @@

std::vector<int64_t> inds_dims({(int64_t)res_order.size() / 3, 3});

-OrtValue* res = ort_.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
-int64_t* res_data = ort_.GetTensorMutableData<int64_t>(res);
+#if ORT_API_VERSION >= 14
+auto res = ctx.GetOutput(0, inds_dims.data(), inds_dims.size());
+#else
+Ort::Unowned<Ort::Value> res = api.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
+#endif

+int64_t* res_data = res.GetTensorMutableData<int64_t>();

memcpy(res_data, res_order.data(), sizeof(int64_t) * res_order.size());

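Once the ops library is rebuilt against onnxruntime 1.17.1, consuming it is unchanged: register the library on the session options before creating the session. A hedged sketch using the 1.14+ C++ API; the library and model file names below are illustrative and vary by platform and deploy config:

#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "mmdeploy");
  Ort::SessionOptions opts;
  // Load the custom-op library produced by this build.
  opts.RegisterCustomOpsLibrary(ORT_TSTR("libmmdeploy_onnxruntime_ops.so"));
  // Any model exported with MMDeploy custom ops (grid_sample, the NMS variants, ...).
  Ort::Session session(env, ORT_TSTR("end2end.onnx"), opts);
  return 0;
}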