Support onnxruntime 1.17.1 #2755

Open. Wants to merge 3 commits into base: main.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -12,6 +12,7 @@ cmake_minimum_required(VERSION 3.14)
project(MMDeploy VERSION 1.3.1)

set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(MMDEPLOY_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(MMDEPLOY_VERSION_MINOR ${PROJECT_VERSION_MINOR})
2 changes: 1 addition & 1 deletion csrc/mmdeploy/backend_ops/CMakeLists.txt
@@ -1,5 +1,5 @@
if (NOT MSVC)
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
endif ()

8 changes: 0 additions & 8 deletions csrc/mmdeploy/backend_ops/onnxruntime/common/ort_utils.h
@@ -10,14 +10,6 @@ namespace mmdeploy {

typedef std::unordered_map<std::string, std::vector<OrtCustomOp*>> CustomOpsTable;

-struct OrtTensorDimensions : std::vector<int64_t> {
-OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
-OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
-std::vector<int64_t>::operator=(ort.GetTensorShape(info));
-ort.ReleaseTensorTypeAndShapeInfo(info);
-}
-};

CustomOpsTable& get_mmdeploy_custom_ops();

template <char const* domain, typename T>
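The deleted OrtTensorDimensions helper existed only to pull a tensor's shape out through Ort::CustomOpApi, which onnxruntime has since removed. With the 1.14+ C++ wrappers the same query is a one-liner on the value itself, which is what the kernels below now do inline. A minimal sketch of the replacement, assuming the onnxruntime >= 1.14 headers (the free function is illustrative, not MMDeploy code):

#include <onnxruntime_cxx_api.h>
#include <vector>

// Equivalent of the removed OrtTensorDimensions(ort, value) helper.
// The wrapper object releases the OrtTensorTypeAndShapeInfo automatically,
// so the manual ReleaseTensorTypeAndShapeInfo call also disappears.
std::vector<int64_t> GetTensorShape(const Ort::ConstValue& value) {
  return value.GetTensorTypeAndShapeInfo().GetShape();
}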
48 changes: 35 additions & 13 deletions csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.cpp
@@ -14,10 +14,19 @@ namespace mmdeploy {
#define CLIP_COORDINATES(in, out, clip_limit) out = MIN((clip_limit - 1), MAX(in, 0))

GridSampleKernel::GridSampleKernel(const OrtApi &api, const OrtKernelInfo *info)
-: ort_(api), info_(info) {
-align_corners_ = ort_.KernelInfoGetAttribute<int64_t>(info, "align_corners");
-interpolation_mode_ = ort_.KernelInfoGetAttribute<int64_t>(info, "interpolation_mode");
-padding_mode_ = ort_.KernelInfoGetAttribute<int64_t>(info, "padding_mode");
+: ort_(api), info_(info) {

+#if ORT_API_VERSION >= 14
+const auto kernel_info = Ort::ConstKernelInfo(info);
+align_corners_ = kernel_info.GetAttribute<int64_t>("align_corners");
+interpolation_mode_ = kernel_info.GetAttribute<int64_t>("interpolation_mode");
+padding_mode_ = kernel_info.GetAttribute<int64_t>("padding_mode");
+#else
+Ort::CustomOpApi custom_api{api};
+align_corners_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "align_corners");
+interpolation_mode_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "interpolation_mode");
+padding_mode_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "padding_mode");
+#endif

allocator_ = Ort::AllocatorWithDefaultOptions();
}
@@ -144,14 +153,22 @@ void GridSampleKernel::Compute(OrtKernelContext *context) {
const int64_t padding_mode = padding_mode_;
const int64_t interpolation_mode = interpolation_mode_;

-const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
-const float *input_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));
+#if ORT_API_VERSION >= 14
+const Ort::KernelContext ctx(context);
+const auto input = ctx.GetInput(0);
+const auto grid = ctx.GetInput(1);
+#else
+Ort::CustomOpApi api{ort_};
+const Ort::Unowned<Ort::Value> input = const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0));
+const Ort::Unowned<Ort::Value> grid = const_cast<OrtValue*>(api.KernelContext_GetInput(context, 1));
+#endif

-const OrtValue *grid = ort_.KernelContext_GetInput(context, 1);
-const float *grid_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(grid));
+const auto* input_data = input.GetTensorData<float>();
+const auto* grid_data = grid.GetTensorData<float>();

+std::vector<int64_t> input_dims = input.GetTensorTypeAndShapeInfo().GetShape();
+std::vector<int64_t> grid_dims = grid.GetTensorTypeAndShapeInfo().GetShape();

-OrtTensorDimensions input_dims(ort_, input);
-OrtTensorDimensions grid_dims(ort_, grid);
int64_t N = input_dims[0];
int64_t C = input_dims[1];
int64_t inp_H = input_dims[2];
@@ -160,9 +177,14 @@
int64_t out_W = grid_dims[2];

std::vector<int64_t> output_dims = {N, C, out_H, out_W};
-OrtValue *output =
-ort_.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
-float *out_ptr = ort_.GetTensorMutableData<float>(output);

+#if ORT_API_VERSION >= 14
+auto output = ctx.GetOutput(0, output_dims.data(), output_dims.size());
+#else
+Ort::Unowned<Ort::Value> output = api.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
+#endif

+auto* out_ptr = output.GetTensorMutableData<float>();

int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3];
int64_t inp_sC = input_dims[2] * input_dims[3];
@@ -12,7 +12,7 @@ struct GridSampleKernel {
void Compute(OrtKernelContext *context);

protected:
-Ort::CustomOpApi ort_;
+const OrtApi& ort_;
const OrtKernelInfo *info_;
Ort::AllocatorWithDefaultOptions allocator_;

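Taken together with the header change above (Ort::CustomOpApi ort_ becomes const OrtApi& ort_), every kernel in this PR follows the same version-gated shape: read attributes through Ort::ConstKernelInfo on 1.14+ or Ort::CustomOpApi before that, then access tensors through Ort::KernelContext or Ort::Unowned<Ort::Value>. A condensed sketch of that pattern with a hypothetical kernel and attribute name (not MMDeploy code):

#include <onnxruntime_cxx_api.h>

struct ExampleKernel {
  ExampleKernel(const OrtApi& api, const OrtKernelInfo* info) : ort_(api), info_(info) {
#if ORT_API_VERSION >= 14
    scale_ = Ort::ConstKernelInfo(info).GetAttribute<float>("scale");
#else
    scale_ = Ort::CustomOpApi(api).KernelInfoGetAttribute<float>(info, "scale");
#endif
  }

  void Compute(OrtKernelContext* context) {
#if ORT_API_VERSION >= 14
    Ort::KernelContext ctx(context);
    auto x = ctx.GetInput(0);  // Ort::ConstValue, a non-owning view
#else
    Ort::CustomOpApi api{ort_};
    Ort::Unowned<Ort::Value> x{const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0))};
#endif
    const float* data = x.GetTensorData<float>();  // identical accessor on both paths
    (void)data;
  }

  const OrtApi& ort_;
  const OrtKernelInfo* info_;
  float scale_ = 1.f;
};

Because both wrapper types expose the same GetTensorData/GetTensorTypeAndShapeInfo accessors, only the acquisition of the values needs guarding, not the math below it.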
@@ -20,8 +20,9 @@ void parallel_unroll_gemm(const float *A, const float *B, const float *V, const
tmp[n] = 0;
}
{
-int32_t remainder = K % 8; // unroll
-for (int32_t k = 0; k < K; k += 8) {
+const int32_t num_unroll = 8;
+const int32_t remainder = K % num_unroll; // unroll
+for (int32_t k = 0; k < K - remainder; k += num_unroll) {
for (int32_t n = 0; n < N; n++) {
tmp[n] += A[m * K + k] * B[k * N + n];
tmp[n] += A[m * K + k + 1] * B[k * N + N + n];
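The renamed num_unroll constant makes the loop structure explicit: the main loop consumes K in whole blocks of eight and must stop at the last full block, leaving exactly K % num_unroll elements for a scalar tail. A minimal sketch of that unroll-plus-remainder shape, reduced to a dot product for brevity (the real kernel accumulates into tmp[n] across N columns):

#include <cstdint>

float dot_unrolled(const float* a, const float* b, int32_t K) {
  const int32_t num_unroll = 8;
  const int32_t remainder = K % num_unroll;
  float acc = 0.f;
  int32_t k = 0;
  // Whole blocks only: stops at K - remainder, never reads past the end.
  for (; k < K - remainder; k += num_unroll) {
    for (int32_t u = 0; u < num_unroll; ++u) acc += a[k + u] * b[k + u];
  }
  // Scalar tail handles the K % num_unroll leftover elements.
  for (; k < K; ++k) acc += a[k] * b[k];
  return acc;
}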
@@ -113,19 +114,32 @@ void deformable_conv2d_ref_fp32(const float *src, const float *offset, const flo
MMCVModulatedDeformConvKernel::MMCVModulatedDeformConvKernel(const OrtApi &api,
const OrtKernelInfo *info)
: ort_(api), info_(info) {
-std::vector<int64_t> stride = ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "stride");
+#if ORT_API_VERSION >= 14
+const auto kernel_info = Ort::ConstKernelInfo(info);
+std::vector<int64_t> stride = kernel_info.GetAttributes<int64_t>("stride");
+std::vector<int64_t> padding = kernel_info.GetAttributes<int64_t>("padding");
+std::vector<int64_t> dilation = kernel_info.GetAttributes<int64_t>("dilation");

+deformable_group_ = kernel_info.GetAttribute<int64_t>("deform_groups");
+group_ = kernel_info.GetAttribute<int64_t>("groups");
+#else
+Ort::CustomOpApi custom_api{api};
+auto stride = custom_api.KernelInfoGetAttribute<std::vector<int64_t> >(info, "stride");
+auto padding = custom_api.KernelInfoGetAttribute<std::vector<int64_t> >(info, "padding");
+auto dilation = custom_api.KernelInfoGetAttribute<std::vector<int64_t> >(info, "dilation");

+deformable_group_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "deform_groups");
+group_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "groups");
+#endif

stride_height_ = stride[0];
stride_width_ = stride[1];
-std::vector<int64_t> padding = ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "padding");

padding_height_ = padding[0];
padding_width_ = padding[1];
-std::vector<int64_t> dilation =
-ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "dilation");

dilation_height_ = dilation[0];
dilation_width_ = dilation[1];
-deformable_group_ = ort_.KernelInfoGetAttribute<int64_t>(info, "deform_groups");
-group_ = ort_.KernelInfoGetAttribute<int64_t>(info, "groups");

// create allocator
allocator_ = Ort::AllocatorWithDefaultOptions();
}
@@ -140,26 +154,42 @@ void MMCVModulatedDeformConvKernel::Compute(OrtKernelContext *context) {
const int64_t deformable_group = deformable_group_;
const int64_t group = group_;

-const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
-const float *input_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));

-const OrtValue *offset = ort_.KernelContext_GetInput(context, 1);
-const float *offset_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(offset));

-const OrtValue *mask = ort_.KernelContext_GetInput(context, 2);
-const float *mask_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(mask));

-const OrtValue *filter = ort_.KernelContext_GetInput(context, 3);
-const float *filter_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(filter));
+#if ORT_API_VERSION >= 14
+const Ort::KernelContext ctx(context);
+const auto input = ctx.GetInput(0);
+const auto offset = ctx.GetInput(1);
+const auto mask = ctx.GetInput(2);
+const auto filter = ctx.GetInput(3);
+const auto bias = ctx.GetInput(4);

+const float *bias_data = bias ? bias.GetTensorData<float>() : nullptr;
+#else
+Ort::CustomOpApi api{ort_};
+const Ort::Unowned<Ort::Value> input =
+const_cast<OrtValue *>(api.KernelContext_GetInput(context, 0));
+const Ort::Unowned<Ort::Value> offset =
+const_cast<OrtValue *>(api.KernelContext_GetInput(context, 1));
+const Ort::Unowned<Ort::Value> mask =
+const_cast<OrtValue *>(api.KernelContext_GetInput(context, 2));
+const Ort::Unowned<Ort::Value> filter =
+const_cast<OrtValue *>(api.KernelContext_GetInput(context, 3));
+const float *bias_data = [&context, &api]() -> const float * {
+const OrtValue *bias_val = api.KernelContext_GetInput(context, 4);
+if (bias_val) {
+const Ort::Unowned<Ort::Value> bias{const_cast<OrtValue *>(bias_val)};
+return bias.GetTensorData<float>();
+}
+return nullptr;
+}();
+#endif

-const OrtValue *bias = ort_.KernelContext_GetInput(context, 4);
-const float *bias_data = (bias != nullptr)
-? reinterpret_cast<const float *>(ort_.GetTensorData<float>(bias))
-: nullptr;
-// const float *bias_data = nullptr;
+const float *input_data = input.GetTensorData<float>();
+const float *offset_data = offset.GetTensorData<float>();
+const float *mask_data = mask.GetTensorData<float>();
+const float *filter_data = filter.GetTensorData<float>();

-OrtTensorDimensions input_dims(ort_, input);
-OrtTensorDimensions filter_dims(ort_, filter);
+std::vector<int64_t> input_dims = input.GetTensorTypeAndShapeInfo().GetShape();
+std::vector<int64_t> filter_dims = filter.GetTensorTypeAndShapeInfo().GetShape();

int64_t batch = input_dims[0];
int64_t channels = input_dims[1];
@@ -177,9 +207,15 @@
(in_width + 2 * padding_width - dilation_width * (kernel_width - 1) - 1) / stride_width + 1);

std::vector<int64_t> output_dims = {batch, num_output, out_height, out_width};
-OrtValue *output =
-ort_.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
-float *out_ptr = ort_.GetTensorMutableData<float>(output);

+#if ORT_API_VERSION >= 14
+auto output = ctx.GetOutput(0, output_dims.data(), output_dims.size());
+#else
+Ort::Unowned<Ort::Value> output =
+api.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
+#endif

+float *out_ptr = output.GetTensorMutableData<float>();

// allocate tmp memory
int64_t column_len = (channels / group) * kernel_height * kernel_width * out_height * out_width;
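The bias input at index 4 is optional, which is why the two branches differ in shape: on 1.14+ the Ort::ConstValue returned by ctx.GetInput(4) can be tested directly, while the legacy path has to null-check the raw OrtValue* before wrapping it, hence the immediately-invoked lambda. The same idea as a standalone sketch (the helper name is illustrative, not MMDeploy code):

#include <onnxruntime_cxx_api.h>

// Returns the data pointer of an optional float input, or nullptr if absent.
const float* OptionalFloatInput(OrtKernelContext* context, size_t index, const OrtApi& api) {
#if ORT_API_VERSION >= 14
  (void)api;  // the C++ KernelContext wrapper suffices on this path
  Ort::KernelContext ctx(context);
  auto value = ctx.GetInput(index);
  return value ? value.GetTensorData<float>() : nullptr;
#else
  Ort::CustomOpApi custom_api{api};
  const OrtValue* raw = custom_api.KernelContext_GetInput(context, index);
  if (!raw) return nullptr;  // optional input not provided
  Ort::Unowned<Ort::Value> value{const_cast<OrtValue*>(raw)};
  return value.GetTensorData<float>();
#endif
}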
@@ -12,7 +12,7 @@ struct MMCVModulatedDeformConvKernel {
void Compute(OrtKernelContext *context);

protected:
-Ort::CustomOpApi ort_;
+const OrtApi& ort_;
const OrtKernelInfo *info_;
Ort::AllocatorWithDefaultOptions allocator_;

47 changes: 34 additions & 13 deletions csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.cpp
@@ -42,17 +42,32 @@ NMSMatchKernel::NMSMatchKernel(const OrtApi& api, const OrtKernelInfo* info)
}

void NMSMatchKernel::Compute(OrtKernelContext* context) {
-const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
-const float* boxes_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
-const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
-const float* scores_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
-const OrtValue* iou_threshold_ = ort_.KernelContext_GetInput(context, 2);
-const float iou_threshold_data = ort_.GetTensorData<float>(iou_threshold_)[0];
-const OrtValue* score_threshold_ = ort_.KernelContext_GetInput(context, 3);
-const float score_threshold_data = ort_.GetTensorData<float>(score_threshold_)[0];

-OrtTensorDimensions boxes_dim(ort_, boxes);
-OrtTensorDimensions scores_dim(ort_, scores);
+#if ORT_API_VERSION >= 14
+const Ort::KernelContext ctx(context);
+const auto boxes = ctx.GetInput(0);
+const auto scores = ctx.GetInput(1);
+const auto iou_threshold = ctx.GetInput(2);
+const auto score_threshold = ctx.GetInput(3);
+#else
+Ort::CustomOpApi api{ort_};
+const Ort::Unowned<Ort::Value> boxes =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0));
+const Ort::Unowned<Ort::Value> scores =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 1));
+const Ort::Unowned<Ort::Value> iou_threshold =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 2));
+const Ort::Unowned<Ort::Value> score_threshold =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 3));
+#endif

+const float* boxes_data = boxes.GetTensorData<float>();
+const float* scores_data = scores.GetTensorData<float>();
+const float iou_threshold_data = iou_threshold.GetTensorData<float>()[0];
+const float score_threshold_data = score_threshold.GetTensorData<float>()[0];

+std::vector<int64_t> boxes_dim = boxes.GetTensorTypeAndShapeInfo().GetShape();
+std::vector<int64_t> scores_dim = scores.GetTensorTypeAndShapeInfo().GetShape();

// loop over batch
int64_t nbatch = boxes_dim[0];
int64_t nboxes = boxes_dim[1];
@@ -118,8 +133,14 @@ void NMSMatchKernel::Compute(OrtKernelContext* context) {
}
std::vector<int64_t> inds_dims({(int64_t)res_order.size() / 4, 4});

-OrtValue* res = ort_.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
-int64_t* res_data = ort_.GetTensorMutableData<int64_t>(res);
+#if ORT_API_VERSION >= 14
+auto res = ctx.GetOutput(0, inds_dims.data(), inds_dims.size());
+#else
+Ort::Unowned<Ort::Value> res =
+api.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
+#endif

+int64_t* res_data = res.GetTensorMutableData<int64_t>();

memcpy(res_data, res_order.data(), sizeof(int64_t) * res_order.size());

@@ -17,7 +17,7 @@ struct NMSMatchKernel {
void Compute(OrtKernelContext* context);

private:
-Ort::CustomOpApi ort_;
+const OrtApi& ort_;
const OrtKernelInfo* info_;
Ort::AllocatorWithDefaultOptions allocator_;
};
43 changes: 32 additions & 11 deletions csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.cpp
@@ -6,7 +6,6 @@
#include <algorithm>
#include <cassert>
#include <cmath>
-#include <iostream>
#include <iterator>
#include <numeric> // std::iota
#include <vector>
@@ -263,8 +262,15 @@ float rotated_boxes_intersection(const RotatedBox& box1, const RotatedBox& box2)

NMSRotatedKernel::NMSRotatedKernel(const OrtApi& api, const OrtKernelInfo* info)
: ort_(api), info_(info) {
-iou_threshold_ = ort_.KernelInfoGetAttribute<float>(info, "iou_threshold");
-score_threshold_ = ort_.KernelInfoGetAttribute<float>(info, "score_threshold");
+#if ORT_API_VERSION >= 14
+const auto kernel_info = Ort::ConstKernelInfo(info);
+iou_threshold_ = kernel_info.GetAttribute<float>("iou_threshold");
+score_threshold_ = kernel_info.GetAttribute<float>("score_threshold");
+#else
+Ort::CustomOpApi custom_api{api};
+iou_threshold_ = custom_api.KernelInfoGetAttribute<float>(info, "iou_threshold");
+score_threshold_ = custom_api.KernelInfoGetAttribute<float>(info, "score_threshold");
+#endif

// create allocator
allocator_ = Ort::AllocatorWithDefaultOptions();
@@ -274,13 +280,23 @@ void NMSRotatedKernel::Compute(OrtKernelContext* context) {
const float iou_threshold = iou_threshold_;
const float score_threshold = score_threshold_;

-const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
-const float* boxes_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
-const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
-const float* scores_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
+#if ORT_API_VERSION >= 14
+const Ort::KernelContext ctx(context);
+const auto boxes = ctx.GetInput(0);
+const auto scores = ctx.GetInput(1);
+#else
+Ort::CustomOpApi api{ort_};
+const Ort::Unowned<Ort::Value> boxes =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0));
+const Ort::Unowned<Ort::Value> scores =
+const_cast<OrtValue*>(api.KernelContext_GetInput(context, 1));
+#endif

-OrtTensorDimensions boxes_dim(ort_, boxes);
-OrtTensorDimensions scores_dim(ort_, scores);
+const float* boxes_data = boxes.GetTensorData<float>();
+const float* scores_data = scores.GetTensorData<float>();

+std::vector<int64_t> boxes_dim = boxes.GetTensorTypeAndShapeInfo().GetShape();
+std::vector<int64_t> scores_dim = scores.GetTensorTypeAndShapeInfo().GetShape();

// loop over batch
int64_t nbatch = boxes_dim[0];
@@ -354,8 +370,13 @@

std::vector<int64_t> inds_dims({(int64_t)res_order.size() / 3, 3});

-OrtValue* res = ort_.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
-int64_t* res_data = ort_.GetTensorMutableData<int64_t>(res);
+#if ORT_API_VERSION >= 14
+auto res = ctx.GetOutput(0, inds_dims.data(), inds_dims.size());
+#else
+Ort::Unowned<Ort::Value> res = api.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
+#endif

+int64_t* res_data = res.GetTensorMutableData<int64_t>();

memcpy(res_data, res_order.data(), sizeof(int64_t) * res_order.size());

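Once the ops library is rebuilt against onnxruntime 1.17.1, consuming it is unchanged: register the library on the session options before creating the session. A hedged sketch using the 1.14+ C++ API; the library and model file names below are illustrative and vary by platform and deploy config:

#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "mmdeploy");
  Ort::SessionOptions opts;
  // Load the custom-op library produced by this build.
  opts.RegisterCustomOpsLibrary(ORT_TSTR("libmmdeploy_onnxruntime_ops.so"));
  // Any model exported with MMDeploy custom ops (grid_sample, the NMS variants, ...).
  Ort::Session session(env, ORT_TSTR("end2end.onnx"), opts);
  return 0;
}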