diff --git a/CMakeLists.txt b/CMakeLists.txt
index a241d90674..2338d2e434 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,6 +12,7 @@ cmake_minimum_required(VERSION 3.14)
 project(MMDeploy VERSION 1.3.1)
 
 set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 set(MMDEPLOY_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
 set(MMDEPLOY_VERSION_MINOR ${PROJECT_VERSION_MINOR})
diff --git a/csrc/mmdeploy/backend_ops/CMakeLists.txt b/csrc/mmdeploy/backend_ops/CMakeLists.txt
index 761c35a59a..05ff1a432e 100644
--- a/csrc/mmdeploy/backend_ops/CMakeLists.txt
+++ b/csrc/mmdeploy/backend_ops/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (NOT MSVC)
-  set(CMAKE_CXX_STANDARD 14)
+  set(CMAKE_CXX_STANDARD 17)
   set(CMAKE_CXX_FLAGS_RELEASE "-O3")
 endif ()
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/common/ort_utils.h b/csrc/mmdeploy/backend_ops/onnxruntime/common/ort_utils.h
index e19c984f86..aef55e3e91 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/common/ort_utils.h
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/common/ort_utils.h
@@ -10,14 +10,6 @@ namespace mmdeploy {
 
 typedef std::unordered_map<std::string, std::vector<OrtCustomOp*>> CustomOpsTable;
 
-struct OrtTensorDimensions : std::vector<int64_t> {
-  OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
-    OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
-    std::vector<int64_t>::operator=(ort.GetTensorShape(info));
-    ort.ReleaseTensorTypeAndShapeInfo(info);
-  }
-};
-
 CustomOpsTable& get_mmdeploy_custom_ops();
 
 template <typename T>
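Note: the deleted OrtTensorDimensions helper existed only to pull a tensor's shape through the pre-1.14 Ort::CustomOpApi. Every call site below replaces it with a value-based shape query that works on both API paths. A minimal sketch of the replacement pattern (the names ctx, value, and dims are illustrative, not part of the patch):

    #if ORT_API_VERSION >= 14
      const Ort::KernelContext ctx(context);     // context is the OrtKernelContext*
      const Ort::ConstValue value = ctx.GetInput(0);
    #else
      Ort::CustomOpApi api{ort_};
      const Ort::Unowned<Ort::Value> value =
          const_cast<OrtValue *>(api.KernelContext_GetInput(context, 0));
    #endif
      // replaces: OrtTensorDimensions dims(ort_, value);
      std::vector<int64_t> dims = value.GetTensorTypeAndShapeInfo().GetShape();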
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.cpp b/csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.cpp
index c7fed37d23..accfe34554 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.cpp
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.cpp
@@ -14,10 +14,19 @@ namespace mmdeploy {
 #define CLIP_COORDINATES(in, out, clip_limit) out = MIN((clip_limit - 1), MAX(in, 0))
 
 GridSampleKernel::GridSampleKernel(const OrtApi &api, const OrtKernelInfo *info)
-    : ort_(api), info_(info) {
-  align_corners_ = ort_.KernelInfoGetAttribute<int64_t>(info, "align_corners");
-  interpolation_mode_ = ort_.KernelInfoGetAttribute<int64_t>(info, "interpolation_mode");
-  padding_mode_ = ort_.KernelInfoGetAttribute<int64_t>(info, "padding_mode");
+    : ort_(api), info_(info) {
+
+#if ORT_API_VERSION >= 14
+  const auto kernel_info = Ort::ConstKernelInfo(info);
+  align_corners_ = kernel_info.GetAttribute<int64_t>("align_corners");
+  interpolation_mode_ = kernel_info.GetAttribute<int64_t>("interpolation_mode");
+  padding_mode_ = kernel_info.GetAttribute<int64_t>("padding_mode");
+#else
+  Ort::CustomOpApi custom_api{api};
+  align_corners_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "align_corners");
+  interpolation_mode_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "interpolation_mode");
+  padding_mode_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "padding_mode");
+#endif
 
   allocator_ = Ort::AllocatorWithDefaultOptions();
 }
@@ -144,14 +153,22 @@ void GridSampleKernel::Compute(OrtKernelContext *context) {
   const int64_t padding_mode = padding_mode_;
   const int64_t interpolation_mode = interpolation_mode_;
 
-  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
-  const float *input_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));
+#if ORT_API_VERSION >= 14
+  const Ort::KernelContext ctx(context);
+  const auto input = ctx.GetInput(0);
+  const auto grid = ctx.GetInput(1);
+#else
+  Ort::CustomOpApi api{ort_};
+  const Ort::Unowned<Ort::Value> input = const_cast<OrtValue *>(api.KernelContext_GetInput(context, 0));
+  const Ort::Unowned<Ort::Value> grid = const_cast<OrtValue *>(api.KernelContext_GetInput(context, 1));
+#endif
 
-  const OrtValue *grid = ort_.KernelContext_GetInput(context, 1);
-  const float *grid_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(grid));
+  const auto* input_data = input.GetTensorData<float>();
+  const auto* grid_data = grid.GetTensorData<float>();
+
+  std::vector<int64_t> input_dims = input.GetTensorTypeAndShapeInfo().GetShape();
+  std::vector<int64_t> grid_dims = grid.GetTensorTypeAndShapeInfo().GetShape();
 
-  OrtTensorDimensions input_dims(ort_, input);
-  OrtTensorDimensions grid_dims(ort_, grid);
   int64_t N = input_dims[0];
   int64_t C = input_dims[1];
   int64_t inp_H = input_dims[2];
@@ -160,9 +177,14 @@ void GridSampleKernel::Compute(OrtKernelContext *context) {
   int64_t out_W = grid_dims[2];
 
   std::vector<int64_t> output_dims = {N, C, out_H, out_W};
-  OrtValue *output =
-      ort_.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
-  float *out_ptr = ort_.GetTensorMutableData<float>(output);
+
+#if ORT_API_VERSION >= 14
+  auto output = ctx.GetOutput(0, output_dims.data(), output_dims.size());
+#else
+  Ort::Unowned<Ort::Value> output = api.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
+#endif
+
+  auto* out_ptr = output.GetTensorMutableData<float>();
 
   int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3];
   int64_t inp_sC = input_dims[2] * input_dims[3];
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.h b/csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.h
index 2581b7833e..4af5d7623e 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.h
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/grid_sample/grid_sample.h
@@ -12,7 +12,7 @@ struct GridSampleKernel {
   void Compute(OrtKernelContext *context);
 
  protected:
-  Ort::CustomOpApi ort_;
+  const OrtApi& ort_;
   const OrtKernelInfo *info_;
   Ort::AllocatorWithDefaultOptions allocator_;
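Note: ORT_API_VERSION is defined by onnxruntime_c_api.h; 14 corresponds to onnxruntime 1.14, where the Ort::ConstKernelInfo and Ort::KernelContext wrappers were introduced and Ort::CustomOpApi was deprecated. The const_cast in the legacy branch exists because KernelContext_GetInput returns a const OrtValue* while the pre-1.14 Ort::Unowned<Ort::Value> wrapper only accepts a mutable pointer; the cast is confined to read-only calls. A sketch of that access path, with names as in the patch:

    const OrtValue *raw = api.KernelContext_GetInput(context, 0);        // const view
    const Ort::Unowned<Ort::Value> input{const_cast<OrtValue *>(raw)};   // non-owning wrapper
    const float *data = input.GetTensorData<float>();                    // const read only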
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp b/csrc/mmdeploy/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp
index 075c3277bc..01c6686728 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp
@@ -20,8 +20,9 @@ void parallel_unroll_gemm(const float *A, const float *B, const float *V, const
       tmp[n] = 0;
     }
     {
-      int32_t remainder = K % 8;  // unroll
-      for (int32_t k = 0; k < K; k += 8) {
+      const int32_t num_unroll = 8;
+      const int32_t remainder = K % num_unroll;  // unroll
+      for (int32_t k = 0; k < K - num_unroll; k += num_unroll) {
         for (int32_t n = 0; n < N; n++) {
           tmp[n] += A[m * K + k] * B[k * N + n];
           tmp[n] += A[m * K + k + 1] * B[k * N + N + n];
@@ -113,19 +114,32 @@ void deformable_conv2d_ref_fp32(const float *src, const float *offset, const flo
 MMCVModulatedDeformConvKernel::MMCVModulatedDeformConvKernel(const OrtApi &api,
                                                              const OrtKernelInfo *info)
     : ort_(api), info_(info) {
-  std::vector<int64_t> stride = ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "stride");
+#if ORT_API_VERSION >= 14
+  const auto kernel_info = Ort::ConstKernelInfo(info);
+  std::vector<int64_t> stride = kernel_info.GetAttributes<int64_t>("stride");
+  std::vector<int64_t> padding = kernel_info.GetAttributes<int64_t>("padding");
+  std::vector<int64_t> dilation = kernel_info.GetAttributes<int64_t>("dilation");
+
+  deformable_group_ = kernel_info.GetAttribute<int64_t>("deform_groups");
+  group_ = kernel_info.GetAttribute<int64_t>("groups");
+#else
+  Ort::CustomOpApi custom_api{api};
+  auto stride = custom_api.KernelInfoGetAttribute<std::vector<int64_t> >(info, "stride");
+  auto padding = custom_api.KernelInfoGetAttribute<std::vector<int64_t> >(info, "padding");
+  auto dilation = custom_api.KernelInfoGetAttribute<std::vector<int64_t> >(info, "dilation");
+
+  deformable_group_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "deform_groups");
+  group_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "groups");
+#endif
+
   stride_height_ = stride[0];
   stride_width_ = stride[1];
-  std::vector<int64_t> padding = ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "padding");
+
   padding_height_ = padding[0];
   padding_width_ = padding[1];
-  std::vector<int64_t> dilation =
-      ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "dilation");
+
   dilation_height_ = dilation[0];
   dilation_width_ = dilation[1];
-  deformable_group_ = ort_.KernelInfoGetAttribute<int64_t>(info, "deform_groups");
-  group_ = ort_.KernelInfoGetAttribute<int64_t>(info, "groups");
-
   // create allocator
   allocator_ = Ort::AllocatorWithDefaultOptions();
 }
@@ -140,26 +154,42 @@ void MMCVModulatedDeformConvKernel::Compute(OrtKernelContext *context) {
   const int64_t deformable_group = deformable_group_;
   const int64_t group = group_;
 
-  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
-  const float *input_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));
-
-  const OrtValue *offset = ort_.KernelContext_GetInput(context, 1);
-  const float *offset_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(offset));
-
-  const OrtValue *mask = ort_.KernelContext_GetInput(context, 2);
-  const float *mask_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(mask));
-
-  const OrtValue *filter = ort_.KernelContext_GetInput(context, 3);
-  const float *filter_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(filter));
+#if ORT_API_VERSION >= 14
+  const Ort::KernelContext ctx(context);
+  const auto input = ctx.GetInput(0);
+  const auto offset = ctx.GetInput(1);
+  const auto mask = ctx.GetInput(2);
+  const auto filter = ctx.GetInput(3);
+  const auto bias = ctx.GetInput(4);
+
+  const float *bias_data = bias ? bias.GetTensorData<float>() : nullptr;
+#else
+  Ort::CustomOpApi api{ort_};
+  const Ort::Unowned<Ort::Value> input =
+      const_cast<OrtValue *>(api.KernelContext_GetInput(context, 0));
+  const Ort::Unowned<Ort::Value> offset =
+      const_cast<OrtValue *>(api.KernelContext_GetInput(context, 1));
+  const Ort::Unowned<Ort::Value> mask =
+      const_cast<OrtValue *>(api.KernelContext_GetInput(context, 2));
+  const Ort::Unowned<Ort::Value> filter =
+      const_cast<OrtValue *>(api.KernelContext_GetInput(context, 3));
+  const float *bias_data = [&context, &api]() -> const float * {
+    const OrtValue *bias_val = api.KernelContext_GetInput(context, 4);
+    if (bias_val) {
+      const Ort::Unowned<Ort::Value> bias{const_cast<OrtValue *>(bias_val)};
+      return bias.GetTensorData<float>();
+    }
+    return nullptr;
+  }();
+#endif
 
-  const OrtValue *bias = ort_.KernelContext_GetInput(context, 4);
-  const float *bias_data = (bias != nullptr)
-                               ? reinterpret_cast<const float *>(ort_.GetTensorData<float>(bias))
-                               : nullptr;
-  // const float *bias_data = nullptr;
+  const float *input_data = input.GetTensorData<float>();
+  const float *offset_data = offset.GetTensorData<float>();
+  const float *mask_data = mask.GetTensorData<float>();
+  const float *filter_data = filter.GetTensorData<float>();
 
-  OrtTensorDimensions input_dims(ort_, input);
-  OrtTensorDimensions filter_dims(ort_, filter);
+  std::vector<int64_t> input_dims = input.GetTensorTypeAndShapeInfo().GetShape();
+  std::vector<int64_t> filter_dims = filter.GetTensorTypeAndShapeInfo().GetShape();
 
   int64_t batch = input_dims[0];
   int64_t channels = input_dims[1];
@@ -177,9 +207,15 @@ void MMCVModulatedDeformConvKernel::Compute(OrtKernelContext *context) {
       (in_width + 2 * padding_width - dilation_width * (kernel_width - 1) - 1) / stride_width + 1);
 
   std::vector<int64_t> output_dims = {batch, num_output, out_height, out_width};
-  OrtValue *output =
-      ort_.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
-  float *out_ptr = ort_.GetTensorMutableData<float>(output);
+
+#if ORT_API_VERSION >= 14
+  auto output = ctx.GetOutput(0, output_dims.data(), output_dims.size());
+#else
+  Ort::Unowned<Ort::Value> output =
+      api.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size());
+#endif
+
+  float *out_ptr = output.GetTensorMutableData<float>();
 
   // allocate tmp memory
   int64_t column_len = (channels / group) * kernel_height * kernel_width * out_height * out_width;
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.h b/csrc/mmdeploy/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.h
index 772a9c4a88..5ca352a142 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.h
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.h
@@ -12,7 +12,7 @@ struct MMCVModulatedDeformConvKernel {
   void Compute(OrtKernelContext *context);
 
  protected:
-  Ort::CustomOpApi ort_;
+  const OrtApi& ort_;
   const OrtKernelInfo *info_;
   Ort::AllocatorWithDefaultOptions allocator_;
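Note: the change in parallel_unroll_gemm is a correctness fix, not a rename. With the old bound (k < K; k += 8) the unrolled body read A[m * K + k + 7] and the matching rows of B even when fewer than eight elements remained, overrunning the row whenever K was not a multiple of 8; the new bound stops the unrolled loop early and leaves the leftovers to the remainder loop, which sits outside the hunk shown. As a hedged sketch (not the patch verbatim), the usual shape of the unroll-plus-tail pattern is:

    const int32_t num_unroll = 8;
    int32_t k = 0;
    // unrolled body runs only while a full block of num_unroll elements remains
    for (; k + num_unroll <= K; k += num_unroll) {
      /* process elements k .. k + num_unroll - 1 */
    }
    // scalar tail finishes the remaining K % num_unroll elements
    for (; k < K; ++k) {
      /* process element k */
    }

Separately, the optional bias input now short-circuits through `bias ? bias.GetTensorData<float>() : nullptr` on the new path: the value wrapper returned by ctx.GetInput converts to false when the optional input is absent.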
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.cpp b/csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.cpp
index 784be2c987..af1cf70230 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.cpp
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.cpp
@@ -42,17 +42,32 @@ NMSMatchKernel::NMSMatchKernel(const OrtApi& api, const OrtKernelInfo* info)
 }
 
 void NMSMatchKernel::Compute(OrtKernelContext* context) {
-  const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
-  const float* boxes_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
-  const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
-  const float* scores_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
-  const OrtValue* iou_threshold_ = ort_.KernelContext_GetInput(context, 2);
-  const float iou_threshold_data = ort_.GetTensorData<float>(iou_threshold_)[0];
-  const OrtValue* score_threshold_ = ort_.KernelContext_GetInput(context, 3);
-  const float score_threshold_data = ort_.GetTensorData<float>(score_threshold_)[0];
-
-  OrtTensorDimensions boxes_dim(ort_, boxes);
-  OrtTensorDimensions scores_dim(ort_, scores);
+#if ORT_API_VERSION >= 14
+  const Ort::KernelContext ctx(context);
+  const auto boxes = ctx.GetInput(0);
+  const auto scores = ctx.GetInput(1);
+  const auto iou_threshold = ctx.GetInput(2);
+  const auto score_threshold = ctx.GetInput(3);
+#else
+  Ort::CustomOpApi api{ort_};
+  const Ort::Unowned<Ort::Value> boxes =
+      const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0));
+  const Ort::Unowned<Ort::Value> scores =
+      const_cast<OrtValue*>(api.KernelContext_GetInput(context, 1));
+  const Ort::Unowned<Ort::Value> iou_threshold =
+      const_cast<OrtValue*>(api.KernelContext_GetInput(context, 2));
+  const Ort::Unowned<Ort::Value> score_threshold =
+      const_cast<OrtValue*>(api.KernelContext_GetInput(context, 3));
+#endif
+
+  const float* boxes_data = boxes.GetTensorData<float>();
+  const float* scores_data = scores.GetTensorData<float>();
+  const float iou_threshold_data = iou_threshold.GetTensorData<float>()[0];
+  const float score_threshold_data = score_threshold.GetTensorData<float>()[0];
+
+  std::vector<int64_t> boxes_dim = boxes.GetTensorTypeAndShapeInfo().GetShape();
+  std::vector<int64_t> scores_dim = scores.GetTensorTypeAndShapeInfo().GetShape();
+
   // loop over batch
   int64_t nbatch = boxes_dim[0];
   int64_t nboxes = boxes_dim[1];
@@ -118,8 +133,14 @@ void NMSMatchKernel::Compute(OrtKernelContext* context) {
   }
   std::vector<int64_t> inds_dims({(int64_t)res_order.size() / 4, 4});
 
-  OrtValue* res = ort_.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
-  int64_t* res_data = ort_.GetTensorMutableData<int64_t>(res);
+#if ORT_API_VERSION >= 14
+  auto res = ctx.GetOutput(0, inds_dims.data(), inds_dims.size());
+#else
+  Ort::Unowned<Ort::Value> res =
+      api.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
+#endif
+
+  int64_t* res_data = res.GetTensorMutableData<int64_t>();
 
   memcpy(res_data, res_order.data(), sizeof(int64_t) * res_order.size());
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.h b/csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.h
index 57aa94d964..78e2821de3 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.h
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/nms_match/nms_match.h
@@ -17,7 +17,7 @@ struct NMSMatchKernel {
   void Compute(OrtKernelContext* context);
 
  private:
-  Ort::CustomOpApi ort_;
+  const OrtApi& ort_;
   const OrtKernelInfo* info_;
   Ort::AllocatorWithDefaultOptions allocator_;
 };
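Note: NMSMatch takes its two thresholds as tensor inputs rather than node attributes, so each is read as a one-element float tensor; renaming the locals from iou_threshold_/score_threshold_ also drops the trailing-underscore convention that wrongly suggested member variables. Sketch of the scalar read on the ORT >= 1.14 path, names from the patch:

    const auto iou_threshold = ctx.GetInput(2);                            // 1-element float tensor
    const float iou_threshold_data = iou_threshold.GetTensorData<float>()[0];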
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.cpp b/csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.cpp
index 9d8cc4597e..00fad4afce 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.cpp
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.cpp
@@ -6,7 +6,6 @@
 #include <assert.h>
 #include <algorithm>
 #include <cmath>
-#include <iostream>
 #include <iterator>
 #include <numeric>  // std::iota
 #include <vector>
@@ -263,8 +262,15 @@ float rotated_boxes_intersection(const RotatedBox& box1, const RotatedBox& box2)
 
 NMSRotatedKernel::NMSRotatedKernel(const OrtApi& api, const OrtKernelInfo* info)
     : ort_(api), info_(info) {
-  iou_threshold_ = ort_.KernelInfoGetAttribute<float>(info, "iou_threshold");
-  score_threshold_ = ort_.KernelInfoGetAttribute<float>(info, "score_threshold");
+#if ORT_API_VERSION >= 14
+  const auto kernel_info = Ort::ConstKernelInfo(info);
+  iou_threshold_ = kernel_info.GetAttribute<float>("iou_threshold");
+  score_threshold_ = kernel_info.GetAttribute<float>("score_threshold");
+#else
+  Ort::CustomOpApi custom_api{api};
+  iou_threshold_ = custom_api.KernelInfoGetAttribute<float>(info, "iou_threshold");
+  score_threshold_ = custom_api.KernelInfoGetAttribute<float>(info, "score_threshold");
+#endif
 
   // create allocator
   allocator_ = Ort::AllocatorWithDefaultOptions();
@@ -274,13 +280,23 @@ void NMSRotatedKernel::Compute(OrtKernelContext* context) {
   const float iou_threshold = iou_threshold_;
   const float score_threshold = score_threshold_;
 
-  const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
-  const float* boxes_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
-  const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
-  const float* scores_data = reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
+#if ORT_API_VERSION >= 14
+  const Ort::KernelContext ctx(context);
+  const auto boxes = ctx.GetInput(0);
+  const auto scores = ctx.GetInput(1);
+#else
+  Ort::CustomOpApi api{ort_};
+  const Ort::Unowned<Ort::Value> boxes =
+      const_cast<OrtValue*>(api.KernelContext_GetInput(context, 0));
+  const Ort::Unowned<Ort::Value> scores =
+      const_cast<OrtValue*>(api.KernelContext_GetInput(context, 1));
+#endif
 
-  OrtTensorDimensions boxes_dim(ort_, boxes);
-  OrtTensorDimensions scores_dim(ort_, scores);
+  const float* boxes_data = boxes.GetTensorData<float>();
+  const float* scores_data = scores.GetTensorData<float>();
+
+  std::vector<int64_t> boxes_dim = boxes.GetTensorTypeAndShapeInfo().GetShape();
+  std::vector<int64_t> scores_dim = scores.GetTensorTypeAndShapeInfo().GetShape();
 
   // loop over batch
   int64_t nbatch = boxes_dim[0];
@@ -354,8 +370,13 @@ void NMSRotatedKernel::Compute(OrtKernelContext* context) {
 
   std::vector<int64_t> inds_dims({(int64_t)res_order.size() / 3, 3});
 
-  OrtValue* res = ort_.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
-  int64_t* res_data = ort_.GetTensorMutableData<int64_t>(res);
+#if ORT_API_VERSION >= 14
+  auto res = ctx.GetOutput(0, inds_dims.data(), inds_dims.size());
+#else
+  Ort::Unowned<Ort::Value> res = api.KernelContext_GetOutput(context, 0, inds_dims.data(), inds_dims.size());
+#endif
+
+  int64_t* res_data = res.GetTensorMutableData<int64_t>();
 
   memcpy(res_data, res_order.data(), sizeof(int64_t) * res_order.size());
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.h b/csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.h
index 6ed44ce410..7bbcbe584d 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.h
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/nms_rotated/nms_rotated.h
@@ -17,7 +17,7 @@ struct NMSRotatedKernel {
   void Compute(OrtKernelContext* context);
 
  private:
-  Ort::CustomOpApi ort_;
+  const OrtApi& ort_;
   const OrtKernelInfo* info_;
   Ort::AllocatorWithDefaultOptions allocator_;
   float iou_threshold_;
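Note: unlike NMSMatch, NMSRotated receives its thresholds as node attributes, so they are read once at kernel construction. Ort::ConstKernelInfo is a non-owning view over the OrtKernelInfo* that onnxruntime passes in, so wrapping it is cheap and nothing has to be released on scope exit. Sketch, with attribute names from the patch:

    const auto kernel_info = Ort::ConstKernelInfo(info);   // non-owning view
    iou_threshold_ = kernel_info.GetAttribute<float>("iou_threshold");
    score_threshold_ = kernel_info.GetAttribute<float>("score_threshold");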
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/roi_align_rotated/roi_align_rotated.cpp b/csrc/mmdeploy/backend_ops/onnxruntime/roi_align_rotated/roi_align_rotated.cpp
index a8e7023fe1..d361369b02 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/roi_align_rotated/roi_align_rotated.cpp
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/roi_align_rotated/roi_align_rotated.cpp
@@ -198,15 +198,24 @@ void ROIAlignRotatedForwardCPU(const int nthreads, const float *input, const flo
 
 void MMCVRoIAlignRotatedKernel::Compute(OrtKernelContext *context) {
   // Setup inputs
-  const OrtValue *input_X = ort_.KernelContext_GetInput(context, 0);
-  const float *X_data = reinterpret_cast<const float *>(ort_.GetTensorData<float>(input_X));
-  const OrtValue *input_rois = ort_.KernelContext_GetInput(context, 1);
-  const float *rois =
-      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input_rois));
+#if ORT_API_VERSION >= 14
+  const Ort::KernelContext ctx(context);
+  const auto input_X = ctx.GetInput(0);
+  const auto input_rois = ctx.GetInput(1);
+#else
+  Ort::CustomOpApi api{ort_};
+  const Ort::Unowned<Ort::Value> input_X =
+      const_cast<OrtValue *>(api.KernelContext_GetInput(context, 0));
+  const Ort::Unowned<Ort::Value> input_rois =
+      const_cast<OrtValue *>(api.KernelContext_GetInput(context, 1));
+#endif
+
+  const float *X_data = input_X.GetTensorData<float>();
+  const float *rois = input_rois.GetTensorData<float>();
 
   // Setup output
-  OrtTensorDimensions out_dimensions(ort_, input_X);
-  OrtTensorDimensions roi_dimensions(ort_, input_rois);
+  std::vector<int64_t> out_dimensions = input_X.GetTensorTypeAndShapeInfo().GetShape();
+  std::vector<int64_t> roi_dimensions = input_rois.GetTensorTypeAndShapeInfo().GetShape();
 
   int batch_size = out_dimensions.data()[0];
   int input_channels = out_dimensions.data()[1];
@@ -217,11 +226,15 @@ void MMCVRoIAlignRotatedKernel::Compute(OrtKernelContext *context) {
   out_dimensions.data()[2] = aligned_height_;
   out_dimensions.data()[3] = aligned_width_;
 
-  OrtValue *output =
-      ort_.KernelContext_GetOutput(context, 0, out_dimensions.data(), out_dimensions.size());
-  float *out = ort_.GetTensorMutableData<float>(output);
-  OrtTensorTypeAndShapeInfo *output_info = ort_.GetTensorTypeAndShape(output);
-  ort_.ReleaseTensorTypeAndShapeInfo(output_info);
+#if ORT_API_VERSION >= 14
+  auto output = ctx.GetOutput(0, out_dimensions.data(), out_dimensions.size());
+#else
+  Ort::Unowned<Ort::Value> output =
+      api.KernelContext_GetOutput(context, 0, out_dimensions.data(), out_dimensions.size());
+#endif
+
+  float *out = output.GetTensorMutableData<float>();
+  auto output_info = output.GetTensorTypeAndShapeInfo();
 
   // TODO: forward here
   int output_size = out_dimensions.data()[0];
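Note: the deleted GetTensorTypeAndShape/ReleaseTensorTypeAndShapeInfo pair was the one place this kernel managed the shape-info handle manually; the C++ wrapper returned by GetTensorTypeAndShapeInfo() owns its handle and releases it in its destructor. Sketch, with output as in the patch:

    auto output_info = output.GetTensorTypeAndShapeInfo();  // owning Ort::TensorTypeAndShapeInfo
    size_t rank = output_info.GetDimensionsCount();         // example query; no manual Release needed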
diff --git a/csrc/mmdeploy/backend_ops/onnxruntime/roi_align_rotated/roi_align_rotated.h b/csrc/mmdeploy/backend_ops/onnxruntime/roi_align_rotated/roi_align_rotated.h
index c0129d31f8..3efd66f2ec 100644
--- a/csrc/mmdeploy/backend_ops/onnxruntime/roi_align_rotated/roi_align_rotated.h
+++ b/csrc/mmdeploy/backend_ops/onnxruntime/roi_align_rotated/roi_align_rotated.h
@@ -13,19 +13,31 @@ namespace mmdeploy {
 
 struct MMCVRoIAlignRotatedKernel {
  public:
-  MMCVRoIAlignRotatedKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info) : ort_(ort) {
-    aligned_height_ = ort_.KernelInfoGetAttribute<int64_t>(info, "output_height");
-    aligned_width_ = ort_.KernelInfoGetAttribute<int64_t>(info, "output_width");
-    sampling_ratio_ = ort_.KernelInfoGetAttribute<int64_t>(info, "sampling_ratio");
-    spatial_scale_ = ort_.KernelInfoGetAttribute<float>(info, "spatial_scale");
-    aligned_ = ort_.KernelInfoGetAttribute<int64_t>(info, "aligned");
-    clockwise_ = ort_.KernelInfoGetAttribute<int64_t>(info, "clockwise");
+  MMCVRoIAlignRotatedKernel(const OrtApi& ort, const OrtKernelInfo* info) : ort_(ort) {
+#if ORT_API_VERSION >= 14
+    const auto kernel_info = Ort::ConstKernelInfo(info);
+    aligned_height_ = kernel_info.GetAttribute<int64_t>("output_height");
+    aligned_width_ = kernel_info.GetAttribute<int64_t>("output_width");
+    sampling_ratio_ = kernel_info.GetAttribute<int64_t>("sampling_ratio");
+    spatial_scale_ = kernel_info.GetAttribute<float>("spatial_scale");
+    aligned_ = kernel_info.GetAttribute<int64_t>("aligned");
+    clockwise_ = kernel_info.GetAttribute<int64_t>("clockwise");
+#else
+    Ort::CustomOpApi custom_api{ort};
+    aligned_height_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "output_height");
+    aligned_width_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "output_width");
+    sampling_ratio_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "sampling_ratio");
+    spatial_scale_ = custom_api.KernelInfoGetAttribute<float>(info, "spatial_scale");
+    aligned_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "aligned");
+    clockwise_ = custom_api.KernelInfoGetAttribute<int64_t>(info, "clockwise");
+#endif
+
   }
   void Compute(OrtKernelContext* context);
 
  private:
-  Ort::CustomOpApi ort_;
+  const OrtApi& ort_;
   int aligned_height_;
   int aligned_width_;
   float spatial_scale_;
@@ -36,7 +48,7 @@ struct MMCVRoIAlignRotatedKernel {
 
 struct MMCVRoIAlignRotatedCustomOp
     : Ort::CustomOpBase<MMCVRoIAlignRotatedCustomOp, MMCVRoIAlignRotatedKernel> {
-  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
+  void* CreateKernel(const OrtApi& api, const OrtKernelInfo* info) const {
    return new MMCVRoIAlignRotatedKernel(api, info);
   }
   const char* GetName() const { return "MMCVRoIAlignRotated"; }
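Note: the CreateKernel signature change matches what Ort::CustomOpBase expects from onnxruntime 1.14 on, and keeping the kernel constructor on const OrtApi& lets a single op definition build against both old and new headers. The minimal shape of a custom op after this change looks roughly like the sketch below (MyOp and MyKernel are illustrative names, not from the patch):

    struct MyOp : Ort::CustomOpBase<MyOp, MyKernel> {
      void *CreateKernel(const OrtApi &api, const OrtKernelInfo *info) const {
        return new MyKernel(api, info);  // kernel dispatches internally on ORT_API_VERSION
      }
      const char *GetName() const { return "MyOp"; }
      size_t GetInputTypeCount() const { return 1; }
      ONNXTensorElementDataType GetInputType(size_t) const {
        return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
      }
      size_t GetOutputTypeCount() const { return 1; }
      ONNXTensorElementDataType GetOutputType(size_t) const {
        return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
      }
    };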
diff --git a/mmdeploy/mmcv/ops/nms_rotated.py b/mmdeploy/mmcv/ops/nms_rotated.py
index a61e7521cc..679b748121 100644
--- a/mmdeploy/mmcv/ops/nms_rotated.py
+++ b/mmdeploy/mmcv/ops/nms_rotated.py
@@ -190,7 +190,7 @@ def select_rnms_index(scores: torch.Tensor,
     batched_labels = cls_inds.unsqueeze(0).repeat(batch_size, 1)
     batched_labels = batched_labels.where(
         (batch_inds == batch_template.unsqueeze(1)),
-        batched_labels.new_ones(1) * -1)
+        (batched_labels.new_ones(1) * -1).to(dtype=batched_labels.dtype))
 
     N = batched_dets.shape[0]
diff --git a/mmdeploy/utils/test.py b/mmdeploy/utils/test.py
index c9afd73b4e..86bed2766a 100644
--- a/mmdeploy/utils/test.py
+++ b/mmdeploy/utils/test.py
@@ -344,7 +344,8 @@ def forward(self, inputs: dict):
                 output_names=output_names,
                 opset_version=11,
                 dynamic_axes=dynamic_axes,
-                keep_initializers_as_inputs=False)
+                keep_initializers_as_inputs=False,
+                autograd_inlining=False)
     return onnx_file_path
diff --git a/tests/test_ops/utils.py b/tests/test_ops/utils.py
index 0291158e16..41cac4bda4 100644
--- a/tests/test_ops/utils.py
+++ b/tests/test_ops/utils.py
@@ -50,7 +50,8 @@ def run_and_validate(self,
                 output_names=output_names,
                 do_constant_folding=do_constant_folding,
                 dynamic_axes=dynamic_axes,
-                opset_version=11)
+                opset_version=11,
+                autograd_inlining=False)
 
     if expected_result is None:
         with torch.no_grad():
             model_outputs = model(*input_list)
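Note: autograd_inlining is a torch.onnx.export keyword (it appeared around PyTorch 2.1, defaulting to True); passing False keeps autograd Functions from being inlined during the export trace. Because the keyword does not exist on older PyTorch, these call sites implicitly pin a minimum torch version for the export and test helpers.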