@@ -15,35 +15,96 @@ AsyncInferRequest::AsyncInferRequest(const std::shared_ptr<SyncInferRequest>& re
     : ov::IAsyncInferRequest(request, nullptr, callback_executor),
       m_sync_request(request),
       m_request_without_batch(request_without_batch) {
-    // this executor starts the inference while the task (checking the result) is passed to the next stage
-    struct ThisRequestExecutor : public ov::threading::ITaskExecutor {
-        explicit ThisRequestExecutor(AsyncInferRequest* _this_) : _this{_this_} {}
-        void run(ov::threading::Task task) override {
-            auto workerInferRequest = _this->m_sync_request->m_batched_request_wrapper;
-            std::pair<AsyncInferRequest*, ov::threading::Task> t;
-            t.first = _this;
-            t.second = std::move(task);
-            workerInferRequest->_tasks.push(t);
-            // it is ok to call size() here as the queue only grows (and the bulk removal happens under the mutex)
-            const int sz = static_cast<int>(workerInferRequest->_tasks.size());
-            if (sz == workerInferRequest->_batch_size) {
-                workerInferRequest->_cond.notify_one();
+    if (m_sync_request && m_sync_request->get_batch_size() == 0) {
+        // batch not applicable, just a wrapper to hardware infer request
+        // share the tensors with hardware infer request
+        for (const auto& input : get_inputs()) {
+            auto tensor = m_request_without_batch->get_tensor(input);
+            if (!tensor._so) {
+                tensor._so = m_request_without_batch._so;
             }
+            set_tensor(input, tensor);
+        }
+        for (const auto& output : get_outputs()) {
+            auto tensor = m_request_without_batch->get_tensor(output);
+            if (!tensor._so) {
+                tensor._so = m_request_without_batch._so;
+            }
+            set_tensor(output, tensor);
+        }
+        struct RequestExecutor : ov::threading::ITaskExecutor {
+            explicit RequestExecutor(const ov::SoPtr<ov::IAsyncInferRequest>& infer_request)
+                : m_inferrequest(infer_request) {
+                m_inferrequest->set_callback([this](std::exception_ptr exceptionPtr) mutable {
+                    m_exceptionptr = std::move(exceptionPtr);
+                    auto capturedTask = std::move(m_task);
+                    capturedTask();
+                });
+            }
+            void run(ov::threading::Task task) override {
+                m_task = std::move(task);
+                m_inferrequest->start_async();
+            };
+            const ov::SoPtr<ov::IAsyncInferRequest>& m_inferrequest;
+            std::exception_ptr m_exceptionptr;
+            ov::threading::Task m_task;
         };
-        AsyncInferRequest* _this = nullptr;
-    };
-    m_pipeline = {{/*TaskExecutor*/ std::make_shared<ThisRequestExecutor>(this), /*task*/ [this] {
-                       if (this->m_sync_request->m_exception_ptr)  // if the exception happened in the batch1 fallback
-                           std::rethrow_exception(this->m_sync_request->m_exception_ptr);
-                       auto batchReq = this->m_sync_request->m_batched_request_wrapper;
-                       if (batchReq->_exception_ptr)  // when the batchN execution failed
-                           std::rethrow_exception(batchReq->_exception_ptr);
-                       // in the case of non-batched execution the tensors were set explicitly
-                       if (SyncInferRequest::eExecutionFlavor::BATCH_EXECUTED ==
-                           this->m_sync_request->m_batched_request_status) {
-                           this->m_sync_request->copy_outputs_if_needed();
-                       }
-                   }}};
+        auto requestExecutor = std::make_shared<RequestExecutor>(m_request_without_batch);
+        m_pipeline.emplace_back(requestExecutor, [requestExecutor] {
+            if (nullptr != requestExecutor->m_exceptionptr) {
+                std::rethrow_exception(requestExecutor->m_exceptionptr);
+            }
+        });
+    } else {
+        // batch size > 1, try infer with batched request
+        // this executor starts the inference while the task (checking the result) is passed to the next stage
+        struct ThisRequestExecutor : public ov::threading::ITaskExecutor {
+            explicit ThisRequestExecutor(AsyncInferRequest* _this_) : _this{_this_} {}
+            void run(ov::threading::Task task) override {
+                auto workerInferRequest = _this->m_sync_request->m_batched_request_wrapper;
+                std::pair<AsyncInferRequest*, ov::threading::Task> t;
+                t.first = _this;
+                t.second = std::move(task);
+                workerInferRequest->_tasks.push(t);
+                // it is ok to call size() here as the queue only grows (and the bulk removal happens under the mutex)
+                const int sz = static_cast<int>(workerInferRequest->_tasks.size());
+                if (sz == workerInferRequest->_batch_size) {
+                    workerInferRequest->_cond.notify_one();
+                }
+            };
+            AsyncInferRequest* _this = nullptr;
+        };
+        m_pipeline = {
+            {/*TaskExecutor*/ std::make_shared<ThisRequestExecutor>(this), /*task*/ [this] {
+                 if (this->m_sync_request->m_exception_ptr)  // if the exception happened in the batch1 fallback
+                     std::rethrow_exception(this->m_sync_request->m_exception_ptr);
+                 auto batchReq = this->m_sync_request->m_batched_request_wrapper;
+                 if (batchReq->_exception_ptr)  // when the batchN execution failed
+                     std::rethrow_exception(batchReq->_exception_ptr);
+                 // in the case of non-batched execution the tensors were set explicitly
+                 if (SyncInferRequest::eExecutionFlavor::BATCH_EXECUTED ==
+                     this->m_sync_request->m_batched_request_status) {
+                     this->m_sync_request->copy_outputs_if_needed();
+                 }
+             }}};
+    }
+}
+
+void AsyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) {
+    check_state();
+    if (m_sync_request && m_sync_request->get_batch_size() == 0) {
+        m_request_without_batch->set_tensor(port, tensor);
+    }
+    ov::IAsyncInferRequest::set_tensor(port, tensor);
+}
+
+void AsyncInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
+                                    const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
+    check_state();
+    if (m_sync_request && m_sync_request->get_batch_size() == 0) {
+        m_request_without_batch->set_tensors(port, tensors);
+    }
+    ov::IAsyncInferRequest::set_tensors(port, tensors);
 }
 
 std::vector<ov::ProfilingInfo> AsyncInferRequest::get_profiling_info() const {
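
The no-batch branch turns the wrapped hardware request itself into the pipeline's task executor: run() stores the next stage as a task and calls start_async(), and the completion callback records any exception before invoking that stored task. A minimal standalone sketch of this chaining pattern is shown below; it uses simplified stand-in types (MockAsyncRequest, a plain std::function task) rather than the OpenVINO interfaces.

#include <exception>
#include <functional>
#include <iostream>
#include <thread>
#include <utility>

using Task = std::function<void()>;

// Stand-in for the hardware infer request: finishes on a worker thread and
// then fires the registered completion callback.
struct MockAsyncRequest {
    std::function<void(std::exception_ptr)> callback;
    std::thread worker;

    void set_callback(std::function<void(std::exception_ptr)> cb) {
        callback = std::move(cb);
    }
    void start_async() {
        worker = std::thread([this] {
            std::exception_ptr eptr;
            try {
                // ... the actual inference would run here ...
            } catch (...) {
                eptr = std::current_exception();
            }
            callback(eptr);
        });
    }
    void wait() {
        if (worker.joinable())
            worker.join();
    }
    ~MockAsyncRequest() { wait(); }
};

// Executor in the style of RequestExecutor: running a pipeline stage means
// starting the async request; the stored task (the next stage) is resumed
// from the completion callback.
struct RequestExecutorSketch {
    explicit RequestExecutorSketch(MockAsyncRequest& request) : m_request(request) {
        m_request.set_callback([this](std::exception_ptr eptr) {
            m_exceptionptr = eptr;
            auto captured = std::move(m_task);  // mirrors capturedTask in the diff
            captured();
        });
    }
    void run(Task task) {
        m_task = std::move(task);
        m_request.start_async();
    }
    MockAsyncRequest& m_request;
    std::exception_ptr m_exceptionptr;
    Task m_task;
};

int main() {
    MockAsyncRequest request;
    RequestExecutorSketch executor(request);
    // The "check the result" stage: rethrow whatever the callback recorded.
    executor.run([&executor] {
        if (executor.m_exceptionptr)
            std::rethrow_exception(executor.m_exceptionptr);
        std::cout << "inference finished without errors\n";
    });
    request.wait();  // make sure the callback (and the next stage) has run
}

Moving the stored task into a local before invoking it mirrors the capturedTask step in the diff, so the executor holds no stale task once the stage has been resumed.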
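The batched branch instead defers work to a shared worker: each request's executor only enqueues its result-checking task on the batched request wrapper and wakes the worker once the queue holds a full batch. A rough self-contained sketch of that producer/worker handshake, with an invented BatchedWorker in place of the wrapper and none of its real bookkeeping, could look like this:

#include <condition_variable>
#include <functional>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

using Task = std::function<void()>;

struct BatchedWorker {
    explicit BatchedWorker(int batch_size) : m_batch_size(batch_size), m_thread([this] { loop(); }) {}

    // What ThisRequestExecutor::run does in the diff: enqueue the per-request
    // task and notify only when a full batch has accumulated.
    void enqueue(Task task) {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_tasks.push(std::move(task));
            if (static_cast<int>(m_tasks.size()) < m_batch_size)
                return;  // keep collecting; the worker stays asleep
        }
        m_cond.notify_one();  // a full batch is ready
    }

    void stop() {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_stop = true;
        }
        m_cond.notify_one();
        m_thread.join();
    }

private:
    void loop() {
        std::unique_lock<std::mutex> lock(m_mutex);
        while (true) {
            m_cond.wait(lock, [this] {
                return m_stop || static_cast<int>(m_tasks.size()) >= m_batch_size;
            });
            if (static_cast<int>(m_tasks.size()) >= m_batch_size) {
                // ... the real worker would run one batched inference here ...
                // then complete every request in the batch:
                while (!m_tasks.empty()) {
                    Task t = std::move(m_tasks.front());
                    m_tasks.pop();
                    lock.unlock();
                    t();  // per-request completion runs outside the lock
                    lock.lock();
                }
            }
            if (m_stop)
                return;
        }
    }

    const int m_batch_size;
    std::mutex m_mutex;
    std::condition_variable m_cond;
    std::queue<Task> m_tasks;
    bool m_stop = false;
    std::thread m_thread;
};

int main() {
    BatchedWorker worker(4);
    for (int i = 0; i < 4; ++i)
        worker.enqueue([i] { std::cout << "request " << i << " completed\n"; });
    worker.stop();
}

The real wrapper additionally tracks per-request exceptions and the batch-1 fallback path; the sketch only models the queue-and-notify handshake that _tasks, _batch_size and _cond implement in the diff.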