
Commit b8942b6

songbell and ilyachur authored
Ensure so is there for lifecycle (openvinotoolkit#19510)
* ensure so is there for lifecycle
* batch plugin + batch not triggered case
* clang
* fix settensor failure
* fix model info mismatch when load with 1.0 API with ppp info
* remove unnecessary ppp code
* Update src/plugins/auto_batch/src/compiled_model.cpp
* enable the meta holder cases

Signed-off-by: fishbell <[email protected]>
Co-authored-by: Ilya Churaev <[email protected]>
1 parent 0b8237f commit b8942b6

File tree

8 files changed: +135 -54 lines changed

src/plugins/auto/src/infer_request.cpp

+10-2
@@ -56,10 +56,18 @@ ov::auto_plugin::InferRequest::InferRequest(const std::shared_ptr<const ov::auto
         }
     } else {
         for (const auto& input : get_inputs()) {
-            ov::ISyncInferRequest::set_tensor(input, m_shared_request->get_tensor(input));
+            auto tensor = m_shared_request->get_tensor(input);
+            if (!tensor._so) {
+                tensor._so = m_shared_request._so;
+            }
+            ov::ISyncInferRequest::set_tensor(input, tensor);
         }
         for (const auto& output : get_outputs()) {
-            ov::ISyncInferRequest::set_tensor(output, m_shared_request->get_tensor(output));
+            auto tensor = m_shared_request->get_tensor(output);
+            if (!tensor._so) {
+                tensor._so = m_shared_request._so;
+            }
+            ov::ISyncInferRequest::set_tensor(output, tensor);
         }
     }
 }
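The change above illustrates the pattern the whole commit applies: whenever a tensor handed back by the underlying request carries no shared-object holder (`_so`), the holder of the request that produced it is copied in, so the plugin library cannot be unloaded while the tensor is still referenced. A minimal, self-contained sketch of the idea — `SoHandle`, `Tensor`, and `ensure_so` are hypothetical stand-ins, not the real `ov::SoPtr`/`ov::ITensor` API:

#include <iostream>
#include <memory>

// Simplified stand-in for the shared-object-holder pattern used in the diff above:
// an object handed out by a plugin carries a reference to the shared library that
// implements it, so the library cannot be unloaded while the object is still alive.
struct Tensor {};

template <typename T>
struct SoHandle {
    std::shared_ptr<T> _ptr;    // the object itself
    std::shared_ptr<void> _so;  // keeps the plugin .so loaded while _ptr lives
};

// Propagate the owner's library handle onto an object created without one, mirroring
// the `if (!tensor._so) tensor._so = m_shared_request._so;` lines above.
template <typename T>
void ensure_so(SoHandle<T>& obj, const std::shared_ptr<void>& owner_so) {
    if (!obj._so) {
        obj._so = owner_so;
    }
}

int main() {
    // Pretend this shared_ptr represents the loaded plugin library.
    auto plugin_so = std::shared_ptr<void>(new int(0), [](void* p) {
        std::cout << "plugin library unloaded\n";
        delete static_cast<int*>(p);
    });

    SoHandle<Tensor> tensor{std::make_shared<Tensor>(), nullptr};
    ensure_so(tensor, plugin_so);

    plugin_so.reset();  // the caller drops its handle...
    // ...but the tensor still holds a reference, so "unloaded" prints only
    // after the tensor goes out of scope at the end of main().
    std::cout << "tensor still alive\n";
    return 0;
}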

src/plugins/auto_batch/src/async_infer_request.cpp

+88-27
@@ -15,35 +15,96 @@ AsyncInferRequest::AsyncInferRequest(const std::shared_ptr<SyncInferRequest>& re
     : ov::IAsyncInferRequest(request, nullptr, callback_executor),
       m_sync_request(request),
       m_request_without_batch(request_without_batch) {
-    // this executor starts the inference while the task (checking the result) is passed to the next stage
-    struct ThisRequestExecutor : public ov::threading::ITaskExecutor {
-        explicit ThisRequestExecutor(AsyncInferRequest* _this_) : _this{_this_} {}
-        void run(ov::threading::Task task) override {
-            auto workerInferRequest = _this->m_sync_request->m_batched_request_wrapper;
-            std::pair<AsyncInferRequest*, ov::threading::Task> t;
-            t.first = _this;
-            t.second = std::move(task);
-            workerInferRequest->_tasks.push(t);
-            // it is ok to call size() here as the queue only grows (and the bulk removal happens under the mutex)
-            const int sz = static_cast<int>(workerInferRequest->_tasks.size());
-            if (sz == workerInferRequest->_batch_size) {
-                workerInferRequest->_cond.notify_one();
+    if (m_sync_request && m_sync_request->get_batch_size() == 0) {
+        // batch not applicable, just a wrapper to hardware infer request
+        // share the tensors with hardware infer request
+        for (const auto& input : get_inputs()) {
+            auto tensor = m_request_without_batch->get_tensor(input);
+            if (!tensor._so) {
+                tensor._so = m_request_without_batch._so;
             }
+            set_tensor(input, tensor);
+        }
+        for (const auto& output : get_outputs()) {
+            auto tensor = m_request_without_batch->get_tensor(output);
+            if (!tensor._so) {
+                tensor._so = m_request_without_batch._so;
+            }
+            set_tensor(output, tensor);
+        }
+        struct RequestExecutor : ov::threading::ITaskExecutor {
+            explicit RequestExecutor(const ov::SoPtr<ov::IAsyncInferRequest>& infer_request)
+                : m_inferrequest(infer_request) {
+                m_inferrequest->set_callback([this](std::exception_ptr exceptionPtr) mutable {
+                    m_exceptionptr = std::move(exceptionPtr);
+                    auto capturedTask = std::move(m_task);
+                    capturedTask();
+                });
+            }
+            void run(ov::threading::Task task) override {
+                m_task = std::move(task);
+                m_inferrequest->start_async();
+            };
+            const ov::SoPtr<ov::IAsyncInferRequest>& m_inferrequest;
+            std::exception_ptr m_exceptionptr;
+            ov::threading::Task m_task;
         };
-        AsyncInferRequest* _this = nullptr;
-    };
-    m_pipeline = {{/*TaskExecutor*/ std::make_shared<ThisRequestExecutor>(this), /*task*/ [this] {
-                       if (this->m_sync_request->m_exception_ptr)  // if the exception happened in the batch1 fallback
-                           std::rethrow_exception(this->m_sync_request->m_exception_ptr);
-                       auto batchReq = this->m_sync_request->m_batched_request_wrapper;
-                       if (batchReq->_exception_ptr)  // when the batchN execution failed
-                           std::rethrow_exception(batchReq->_exception_ptr);
-                       // in the case of non-batched execution the tensors were set explicitly
-                       if (SyncInferRequest::eExecutionFlavor::BATCH_EXECUTED ==
-                           this->m_sync_request->m_batched_request_status) {
-                           this->m_sync_request->copy_outputs_if_needed();
-                       }
-                   }}};
+        auto requestExecutor = std::make_shared<RequestExecutor>(m_request_without_batch);
+        m_pipeline.emplace_back(requestExecutor, [requestExecutor] {
+            if (nullptr != requestExecutor->m_exceptionptr) {
+                std::rethrow_exception(requestExecutor->m_exceptionptr);
+            }
+        });
+    } else {
+        // batch size > 1, try infer with batched request
+        // this executor starts the inference while the task (checking the result) is passed to the next stage
+        struct ThisRequestExecutor : public ov::threading::ITaskExecutor {
+            explicit ThisRequestExecutor(AsyncInferRequest* _this_) : _this{_this_} {}
+            void run(ov::threading::Task task) override {
+                auto workerInferRequest = _this->m_sync_request->m_batched_request_wrapper;
+                std::pair<AsyncInferRequest*, ov::threading::Task> t;
+                t.first = _this;
+                t.second = std::move(task);
+                workerInferRequest->_tasks.push(t);
+                // it is ok to call size() here as the queue only grows (and the bulk removal happens under the mutex)
+                const int sz = static_cast<int>(workerInferRequest->_tasks.size());
+                if (sz == workerInferRequest->_batch_size) {
+                    workerInferRequest->_cond.notify_one();
+                }
+            };
+            AsyncInferRequest* _this = nullptr;
+        };
+        m_pipeline = {
+            {/*TaskExecutor*/ std::make_shared<ThisRequestExecutor>(this), /*task*/ [this] {
+                 if (this->m_sync_request->m_exception_ptr)  // if the exception happened in the batch1 fallback
+                     std::rethrow_exception(this->m_sync_request->m_exception_ptr);
+                 auto batchReq = this->m_sync_request->m_batched_request_wrapper;
+                 if (batchReq->_exception_ptr)  // when the batchN execution failed
+                     std::rethrow_exception(batchReq->_exception_ptr);
+                 // in the case of non-batched execution the tensors were set explicitly
+                 if (SyncInferRequest::eExecutionFlavor::BATCH_EXECUTED ==
+                     this->m_sync_request->m_batched_request_status) {
+                     this->m_sync_request->copy_outputs_if_needed();
+                 }
+             }}};
+    }
+}
+
+void AsyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) {
+    check_state();
+    if (m_sync_request && m_sync_request->get_batch_size() == 0) {
+        m_request_without_batch->set_tensor(port, tensor);
+    }
+    ov::IAsyncInferRequest::set_tensor(port, tensor);
+}
+
+void AsyncInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
+                                    const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
+    check_state();
+    if (m_sync_request && m_sync_request->get_batch_size() == 0) {
+        m_request_without_batch->set_tensors(port, tensors);
+    }
+    ov::IAsyncInferRequest::set_tensors(port, tensors);
 }
 
 std::vector<ov::ProfilingInfo> AsyncInferRequest::get_profiling_info() const {
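When batching is not applicable (`get_batch_size() == 0`), the constructor now builds a one-stage pipeline: the executor starts the wrapped hardware request, the request's completion callback resumes the stage, and the stage rethrows any stored exception; `set_tensor`/`set_tensors` are likewise forwarded straight to the hardware request. A rough, self-contained sketch of that control flow — `FakeHardwareRequest` and `BypassExecutor` are hypothetical stand-ins with no OpenVINO dependencies, and the "hardware" call runs synchronously only to keep the example short:

#include <exception>
#include <functional>
#include <iostream>
#include <memory>

using Task = std::function<void()>;

// Hypothetical stand-in for a hardware async infer request: it accepts a completion
// callback and "runs" synchronously here for simplicity.
struct FakeHardwareRequest {
    std::function<void(std::exception_ptr)> callback;
    void set_callback(std::function<void(std::exception_ptr)> cb) { callback = std::move(cb); }
    void start_async() {
        std::exception_ptr err = nullptr;
        try {
            std::cout << "hardware inference running\n";
        } catch (...) {
            err = std::current_exception();
        }
        callback(err);  // completion notifies the wrapper
    }
};

// Rough analogue of the RequestExecutor in the diff above: run() stores the pipeline
// task, kicks off the wrapped request, and the completion callback records the
// exception (if any) and resumes the stored task.
struct BypassExecutor {
    explicit BypassExecutor(std::shared_ptr<FakeHardwareRequest> req) : m_request(std::move(req)) {
        m_request->set_callback([this](std::exception_ptr e) {
            m_exception = e;
            auto task = std::move(m_task);
            task();
        });
    }
    void run(Task task) {
        m_task = std::move(task);
        m_request->start_async();
    }
    std::shared_ptr<FakeHardwareRequest> m_request;
    std::exception_ptr m_exception;
    Task m_task;
};

int main() {
    auto hw = std::make_shared<FakeHardwareRequest>();
    auto executor = std::make_shared<BypassExecutor>(hw);
    // Single pipeline stage: after the hardware request completes, rethrow its error, if any.
    executor->run([executor] {
        if (executor->m_exception)
            std::rethrow_exception(executor->m_exception);
        std::cout << "pipeline stage completed\n";
    });
    return 0;
}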

src/plugins/auto_batch/src/async_infer_request.hpp

+5
@@ -26,6 +26,11 @@ class AsyncInferRequest : public ov::IAsyncInferRequest {
 
     std::shared_ptr<ov::autobatch_plugin::SyncInferRequest> m_sync_request;
 
+    void set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) override;
+
+    void set_tensors(const ov::Output<const ov::Node>& port,
+                     const std::vector<ov::SoPtr<ov::ITensor>>& tensors) override;
+
     ov::SoPtr<ov::IAsyncInferRequest> m_request_without_batch;
 };
 }  // namespace autobatch_plugin

src/plugins/auto_batch/src/compiled_model.cpp

+18-17
@@ -134,26 +134,19 @@ CompiledModel::GetWorkerInferRequest() const {
 }
 
 std::shared_ptr<ov::IAsyncInferRequest> CompiledModel::create_infer_request() const {
-    if (!m_compiled_model_with_batch) {
-        auto res = m_compiled_model_without_batch->create_infer_request();
-        for (auto& iter : res->get_inputs()) {
-            auto&& tensor = res->get_tensor(iter);
-            if (!tensor._so)
-                tensor._so = m_compiled_model_without_batch._so;
-        }
-        for (auto& iter : res->get_outputs()) {
-            auto&& tensor = res->get_tensor(iter);
-            if (!tensor._so)
-                tensor._so = m_compiled_model_without_batch._so;
-        }
-        return res;
-    }
-
-    auto sync_res = create_sync_infer_request();
-
     ov::SoPtr<ov::IAsyncInferRequest> infer_request_without_batch = {
         m_compiled_model_without_batch->create_infer_request(),
         m_compiled_model_without_batch._so};
+    // simpler wrapper if m_compiled_model_with_batch is empty
+    std::shared_ptr<ov::ISyncInferRequest> sync_res;
+    if (m_compiled_model_with_batch)
+        sync_res = create_sync_infer_request();
+    else
+        sync_res = std::make_shared<ov::autobatch_plugin::SyncInferRequest>(
+            std::dynamic_pointer_cast<const ov::autobatch_plugin::CompiledModel>(shared_from_this()),
+            nullptr,
+            0,
+            0);
     return std::make_shared<ov::autobatch_plugin::AsyncInferRequest>(
         std::dynamic_pointer_cast<ov::autobatch_plugin::SyncInferRequest>(sync_res),
        infer_request_without_batch,
@@ -249,6 +242,14 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
     OPENVINO_SUPPRESS_DEPRECATED_END
 }
 
+const std::vector<ov::Output<const ov::Node>>& CompiledModel::outputs() const {
+    return m_compiled_model_without_batch->outputs();
+}
+
+const std::vector<ov::Output<const ov::Node>>& CompiledModel::inputs() const {
+    return m_compiled_model_without_batch->inputs();
+}
+
 void CompiledModel::export_model(std::ostream& model) const {
     OPENVINO_NOT_IMPLEMENTED;
 }
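The reworked factory always creates the no-batch hardware request and only builds the real batched `SyncInferRequest` when `m_compiled_model_with_batch` exists; otherwise a placeholder with batch size 0 steers `AsyncInferRequest` onto the bypass path. `inputs()`/`outputs()` now delegate to the no-batch compiled model, so the reported ports match what the hardware request actually exposes. A toy sketch of the decision — `HardwareRequest`, `SyncRequest`, `AsyncWrapper`, and the batch size of 4 are assumptions for illustration only:

#include <iostream>
#include <memory>

// Hypothetical stand-ins illustrating the factory decision in create_infer_request():
// the hardware request is always created, and a placeholder sync request with batch
// size 0 is used when no batched compiled model is available.
struct HardwareRequest {};

struct SyncRequest {
    size_t batch_size = 0;  // 0 => batching not applied
};

struct AsyncWrapper {
    std::shared_ptr<SyncRequest> sync;
    std::shared_ptr<HardwareRequest> without_batch;
};

AsyncWrapper create_infer_request(bool have_batched_model) {
    auto without_batch = std::make_shared<HardwareRequest>();
    auto sync = std::make_shared<SyncRequest>();
    if (have_batched_model) {
        sync->batch_size = 4;  // hypothetical batch size chosen by the plugin
    }
    // else: placeholder request, batch size stays 0 and the async wrapper bypasses batching
    return AsyncWrapper{sync, without_batch};
}

int main() {
    auto batched = create_infer_request(true);
    auto plain = create_infer_request(false);
    std::cout << "batched path? " << (batched.sync->batch_size != 0) << "\n";  // 1
    std::cout << "bypass path?  " << (plain.sync->batch_size == 0) << "\n";    // 1
    return 0;
}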

src/plugins/auto_batch/src/compiled_model.hpp

+4
@@ -54,6 +54,10 @@ class CompiledModel : public ov::ICompiledModel {
 
     virtual ~CompiledModel();
 
+    const std::vector<ov::Output<const ov::Node>>& outputs() const override;
+
+    const std::vector<ov::Output<const ov::Node>>& inputs() const override;
+
 protected:
     std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
     static unsigned int ParseTimeoutValue(const std::string&);

src/plugins/auto_batch/src/sync_infer_request.cpp

+6-1
@@ -41,7 +41,12 @@ SyncInferRequest::SyncInferRequest(
       m_batched_request_wrapper(worker_request),
       m_batch_id(batch_id),
       m_batch_size(num_batch) {
-    share_tensors_with_batched_req(batched_inputs, batched_outputs);
+    if (m_batched_request_wrapper)
+        share_tensors_with_batched_req(batched_inputs, batched_outputs);
+}
+
+size_t SyncInferRequest::get_batch_size() const {
+    return m_batch_size;
 }
 
 void SyncInferRequest::share_tensors_with_batched_req(const std::set<std::string>& batched_inputs,
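The guard on `m_batched_request_wrapper`, together with the defaulted `batched_inputs`/`batched_outputs` parameters added in the header below, is what lets the placeholder request for the bypass path be constructed with no worker and no port-name sets. A minimal sketch of that constructor shape — `Worker` and `Request` are hypothetical stand-ins, not the real plugin classes:

#include <memory>
#include <set>
#include <string>

// Hypothetical sketch: the worker request may be absent, and the batched port-name
// sets default to empty, so a batch-size-0 placeholder needs no extra arguments.
struct Worker {};

struct Request {
    Request(std::shared_ptr<Worker> worker,
            int batch_id,
            int num_batch,
            const std::set<std::string>& batched_inputs = {},
            const std::set<std::string>& batched_outputs = {})
        : m_worker(std::move(worker)),
          m_batch_size(num_batch) {
        if (m_worker) {
            // tensor sharing with the batched request would happen here
        }
        (void)batch_id;
        (void)batched_inputs;
        (void)batched_outputs;
    }
    size_t batch_size() const { return m_batch_size; }

    std::shared_ptr<Worker> m_worker;
    size_t m_batch_size;
};

int main() {
    Request placeholder(nullptr, 0, 0);  // bypass-path placeholder: no worker, batch size 0
    return placeholder.batch_size() == 0 ? 0 : 1;
}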

src/plugins/auto_batch/src/sync_infer_request.hpp

+4-2
@@ -17,8 +17,8 @@ class SyncInferRequest : public ov::ISyncInferRequest {
                      const std::shared_ptr<ov::autobatch_plugin::CompiledModel::WorkerInferRequest>& worker_request,
                      int batch_id,
                      int num_batch,
-                     const std::set<std::string>& batched_inputs,
-                     const std::set<std::string>& batched_outputs);
+                     const std::set<std::string>& batched_inputs = {},
+                     const std::set<std::string>& batched_outputs = {});
 
     // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
     void set_tensors_to_another_request(ov::SoPtr<ov::IAsyncInferRequest>& req);
@@ -43,6 +43,8 @@ class SyncInferRequest : public ov::ISyncInferRequest {
         TIMEOUT_EXECUTED
     } m_batched_request_status = eExecutionFlavor::NOT_EXECUTED;
 
+    size_t get_batch_size() const;
+
 protected:
     void copy_tensor_if_needed(const ov::SoPtr<ov::ITensor>& src, ov::SoPtr<ov::ITensor>& dst, const bool bInput);
 
src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp

-5
@@ -127,10 +127,5 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*smoke_LPT.*ElementwiseBranchSelectionTransformation.*)",
         // Dynamic state unsupported for now
         R"(.*MemoryDynamicBatch.*)",
-        // Meta plugins may miss saving HW plugin so handle, thus plugin may be unloaded before all objects are deleted which leads to segfault
-        // Issue: 118840
-        R"(.*OVHoldersTest.*AUTO.*)",
-        R"(.*OVHoldersTest.*MULTI.*)",
-        R"(.*OVHoldersTest.*BATCH.*)",
     };
 }
