@@ -830,6 +830,31 @@ void Graph::AllocateWithReuse() {
     }
 
     if (!undefinedBoxes.empty()) {
+        // Use a proxy memory manager for output edges
+        for (auto& box : undefinedBoxes) {
+            for (auto& edge : edge_clusters[box.id]) {
+                const auto child = edge->getChild();
+                if (edge->getStatus() == Edge::Status::NeedAllocation &&
+                    child->getType() == Type::Output) {
+                    auto proxyMemMngr =
+                        std::make_shared<ProxyMemoryMngr>(std::make_shared<DnnlMemoryMngr>(make_unique<MemoryMngrWithReuse>()));
+                    DEBUG_LOG(proxyMemMngr, " ", this);
+
+                    // Store the output memory managers
+                    // so that the infer requests can access them.
+                    int count = 0;
+                    for (auto& output : outputNodesMap) {
+                        if (output.second == child) {
+                            outputNodesMemMngrMap[output.first] = proxyMemMngr;
+                            count++;
+                        }
+                    }
+                    IE_ASSERT(count == 1);
+                }
+            }
+        }
+        IE_ASSERT(outputNodesMemMngrMap.size() <= outputNodesMap.size());
+
         if (!syncNodesInds.empty()) {
             // We have to extend the lifespan of tensors that are crossing a sync point border in order to save
             // the intermediate computation results from possible loss due to the tensor resize
@@ -882,47 +907,13 @@ void Graph::AllocateWithReuse() {
             }
         }
         for (auto& group : groups) {
-            MemoryMngrPtr grpMemMngr;
-            grpMemMngr =
+            auto grpMemMngr =
                 std::make_shared<DnnlMemoryMngr>(make_unique<MemoryMngrWithReuse>());
-            // determine a group with outputs.
-            size_t isOutGrp = 0;
-            int64_t outBoxId = -1;
-            for (auto& box : group) {
-                if (std::any_of(
-                        edge_clusters[box.id].begin(),
-                        edge_clusters[box.id].end(),
-                        [box](const ov::intel_cpu::EdgePtr edge) {
-                            return edge->getChild()->getType() == Type::Output;
-                        })) {
-                    isOutGrp++;
-                    outBoxId = box.id;
-                }
-            }
-            if (isOutGrp) {
-                IE_ASSERT(isOutGrp == 1);  // reuse_io_tensors false
-                grpMemMngr =
-                    std::make_shared<ProxyMemoryMngr>(grpMemMngr);
-                DEBUG_LOG(grpMemMngr, " ", this);
-
-                // Store the output memory managers
-                // so that the infer requests can access them.
-                for (auto& edge : edge_clusters[outBoxId]) {
-                    const auto child = edge->getChild();
-                    if (child->getType() == Type::Output) {
-                        for (auto& output : outputNodesMap) {
-                            if (output.second == child) outputNodesMemMngrMap[output.first] = std::static_pointer_cast<ProxyMemoryMngr>(grpMemMngr);
-                        }
-                    }
-                }
-            }
             for (auto& box : group) {
                 for (auto& edge : edge_clusters[box.id]) {
                     if (edge->getStatus() == Edge::Status::NeedAllocation) {
                         edge->allocate(grpMemMngr);
                     }
-                    if (isOutGrp && "Parameter" != edge->getParent()->getTypeStr())
-                        edge->getParent()->forceUpdateShape = true;  // force recheck of shape updates for nodes in the output groups.
                 }
             }
         }
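
Editor's note: for readers outside the OpenVINO CPU plugin, this commit replaces the old group-level output handling (the removed `isOutGrp`/`outBoxId` logic) with a per-edge one: every output edge that needs allocation is wrapped in a `ProxyMemoryMngr` and recorded in `outputNodesMemMngrMap`, so an infer request can later retarget that edge to user-provided memory without touching the graph. Below is a minimal, self-contained sketch of the proxy pattern; `IMemMngr`, `HeapMngr`, `ExternalMngr`, and `ProxyMngr` are illustrative stand-ins, not the real OpenVINO classes.

#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <memory>

// Minimal allocator interface (illustrative stand-in for the plugin's memory manager interface).
struct IMemMngr {
    virtual ~IMemMngr() = default;
    virtual void* getRawPtr() const = 0;
    virtual void resize(std::size_t size) = 0;
};

// Plain owning allocator (stand-in for MemoryMngrWithReuse).
class HeapMngr : public IMemMngr {
public:
    void* getRawPtr() const override { return m_data.get(); }
    void resize(std::size_t size) override {
        if (size > m_capacity) {           // grow-only, like a reuse-oriented allocator
            m_data.reset(std::malloc(size));
            m_capacity = size;
        }
    }
private:
    std::unique_ptr<void, void (*)(void*)> m_data{nullptr, std::free};
    std::size_t m_capacity = 0;
};

// Non-owning view over user memory (stand-in for a buffer handed in by the infer request).
class ExternalMngr : public IMemMngr {
public:
    ExternalMngr(void* ptr, std::size_t size) : m_ptr(ptr), m_size(size) {}
    void* getRawPtr() const override { return m_ptr; }
    void resize(std::size_t size) override {
        if (size > m_size) std::abort();   // caller must supply a large enough buffer
    }
private:
    void* m_ptr;
    std::size_t m_size;
};

// Forwarding proxy (stand-in for ProxyMemoryMngr): the graph allocates the output
// edge through it once; the infer request can later swap the target underneath.
class ProxyMngr : public IMemMngr {
public:
    explicit ProxyMngr(std::shared_ptr<IMemMngr> target) : m_target(std::move(target)) {}
    void* getRawPtr() const override { return m_target->getRawPtr(); }
    void resize(std::size_t size) override { m_target->resize(size); }
    void setTarget(std::shared_ptr<IMemMngr> target) { m_target = std::move(target); }
private:
    std::shared_ptr<IMemMngr> m_target;
};

int main() {
    // AllocateWithReuse() side: the output edge gets a proxy over an internal allocator.
    auto proxy = std::make_shared<ProxyMngr>(std::make_shared<HeapMngr>());
    proxy->resize(64);
    std::printf("internal buffer: %p\n", proxy->getRawPtr());

    // Infer-request side: redirect the same edge to a user buffer; the edge itself is untouched.
    static char userBuf[64];
    proxy->setTarget(std::make_shared<ExternalMngr>(userBuf, sizeof userBuf));
    std::printf("user buffer:     %p\n", proxy->getRawPtr());
    return 0;
}

Since the proxy now sits on exactly the output edges rather than on a whole reuse group, the removed `forceUpdateShape` workaround for output groups presumably becomes unnecessary, which is consistent with the commit deleting it.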