Skip to content

Commit 16f8811

Browse files
Sarkars/backend create tensor (#370)
1 parent 1bb6517 commit 16f8811

File tree

5 files changed

+124
-10
lines changed

5 files changed

+124
-10
lines changed

ngraph_bridge/ngraph_encapsulate_impl.cc

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,6 @@ Status NGraphEncapsulateImpl::AllocateNGOutputTensors(
395395
current_dst_ptr, last_dst_ptr, last_ng_tensor, true, ng_exec,
396396
op_backend, ng_element_type, ng_shape,
397397
m_executable_can_create_tensor ? out_group_from_pipeline[i] : nullptr);
398-
399398
current_ng_tensor->set_stale(true);
400399
output_caches[i] = std::make_pair(current_dst_ptr, current_ng_tensor);
401400
ng_outputs.push_back(current_ng_tensor);
@@ -416,18 +415,21 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
416415
// NOTE: we assume that TF's pointers WILL change if it actually changes
417416
// values. i.e., it will not reuse the same space if it's rewritten
418417
bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr;
418+
NGRAPH_VLOG(5) << "tf_tensor_has_changed: " << tf_tensor_has_changed;
419419
bool no_ng_tensor_found = last_ng_tensor == nullptr;
420-
bool is_cpu = m_op_backend_name == "CPU";
420+
// m_op_backend_name might be BE:0, check if it starts with BE
421+
bool is_cpu_or_nnpi = (m_op_backend_name.find("CPU") == 0) ||
422+
(m_op_backend_name.find("NNPI") == 0);
421423

422424
// We need to check last_ng_tensor != nullptr, since there are cases where
423425
// at the first call to the ng_exec, both current_dst_ptr (when the
424426
// output is a 0-sized tensor) and last_dst_ptr (uninitialized at the
425427
// first call) are nullptr
426428
// A new tensor needs to be created for sure if no_ng_tensor_found
427-
// Additionally for CPU, it needs to be created if tf_tensor_has_changed,
429+
// Additionally for CPU/NNPI, it needs to be created if tf_tensor_has_changed,
428430
// for others, we do not create
429431
bool need_new_tensor_creation;
430-
if (is_cpu) {
432+
if (is_cpu_or_nnpi) {
431433
need_new_tensor_creation = no_ng_tensor_found || tf_tensor_has_changed;
432434
} else {
433435
need_new_tensor_creation = no_ng_tensor_found;
@@ -449,7 +451,9 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
449451
current_ng_tensor = tensor_from_pipeline;
450452
} else {
451453
if (need_new_tensor_creation) {
452-
if (is_cpu) {
454+
if (is_cpu_or_nnpi) {
455+
NGRAPH_VLOG(5) << "Backend creating tensor with pointer: "
456+
<< current_tf_ptr;
453457
current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape,
454458
current_tf_ptr);
455459
} else {
@@ -576,6 +580,36 @@ void NGraphEncapsulateImpl::DumpNgFunction(
576580
StringToFile(file_name, m_serialized_ng_function_map[ng_exec]);
577581
}
578582

583+
// Fetches the cached persistent TF output tensors previously registered for
// the given executable.
//
// Params:
//   exec              - key into the persistent-output cache (one entry per
//                       compiled executable / input shape).
//   tf_output_tensors - out-param; overwritten with the cached vector on hit.
// Returns: Status::OK() on a cache hit, errors::Internal on a miss.
Status NGraphEncapsulateImpl::GetPersistentTFOutputTensor(
    std::shared_ptr<ngraph::runtime::Executable> exec,
    std::vector<tensorflow::PersistentTensor>& tf_output_tensors) {
  const auto cache_entry = m_out_persistents.find(exec);
  if (cache_entry == m_out_persistents.end()) {
    return errors::Internal(
        "Expected persistent tensor to be present in cache");
  }
  tf_output_tensors = cache_entry->second;
  return Status::OK();
}
595+
596+
bool NGraphEncapsulateImpl::PersistentOutputsExist(
597+
std::shared_ptr<ngraph::runtime::Executable> exec) {
598+
return m_out_persistents.find(exec) != m_out_persistents.end();
599+
}
600+
601+
// Registers the persistent TF output tensors for the given executable.
// Each executable (which comes from a new shape) corresponds to one vector of
// output tensors; registering twice for the same executable is an error.
//
// Params:
//   exec               - cache key (per compiled executable / input shape).
//   persistent_tensors - sink parameter; ownership is transferred into the
//                        cache (moved, not copied — the by-value parameter
//                        already gave us our own instance).
// Returns: Status::OK() on success, errors::Internal if an entry for `exec`
// already exists (the cache is left unchanged in that case).
Status NGraphEncapsulateImpl::RegisterPersistentOutputTensors(
    std::shared_ptr<ngraph::runtime::Executable> exec,
    std::vector<tensorflow::PersistentTensor> persistent_tensors) {
  // Single hash lookup: emplace only inserts when the key is absent, and
  // `.second` tells us whether the insertion happened.
  const bool inserted =
      m_out_persistents.emplace(exec, std::move(persistent_tensors)).second;
  if (!inserted) {
    return errors::Internal(
        "Found an entry already exists in the cache for persistent tensors");
  }
  return Status::OK();
}
612+
579613
} // namespace ngraph_bridge
580614

581615
} // namespace tensorflow

ngraph_bridge/ngraph_encapsulate_impl.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,18 @@ class NGraphEncapsulateImpl {
174174
m_serialized_ng_function_map.clear();
175175
}
176176

177+
void ClearNgExecPersistentOutputCache() { m_out_persistents.clear(); }
178+
179+
Status GetPersistentTFOutputTensor(
180+
std::shared_ptr<ngraph::runtime::Executable>,
181+
std::vector<tensorflow::PersistentTensor>&);
182+
183+
bool PersistentOutputsExist(std::shared_ptr<ngraph::runtime::Executable>);
184+
185+
Status RegisterPersistentOutputTensors(
186+
std::shared_ptr<ngraph::runtime::Executable>,
187+
std::vector<tensorflow::PersistentTensor>);
188+
177189
NGraphFreshnessTracker* GetNgraphFreshnessTracker() {
178190
return m_freshness_tracker;
179191
}
@@ -249,6 +261,13 @@ class NGraphEncapsulateImpl {
249261
m_executable_pipelined_tensors_map;
250262

251263
int m_depth{2}; // TODO make this settable
264+
265+
// each executable (which comes from a new shape) corresponds to a vector of
266+
// output tensors
267+
// TODO: Should the vector store PersistentTensor or PersistentTensor* ?
268+
std::unordered_map<std::shared_ptr<ngraph::runtime::Executable>,
269+
std::vector<tensorflow::PersistentTensor>>
270+
m_out_persistents;
252271
};
253272

254273
} // namespace ngraph_bridge

ngraph_bridge/ngraph_encapsulate_op.cc

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -196,13 +196,22 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx)
196196
BackendManager::SetConfig(ng_encap_impl.GetOpBackend(),
197197
additional_attribute_map);
198198

199-
ng_encap_impl.SetExecCanCreateTensor(
199+
// For NNPI (even though executable can create tensor) use backend to create
200+
// tensor
201+
// Keep the executable_can_create_tensors check before the
202+
// backend_name!="NNPI"
203+
bool executable_create_tensor =
200204
BackendManager::GetBackend(ng_encap_impl.GetOpBackend())
201-
->executable_can_create_tensors());
205+
->executable_can_create_tensors() &&
206+
(backend_name != "NNPI");
207+
ng_encap_impl.SetExecCanCreateTensor(executable_create_tensor);
202208
NGRAPH_VLOG(5) << "Executable can "
203209
<< (ng_encap_impl.GetExecCanCreateTensor() ? "" : "not")
204210
<< " create tensors";
205211

212+
const char* not_persistent_flag = std::getenv("NGRAPH_TF_DISABLE_PERSISTENT");
213+
m_use_persistent = (not_persistent_flag == nullptr);
214+
206215
event.Stop();
207216
ngraph::Event::write_trace(event);
208217
}
@@ -262,6 +271,7 @@ NGraphEncapsulateOp::~NGraphEncapsulateOp() {
262271
ng_encap_impl.ClearNgExecMap();
263272
ng_encap_impl.ClearNgExecPipelinedTensorMap();
264273
ng_encap_impl.ClearNgExecSerializedFunctionCache();
274+
ng_encap_impl.ClearNgExecPersistentOutputCache();
265275

266276
// Release the backend
267277
NGRAPH_VLOG(2) << "~NGraphEncapsulateOp():: ReleaseBackend";
@@ -345,9 +355,20 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
345355
// Allocate tensors for the output results.
346356
vector<shared_ptr<ng::runtime::Tensor>> ng_outputs;
347357
std::vector<Tensor*> tf_output_tensors;
358+
std::vector<tensorflow::PersistentTensor> cached_persistent_output_tensors(
359+
ng_exec->get_results().size());
360+
bool present_in_cache = false;
348361

349362
{
350363
NG_TRACE("NGTF_Output_Alloc", "");
364+
if (m_use_persistent) {
365+
present_in_cache = ng_encap_impl.PersistentOutputsExist(ng_exec);
366+
if (present_in_cache) {
367+
OP_REQUIRES_OK(ctx, ng_encap_impl.GetPersistentTFOutputTensor(
368+
ng_exec, cached_persistent_output_tensors));
369+
}
370+
}
371+
351372
for (auto i = 0; i < ng_exec->get_results().size(); i++) {
352373
auto ng_element = ng_exec->get_results()[i];
353374
auto ng_shape = ng_element->get_shape();
@@ -360,21 +381,40 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
360381
}
361382
TensorShape tf_shape(dims);
362383
Tensor* output_tensor = nullptr;
363-
OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor));
364-
tf_output_tensors.push_back(output_tensor);
365384

366385
// Make sure the nGraph-inferred element type agrees with what TensorFlow
367386
// expected.
368387
ng::element::Type expected_elem_type;
388+
// TODO, we only need to do these checks once when the exec was
389+
// created/compiled, not again and again
390+
369391
OP_REQUIRES_OK(
370392
ctx, TFDataTypeToNGraphElementType(ctx->expected_output_dtype(i),
371393
&expected_elem_type));
372394
OP_REQUIRES(
373395
ctx, ng_element_type == expected_elem_type,
374396
errors::Internal("Element type inferred by nGraph does not match "
375397
"the element type expected by TensorFlow"));
376-
}
377398

399+
if (m_use_persistent) {
400+
if (present_in_cache) {
401+
output_tensor = cached_persistent_output_tensors[i].AccessTensor(ctx);
402+
} else {
403+
// create a persistent tensor
404+
OP_REQUIRES_OK(
405+
ctx, ctx->allocate_persistent(
406+
ctx->expected_output_dtype(i), tf_shape,
407+
&cached_persistent_output_tensors[i], &output_tensor));
408+
}
409+
} else {
410+
OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor));
411+
}
412+
tf_output_tensors.push_back(output_tensor);
413+
}
414+
if (m_use_persistent && !present_in_cache) {
415+
OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterPersistentOutputTensors(
416+
ng_exec, cached_persistent_output_tensors));
417+
}
378418
OP_REQUIRES_OK(ctx, ng_encap_impl.AllocateNGOutputTensors(
379419
tf_output_tensors, ng_exec, out_group_from_pipeline,
380420
op_backend, ng_outputs));
@@ -611,6 +651,16 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
611651
exp.what(), "\n"));
612652
}
613653
}
654+
655+
if (m_use_persistent) {
656+
for (int out_idx = 0; out_idx < ng_exec->get_results().size(); out_idx++) {
657+
OP_REQUIRES_OK(ctx, ng_encap_impl.GetPersistentTFOutputTensor(
658+
ng_exec, cached_persistent_output_tensors));
659+
auto out_tensor =
660+
cached_persistent_output_tensors[out_idx].AccessTensor(ctx);
661+
ctx->set_output(out_idx, *out_tensor);
662+
}
663+
}
614664
} // end compute
615665

616666
int NGraphEncapsulateImpl::s_instance_count = 0;

ngraph_bridge/ngraph_encapsulate_op.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class NGraphEncapsulateOp : public OpKernel {
4343
private:
4444
NGraphEncapsulateImpl ng_encap_impl;
4545
std::mutex m_compute_lock;
46+
bool m_use_persistent;
4647
};
4748

4849
} // namespace ngraph_bridge

test/tf_exec.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,16 @@ Status CreateSession(const string& graph_filename, const string& backend_name,
9595
return load_graph_status;
9696
}
9797

98+
// This test might fail when running with persistent output tensors
99+
// Maybe because once we have computed outputs out to persistent tensors,
100+
// the next thread comes in before Compare runs, and changes the values?
101+
// For example, if we add a 1sec sleep right after entering Compute(), this test
102+
// would pass (since we now allow enough time for compare to run before the next
103+
// thread comes in and modifies the persistent tensor values)
104+
// TODO: see how persistent tensors might fit in with this kind of multithreading
105+
// (symmetric parallel)
98106
TEST(tf_exec, SingleGraphOn2Threads) {
107+
SetEnvVariable("NGRAPH_TF_DISABLE_PERSISTENT", "1");
99108
string graph_name = "test_axpy.pbtxt";
100109
vector<string> backends{"CPU", "INTERPRETER"};
101110
for (auto be : backends) {
@@ -136,6 +145,7 @@ TEST(tf_exec, SingleGraphOn2Threads) {
136145
thread0.join();
137146
thread1.join();
138147
}
148+
UnsetEnvVariable("NGRAPH_TF_DISABLE_PERSISTENT");
139149
}
140150

141151
TEST(tf_exec, hello_world) {

0 commit comments

Comments
 (0)