From 19d70aaa69e59bedb9ceaf102722d2e9e6acf2f4 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 30 Jan 2018 22:55:57 +0800 Subject: [PATCH 1/7] "rush out a memory caculater" --- paddle/platform/CMakeLists.txt | 2 +- paddle/platform/profiler.cc | 73 ++++++++++++++++++++++++++++------ paddle/platform/profiler.h | 9 +++++ 3 files changed, 71 insertions(+), 13 deletions(-) diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index d68caea99719b3..e649adfc86c34c 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -37,7 +37,7 @@ nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context) nv_test(nccl_test SRCS nccl_test.cu DEPS dynload_cuda gpu_info device_context) -cc_library(profiler SRCS profiler.cc DEPS device_context) +cc_library(profiler SRCS profiler.cc DEPS device_context paddle_memory) cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) if(NOT WITH_C_API AND WITH_FLUID) diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index 2a8afc940393ba..e23fc2a245e0c7 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -60,6 +60,7 @@ Event::Event(EventKind kind, std::string name, uint32_t thread_id, } #endif cpu_ns_ = GetTimeInNsec(); + memory_used_chars_ = memory::memory_usage(dev_ctx->GetPlace()); } std::string Event::kind() const { @@ -92,6 +93,10 @@ double Event::CudaElapsedMs(const Event& e) const { #endif } +double Event::MemoryUsed(const Event& e) const { + return (e.memory_used_chars_ - memory_used_chars_) / (1024 * 1024); +} + #ifdef PADDLE_WITH_CUDA static void ForEachDevice(std::function func) { auto original_device = GetCurrentDeviceId(); @@ -257,14 +262,19 @@ void ParseEvents(std::vector>& events, double event_time = (g_profiler_place == "CUDA") ? rit->CudaElapsedMs(events[i][j]) : rit->CpuElapsedMs(events[i][j]); + + double event_memory_used = rit->MemoryUsed(events[i][j]); std::string event_name = "thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); max_name_width = std::max(max_name_width, event_name.size()); if (event_idx.find(event_name) == event_idx.end()) { event_idx[event_name] = event_items.size(); - EventItem event_item = {event_name, 1, event_time, - event_time, event_time, event_time}; + EventItem event_item = {event_name, 1, + event_time, event_time, + event_time, event_time, + event_memory_used, event_memory_used, + event_memory_used, event_memory_used}; event_items.push_back(event_item); } else { int index = event_idx[event_name]; @@ -277,6 +287,17 @@ void ParseEvents(std::vector>& events, // max time event_items[index].max_time = std::max(event_time, event_items[index].max_time); + + // total memory used + event_items[index].total_time += event_memory_used; + + // min memory used + event_items[index].min_memory_used = + std::min(event_memory_used, event_items[index].min_memory_used); + + // max memory used + event_items[index].max_memory_used = + std::max(event_memory_used, event_items[index].max_memory_used); } // remove the push marker from the list @@ -288,9 +309,10 @@ void ParseEvents(std::vector>& events, } } } - // average time + // average time and memory used for (auto& item : event_items) { item.ave_time = item.total_time / item.calls; + item.ave_memory_used = item.total_memory_used / item.calls; } // sort if (sorted_by != EventSortingKey::kDefault) { @@ -314,20 +336,42 @@ void ParseEvents(std::vector>& events, void PrintProfiler(std::vector>& events_table, std::string& sorted_domain, const size_t name_width, const size_t data_width) { + double app_total_time = .0, app_total_memory = .0; + for (auto& event_item : events_table) { + app_total_time += event_item[2]; // event.total_time + app_total_memory += event_item[6]; // event.total_memory_used + } + // Output header information std::cout << "\n------------------------->" << " Profiling Report " << "<-------------------------\n\n"; - std::cout << "Place: " << g_profiler_place << std::endl; - std::cout << "Time unit: ms" << std::endl; - std::cout << "Sorted by " << sorted_domain - << " in descending order in the same thread\n\n"; + + // clang-format off + std::cout << "Place: " << g_profiler_place + << "Total Time:" << app_total_time << "ms" + << "Total Memory:" << app_total_time << "MB" + << "Sorted by " << sorted_domain + << " in descending order in the same thread\n" + << std::endl; + // clang-format on + // Output events table + // clang-format off std::cout.setf(std::ios::left); - std::cout << std::setw(name_width) << "Event" << std::setw(data_width) - << "Calls" << std::setw(data_width) << "Total" - << std::setw(data_width) << "Min." << std::setw(data_width) - << "Max." << std::setw(data_width) << "Ave." << std::endl; + std::cout << std::setw(name_width) << "Event" + << std::setw(data_width) << "Calls" + << std::setw(data_width) << "Total" + << std::setw(data_width) << "Min." + << std::setw(data_width) << "Max." + << std::setw(data_width) << "Ave." + << std::setw(data_width) << "Total Memory." + << std::setw(data_width) << "Min Memory." + << std::setw(data_width) << "Max Memory." + << std::setw(data_width) << "Ave Memory." + << std::endl; + // clang-format on + for (size_t i = 0; i < events_table.size(); ++i) { for (size_t j = 0; j < events_table[i].size(); ++j) { EventItem& event_item = events_table[i][j]; @@ -336,7 +380,12 @@ void PrintProfiler(std::vector>& events_table, << std::setw(data_width) << event_item.total_time << std::setw(data_width) << event_item.min_time << std::setw(data_width) << event_item.max_time - << std::setw(data_width) << event_item.ave_time << std::endl; + << std::setw(data_width) << event_item.ave_time + << std::setw(data_width) << event_item.total_memory_used + << std::setw(data_width) << event_item.min_memory_used + << std::setw(data_width) << event_item.max_memory_used + << std::setw(data_width) << event_item.ave_memory_used + << std::endl; } } std::cout << std::endl; diff --git a/paddle/platform/profiler.h b/paddle/platform/profiler.h index 8de1e6ad296d1e..42af8240b1709d 100644 --- a/paddle/platform/profiler.h +++ b/paddle/platform/profiler.h @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once + #include #include #include #include + +#include "paddle/memory/memory.h" #include "paddle/platform/device_context.h" namespace paddle { @@ -43,12 +46,14 @@ class Event { double CpuElapsedMs(const Event& e) const; double CudaElapsedMs(const Event& e) const; + double MemoryUsed(const Event& e) const; private: EventKind kind_; std::string name_; uint32_t thread_id_; int64_t cpu_ns_; + int64_t memory_used_chars_; bool has_cuda_; #ifdef PADDLE_WITH_CUDA cudaEvent_t event_ = nullptr; @@ -124,6 +129,10 @@ struct EventItem { double min_time; double max_time; double ave_time; + double total_memory_used; + double min_memory_used; + double max_memory_used; + double ave_memory_used; }; // Candidate keys to sort the profiling report From 296d6128603e7ef3813de973d6c5c034c636476c Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 30 Jan 2018 22:58:27 +0800 Subject: [PATCH 2/7] "fix ci" --- paddle/platform/profiler.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index e23fc2a245e0c7..afcddbc71bbc22 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -337,9 +337,13 @@ void PrintProfiler(std::vector>& events_table, std::string& sorted_domain, const size_t name_width, const size_t data_width) { double app_total_time = .0, app_total_memory = .0; - for (auto& event_item : events_table) { - app_total_time += event_item[2]; // event.total_time - app_total_memory += event_item[6]; // event.total_memory_used + for (size_t i = 0; i < events_table.size(); ++i) { + for (size_t j = 0; j < events_table[i].size(); ++j) { + EventItem& event_item = events_table[i][j]; + + app_total_time += event_item.total_time; + app_total_memory += event_item.total_memory_used; + } } // Output header information @@ -375,6 +379,8 @@ void PrintProfiler(std::vector>& events_table, for (size_t i = 0; i < events_table.size(); ++i) { for (size_t j = 0; j < events_table[i].size(); ++j) { EventItem& event_item = events_table[i][j]; + + // clang-format off std::cout << std::setw(name_width) << event_item.name << std::setw(data_width) << event_item.calls << std::setw(data_width) << event_item.total_time @@ -386,6 +392,7 @@ void PrintProfiler(std::vector>& events_table, << std::setw(data_width) << event_item.max_memory_used << std::setw(data_width) << event_item.ave_memory_used << std::endl; + // clang-format on } } std::cout << std::endl; From 9725e49cda90d109beb01cf2894e664aa05835fe Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 30 Jan 2018 23:33:13 +0800 Subject: [PATCH 3/7] "better format" --- paddle/platform/profiler.cc | 13 +++++++++---- paddle/platform/profiler_test.cc | 27 +++++++++++++++++---------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index afcddbc71bbc22..ad1543b6749c9b 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -60,7 +60,12 @@ Event::Event(EventKind kind, std::string name, uint32_t thread_id, } #endif cpu_ns_ = GetTimeInNsec(); - memory_used_chars_ = memory::memory_usage(dev_ctx->GetPlace()); + + if (has_cuda_) { + memory_used_chars_ = memory::memory_usage(dev_ctx->GetPlace()); + } else { + memory_used_chars_ = memory::memory_usage(platform::CPUPlace()); + } } std::string Event::kind() const { @@ -352,9 +357,9 @@ void PrintProfiler(std::vector>& events_table, << "<-------------------------\n\n"; // clang-format off - std::cout << "Place: " << g_profiler_place - << "Total Time:" << app_total_time << "ms" - << "Total Memory:" << app_total_time << "MB" + std::cout << "Place: " << g_profiler_place << "\t" + << "Total Time:" << app_total_time << "ms" << "\t" + << "Total Memory:" << app_total_time << "MB" << "\t" << "Sorted by " << sorted_domain << " in descending order in the same thread\n" << std::endl; diff --git a/paddle/platform/profiler_test.cc b/paddle/platform/profiler_test.cc index 81f10c91342f76..23a0aaf40e85b8 100644 --- a/paddle/platform/profiler_test.cc +++ b/paddle/platform/profiler_test.cc @@ -18,15 +18,20 @@ limitations under the License. */ TEST(Event, CpuElapsedTime) { using paddle::platform::Event; using paddle::platform::EventKind; + using namespace paddle::platform; - Event start_event(EventKind::kPushRange, "test", 0, nullptr); + DeviceContext* dev_ctx = new CPUDeviceContext(); + + Event start_event(EventKind::kPushRange, "test", 0, dev_ctx); EXPECT_TRUE(start_event.has_cuda() == false); int counter = 0; while (counter != 1000) { counter++; } - Event stop_event(EventKind::kPopRange, "test", 0, nullptr); + Event stop_event(EventKind::kPopRange, "test", 0, dev_ctx); EXPECT_GT(start_event.CpuElapsedMs(stop_event), 0); + + delete dev_ctx; } #ifdef PADDLE_WITH_CUDA @@ -46,27 +51,26 @@ TEST(Event, CudaElapsedTime) { } Event stop_event(EventKind::kPopRange, "test", 0, dev_ctx); EXPECT_GT(start_event.CudaElapsedMs(stop_event), 0); + + delete dev_ctx; } #endif TEST(RecordEvent, RecordEvent) { - using paddle::platform::DeviceContext; - using paddle::platform::Event; - using paddle::platform::EventKind; - using paddle::platform::RecordEvent; - using paddle::platform::ProfilerState; - using paddle::platform::EventSortingKey; + using namespace paddle::platform; ProfilerState state = ProfilerState::kCPU; DeviceContext* dev_ctx = nullptr; #ifdef PADDLE_WITH_CUDA - using paddle::platform::CUDADeviceContext; - using paddle::platform::CUDAPlace; state = ProfilerState::kCUDA; dev_ctx = new paddle::platform::CUDADeviceContext(paddle::platform::CUDAPlace(0)); +#else + dev_ctx = new CPUDeviceContext(); #endif + EnableProfiler(state); + PADDLE_ENFORCE_NOT_NULL(dev_ctx); /* Usage 1: * PushEvent(evt_name, dev_ctx); @@ -75,6 +79,7 @@ TEST(RecordEvent, RecordEvent) { * ... * PopEvent(evt_name, dev_ctx); */ + for (int loop = 0; loop < 3; ++loop) { for (int i = 1; i < 5; ++i) { std::string name = "op_" + std::to_string(i); @@ -121,6 +126,8 @@ TEST(RecordEvent, RecordEvent) { } } } + + delete dev_ctx; EXPECT_EQ(cuda_startup_count % 5, 0); EXPECT_EQ(start_profiler_count, 1); From 615589e27d1ab33e4b2c3ed367a4456b7e025a7d Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Tue, 30 Jan 2018 23:18:43 -0800 Subject: [PATCH 4/7] "followed comment" --- paddle/framework/executor.cc | 3 ++- paddle/platform/profiler.cc | 43 +++++++++++++++++++++++++++--------- paddle/platform/profiler.h | 1 + 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 9a232b08434d29..6368d5575f3328 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -113,11 +113,12 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, } // if (create_local_scope) } // if (create_vars) + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + for (auto& op_desc : block.AllOps()) { auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); VLOG(4) << op->DebugStringEx(local_scope); - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::RecordEvent record_event(op->Type(), pool.Get(place_)); op->Run(*local_scope, place_); diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index ad1543b6749c9b..6dbc343e8d6a1d 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -20,6 +20,14 @@ limitations under the License. */ namespace paddle { namespace platform { +/* start profiler event name */ +constexpr char kStartProfiler[] = "_start_profiler_"; + +/* stop profiler event name */ +constexpr char kStopProfiler[] = "_stop_profiler_"; + +/* start cuda profiler on every device event name */ + // The profiler state, the initial value is ProfilerState::kDisabled static ProfilerState g_state = ProfilerState::kDisabled; // To record which timer the profiler used, CUDA or CPU. @@ -66,6 +74,7 @@ Event::Event(EventKind kind, std::string name, uint32_t thread_id, } else { memory_used_chars_ = memory::memory_usage(platform::CPUPlace()); } + VLOG(3) << "create event : " << memory_used_chars_; } std::string Event::kind() const { @@ -99,7 +108,14 @@ double Event::CudaElapsedMs(const Event& e) const { } double Event::MemoryUsed(const Event& e) const { - return (e.memory_used_chars_ - memory_used_chars_) / (1024 * 1024); + return static_cast(e.memory_used_chars_ - memory_used_chars_) / + (1024 * 1024); +} + +double Event::MaxMemoryUsed(const Event& e) const { + return static_cast( + std::max(e.memory_used_chars_, memory_used_chars_)) / + (1024 * 1024); } #ifdef PADDLE_WITH_CUDA @@ -172,7 +188,7 @@ void EnableProfiler(ProfilerState state) { } #endif // Mark the profiling start. - Mark("_start_profiler_", nullptr); + Mark(kStartProfiler, nullptr); } void ResetProfiler() { @@ -197,7 +213,7 @@ void DisableProfiler(EventSortingKey sorted_key) { PADDLE_ENFORCE(g_state != ProfilerState::kDisabled, "Can't disable profiling, since it's not starting."); // Mark the profiling stop. - Mark("_stop_profiler_", nullptr); + Mark(kStopProfiler, nullptr); g_state = ProfilerState::kDisabled; std::vector> all_events = GetAllEvents(); @@ -269,6 +285,7 @@ void ParseEvents(std::vector>& events, : rit->CpuElapsedMs(events[i][j]); double event_memory_used = rit->MemoryUsed(events[i][j]); + double total_memory_used = rit->MaxMemoryUsed(events[i][j]); std::string event_name = "thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); max_name_width = std::max(max_name_width, event_name.size()); @@ -278,7 +295,7 @@ void ParseEvents(std::vector>& events, EventItem event_item = {event_name, 1, event_time, event_time, event_time, event_time, - event_memory_used, event_memory_used, + total_memory_used, event_memory_used, event_memory_used, event_memory_used}; event_items.push_back(event_item); } else { @@ -293,9 +310,6 @@ void ParseEvents(std::vector>& events, event_items[index].max_time = std::max(event_time, event_items[index].max_time); - // total memory used - event_items[index].total_time += event_memory_used; - // min memory used event_items[index].min_memory_used = std::min(event_memory_used, event_items[index].min_memory_used); @@ -341,13 +355,20 @@ void ParseEvents(std::vector>& events, void PrintProfiler(std::vector>& events_table, std::string& sorted_domain, const size_t name_width, const size_t data_width) { - double app_total_time = .0, app_total_memory = .0; + double app_total_memory = .0, app_total_time = .0; + for (size_t i = 0; i < events_table.size(); ++i) { for (size_t j = 0; j < events_table[i].size(); ++j) { EventItem& event_item = events_table[i][j]; + if (event_item.name == kStartProfiler) { + app_total_time -= event_item.total_time; + } + if (event_item.name == kStopProfiler) { + app_total_time += event_item.total_time; + } - app_total_time += event_item.total_time; - app_total_memory += event_item.total_memory_used; + app_total_memory = + std::max(event_item.total_memory_used, app_total_memory); } } @@ -359,7 +380,7 @@ void PrintProfiler(std::vector>& events_table, // clang-format off std::cout << "Place: " << g_profiler_place << "\t" << "Total Time:" << app_total_time << "ms" << "\t" - << "Total Memory:" << app_total_time << "MB" << "\t" + << "Total Memory:" << app_total_memory << "MB" << "\t" << "Sorted by " << sorted_domain << " in descending order in the same thread\n" << std::endl; diff --git a/paddle/platform/profiler.h b/paddle/platform/profiler.h index 42af8240b1709d..23fa82ba347614 100644 --- a/paddle/platform/profiler.h +++ b/paddle/platform/profiler.h @@ -47,6 +47,7 @@ class Event { double CpuElapsedMs(const Event& e) const; double CudaElapsedMs(const Event& e) const; double MemoryUsed(const Event& e) const; + double MaxMemoryUsed(const Event& e) const; private: EventKind kind_; From 036c997dd2cfd83f6e96fd74f28e4952d5c0facc Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Tue, 30 Jan 2018 23:30:14 -0800 Subject: [PATCH 5/7] "add memory tool" --- paddle/platform/profiler.cc | 21 +++++++++------------ paddle/platform/profiler.h | 6 +++++- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index 6dbc343e8d6a1d..f011b6a19ef4bd 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -112,12 +112,6 @@ double Event::MemoryUsed(const Event& e) const { (1024 * 1024); } -double Event::MaxMemoryUsed(const Event& e) const { - return static_cast( - std::max(e.memory_used_chars_, memory_used_chars_)) / - (1024 * 1024); -} - #ifdef PADDLE_WITH_CUDA static void ForEachDevice(std::function func) { auto original_device = GetCurrentDeviceId(); @@ -285,7 +279,9 @@ void ParseEvents(std::vector>& events, : rit->CpuElapsedMs(events[i][j]); double event_memory_used = rit->MemoryUsed(events[i][j]); - double total_memory_used = rit->MaxMemoryUsed(events[i][j]); + double total_memory_used = + static_cast(rit->GetMemoryUsed()) / (1024 * 1024); + int64_t mark_time = rit->GetCpuNs(); std::string event_name = "thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); max_name_width = std::max(max_name_width, event_name.size()); @@ -293,10 +289,11 @@ void ParseEvents(std::vector>& events, if (event_idx.find(event_name) == event_idx.end()) { event_idx[event_name] = event_items.size(); EventItem event_item = {event_name, 1, + mark_time, event_time, event_time, event_time, - event_time, event_time, - total_memory_used, event_memory_used, - event_memory_used, event_memory_used}; + event_time, total_memory_used, + event_memory_used, event_memory_used, + event_memory_used}; event_items.push_back(event_item); } else { int index = event_idx[event_name]; @@ -361,10 +358,10 @@ void PrintProfiler(std::vector>& events_table, for (size_t j = 0; j < events_table[i].size(); ++j) { EventItem& event_item = events_table[i][j]; if (event_item.name == kStartProfiler) { - app_total_time -= event_item.total_time; + app_total_time -= event_item.mark_time / 1000000.0; } if (event_item.name == kStopProfiler) { - app_total_time += event_item.total_time; + app_total_time += event_item.mark_time / 1000000.0; } app_total_memory = diff --git a/paddle/platform/profiler.h b/paddle/platform/profiler.h index 23fa82ba347614..949467d078ab61 100644 --- a/paddle/platform/profiler.h +++ b/paddle/platform/profiler.h @@ -47,7 +47,10 @@ class Event { double CpuElapsedMs(const Event& e) const; double CudaElapsedMs(const Event& e) const; double MemoryUsed(const Event& e) const; - double MaxMemoryUsed(const Event& e) const; + /* Get memory used attribute */ + int64_t GetMemoryUsed() const { return memory_used_chars_; } + /* Get cpu time used attribute */ + int64_t GetCpuNs() const { return cpu_ns_; } private: EventKind kind_; @@ -126,6 +129,7 @@ std::vector> GetAllEvents(); struct EventItem { std::string name; int calls; + int64_t mark_time; double total_time; double min_time; double max_time; From fa2c7a4b3ead68dbd59fd9bdf913f0acf63e8e6a Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Wed, 31 Jan 2018 19:11:33 -0800 Subject: [PATCH 6/7] "fix based on comments" --- paddle/platform/profiler.cc | 36 +++++++++++++++++------------------- paddle/platform/profiler.h | 4 +--- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index f011b6a19ef4bd..dd95010a042c34 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -258,13 +258,22 @@ void ParseEvents(std::vector>& events, std::vector> events_table; size_t max_name_width = 0; + double app_total_time = 0; + for (size_t i = 0; i < events.size(); i++) { std::list pushed_events; std::vector event_items; std::unordered_map event_idx; for (size_t j = 0; j < events[i].size(); j++) { - if (events[i][j].kind() == "push") { + if (events[i][j].kind() == "mark") { + if (events[i][j].name() == kStartProfiler) { + app_total_time -= events[i][j].GetCpuNs() / 1000000.0; + } + if (events[i][j].name() == kStopProfiler) { + app_total_time += events[i][j].GetCpuNs() / 1000000.0; + } + } else if (events[i][j].kind() == "push") { pushed_events.push_back(events[i][j]); } else if (events[i][j].kind() == "pop") { std::list::reverse_iterator rit = pushed_events.rbegin(); @@ -281,7 +290,6 @@ void ParseEvents(std::vector>& events, double event_memory_used = rit->MemoryUsed(events[i][j]); double total_memory_used = static_cast(rit->GetMemoryUsed()) / (1024 * 1024); - int64_t mark_time = rit->GetCpuNs(); std::string event_name = "thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); max_name_width = std::max(max_name_width, event_name.size()); @@ -289,11 +297,10 @@ void ParseEvents(std::vector>& events, if (event_idx.find(event_name) == event_idx.end()) { event_idx[event_name] = event_items.size(); EventItem event_item = {event_name, 1, - mark_time, event_time, event_time, event_time, - event_time, total_memory_used, - event_memory_used, event_memory_used, - event_memory_used}; + event_time, event_time, + total_memory_used, event_memory_used, + event_memory_used, event_memory_used}; event_items.push_back(event_item); } else { int index = event_idx[event_name]; @@ -328,7 +335,6 @@ void ParseEvents(std::vector>& events, // average time and memory used for (auto& item : event_items) { item.ave_time = item.total_time / item.calls; - item.ave_memory_used = item.total_memory_used / item.calls; } // sort if (sorted_by != EventSortingKey::kDefault) { @@ -346,24 +352,18 @@ void ParseEvents(std::vector>& events, } // Print report - PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12); + PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12, + app_total_time); } void PrintProfiler(std::vector>& events_table, std::string& sorted_domain, const size_t name_width, - const size_t data_width) { - double app_total_memory = .0, app_total_time = .0; + const size_t data_width, double app_total_time) { + double app_total_memory = .0; for (size_t i = 0; i < events_table.size(); ++i) { for (size_t j = 0; j < events_table[i].size(); ++j) { EventItem& event_item = events_table[i][j]; - if (event_item.name == kStartProfiler) { - app_total_time -= event_item.mark_time / 1000000.0; - } - if (event_item.name == kStopProfiler) { - app_total_time += event_item.mark_time / 1000000.0; - } - app_total_memory = std::max(event_item.total_memory_used, app_total_memory); } @@ -395,7 +395,6 @@ void PrintProfiler(std::vector>& events_table, << std::setw(data_width) << "Total Memory." << std::setw(data_width) << "Min Memory." << std::setw(data_width) << "Max Memory." - << std::setw(data_width) << "Ave Memory." << std::endl; // clang-format on @@ -413,7 +412,6 @@ void PrintProfiler(std::vector>& events_table, << std::setw(data_width) << event_item.total_memory_used << std::setw(data_width) << event_item.min_memory_used << std::setw(data_width) << event_item.max_memory_used - << std::setw(data_width) << event_item.ave_memory_used << std::endl; // clang-format on } diff --git a/paddle/platform/profiler.h b/paddle/platform/profiler.h index 949467d078ab61..9755f8af1cf796 100644 --- a/paddle/platform/profiler.h +++ b/paddle/platform/profiler.h @@ -129,7 +129,6 @@ std::vector> GetAllEvents(); struct EventItem { std::string name; int calls; - int64_t mark_time; double total_time; double min_time; double max_time; @@ -137,7 +136,6 @@ struct EventItem { double total_memory_used; double min_memory_used; double max_memory_used; - double ave_memory_used; }; // Candidate keys to sort the profiling report @@ -158,7 +156,7 @@ void ParseEvents(std::vector>&, // Print results void PrintProfiler(std::vector>& events_table, std::string& sorted_domain, const size_t name_width, - const size_t data_width); + const size_t data_width, double app_total_time); } // namespace platform } // namespace paddle From 907c8c337d9db8ea3f51351bd4b595a365ee1896 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Wed, 31 Jan 2018 19:13:22 -0800 Subject: [PATCH 7/7] "fix typo" --- paddle/platform/profiler.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index dd95010a042c34..40ca1c3c6418d0 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -300,7 +300,7 @@ void ParseEvents(std::vector>& events, event_time, event_time, event_time, event_time, total_memory_used, event_memory_used, - event_memory_used, event_memory_used}; + event_memory_used}; event_items.push_back(event_item); } else { int index = event_idx[event_name];