diff --git a/include/singa/core/common.h b/include/singa/core/common.h index 47c1068db0..ee6f07ce3d 100644 --- a/include/singa/core/common.h +++ b/include/singa/core/common.h @@ -53,6 +53,9 @@ typedef struct _Opencl { } Opencl; } // namespace lang class Device; +struct DeviceOptInfoToAppend; + + /// Block represent a chunk of memory (on device or host). class Block { public: @@ -97,6 +100,16 @@ class Block { std::atomic ref_count_; }; +// struct for Append purpose in device class. +struct DeviceOptInfoToAppend{ + string operation_type; + string block_ptr; + int size; + long t = (std::chrono::system_clock::now()).time_since_epoch().count(); + + DeviceOptInfoToAppend(string opt_type, string ptr,int s):operation_type(opt_type),block_ptr(ptr),size(s){} +}; + typedef struct _Context { std::mt19937 random_generator; #ifdef USE_CUDA diff --git a/include/singa/core/device.h b/include/singa/core/device.h index e9dcc1402d..7d9ed57757 100644 --- a/include/singa/core/device.h +++ b/include/singa/core/device.h @@ -66,7 +66,6 @@ class Device { /// Called by Tensor. void FreeBlock(Block* block); - void AppendInfo(string block_info); void* UpdateGpuPtrInfo(const Block* block_ptr); /// Return the size (bytes) of memory in use @@ -107,7 +106,7 @@ class Device { int id() const { return id_; } virtual void* UpdateGpuPtr(const Block* block_ptr) = 0; - + virtual void Append(DeviceOptInfoToAppend dev_opt_info) = 0; private: Device() {}; @@ -124,7 +123,7 @@ class Device { /// Free device memory. virtual void Free(void* ptr) = 0; virtual void AppendAfterMalloc(Block* block,void* data_ptr,int size) = 0; - virtual void Append(string block_info) = 0; + protected: int id_ = 0; @@ -154,6 +153,7 @@ class CppCPU : public Device { std::shared_ptr host() const override { return defaultDevice;} void SetRandSeed(unsigned seed) override; + void Append(DeviceOptInfoToAppend dev_opt_info) override {} protected: void DoExec(function&& fn, int executor) override; @@ -167,7 +167,7 @@ class CppCPU : public Device { /// Free cpu memory. void Free(void* ptr) override; void AppendAfterMalloc(Block* block,void* data_ptr,int size) override {} - void Append(string block_info) override {} + void* UpdateGpuPtr(const Block* block_ptr) override {} }; @@ -188,6 +188,8 @@ class CudaGPU : public Device { void SetRandSeed(unsigned seed) override; size_t GetAllocatedMem() override; + void Append(DeviceOptInfoToAppend dev_opt_info) override {} + protected: void DoExec(function&& fn, int executor) override; @@ -201,7 +203,6 @@ class CudaGPU : public Device { /// Free cpu memory. void Free(void* ptr) override; void AppendAfterMalloc(Block* block,void* data_ptr,int size) override {} - void Append(string block_info) override; void* UpdateGpuPtr(const Block* block_ptr) override; private: @@ -284,6 +285,8 @@ class SwapGPU : public Device { void SetRandSeed(unsigned seed) override; size_t GetAllocatedMem() override; + //Append at every index: free, read, mutable + void Append(DeviceOptInfoToAppend dev_opt_info) override; protected: void DoExec(function&& fn, int executor) override; @@ -295,10 +298,7 @@ class SwapGPU : public Device { void* Malloc(int size) override; /// Free cpu memory. - void Free(void* ptr) override; - - //Append at every index: free, read, mutable - void Append(string block_info) override; + void Free(void* ptr) override; //append info after Malloc, as Block* is not available till Malloc() done. void AppendAfterMalloc(Block* block,void* data_ptr,int size) override; @@ -408,7 +408,7 @@ class OpenclDevice : public singa::Device { virtual void CopyDataToFrom(Block* dst, Block* src, size_t nBytes, CopyDirection direction, int dst_offset = 0, int src_offset = 0) override; - + void Append(DeviceOptInfoToAppend dev_opt_info) override {} protected: /// The OpenCL device that this object represents. /// Each OpenclDevice contains exactly one cl::Device for the lifetime of the @@ -439,7 +439,7 @@ class OpenclDevice : public singa::Device { /// This has the effect of freeing up device memory. void Free(void* ptr) override; void AppendAfterMalloc(Block* block,void* data_ptr,int size) override {} - void Append(string block_info) override {} + void* UpdateGpuPtr(const Block* block_ptr) override {} diff --git a/src/core/common/common.cc b/src/core/common/common.cc index d6e9c5a301..692c1c451f 100644 --- a/src/core/common/common.cc +++ b/src/core/common/common.cc @@ -30,15 +30,13 @@ void* Block::mutable_data() { //Append block info: opt_type, ptr, time_stamp if (ptr_device_!=nullptr){ - stringstream strm2; - strm2<AppendInfo(temp); + stringstream strm; + strm<Append(dev_opt_info); } //update ptr after swap in done, if variable is not swapped back yet as expected. @@ -56,16 +54,13 @@ const void* Block::data() const { //Append block info: opt_type, ptr, time_stamp if (ptr_device_!=nullptr){ - //Append info. - stringstream strm2; - strm2<AppendInfo(temp); + stringstream strm; + strm<Append(dev_opt_info); } //update ptr after swap in done, if variable is not swapped back yet as expected. diff --git a/src/core/device/cuda_gpu.cc b/src/core/device/cuda_gpu.cc index 7ec8a9deb5..523986f4f7 100644 --- a/src/core/device/cuda_gpu.cc +++ b/src/core/device/cuda_gpu.cc @@ -123,10 +123,6 @@ void CudaGPU::Free(void* ptr) { } } -void CudaGPU::Append(string blockInfo){ - pool_->Append(blockInfo); -} - void* CudaGPU::UpdateGpuPtr(const Block* block_){ return nullptr; } diff --git a/src/core/device/device.cc b/src/core/device/device.cc index 59faddc5c6..5a1ac270ac 100644 --- a/src/core/device/device.cc +++ b/src/core/device/device.cc @@ -55,23 +55,18 @@ void Device::FreeBlock(Block* block) { Free(tempPtr); //append block info for free operation. - stringstream strm1; - strm1<size()); + auto t = (std::chrono::system_clock::now()).time_since_epoch().count(); + dev_opt_info.t = t; + Append(dev_opt_info); delete block; } } -void Device::AppendInfo(string blockInfo){ - Append(blockInfo); -} void* Device::UpdateGpuPtrInfo(const Block* block_){ return UpdateGpuPtr(block_); diff --git a/src/core/device/swap_gpu.cc b/src/core/device/swap_gpu.cc index 85a4061f30..4228f6e16a 100644 --- a/src/core/device/swap_gpu.cc +++ b/src/core/device/swap_gpu.cc @@ -904,18 +904,13 @@ void SwapGPU::AppendAfterMalloc(Block* block_ptr,void* data_ptr,int size){ */ //append info - stringstream strm1; - strm1< v = SplitOptString(block_info, " "); + //convert block_ptr from string to Block* void* temp_ptr; - stringstream convert(v[1]); + stringstream convert(dev_opt_info.block_ptr); convert>>temp_ptr; auto block_ptr = static_cast(temp_ptr); - - // insert size, malloc : flag, block_, size, t; others: insert size t. - if (v.size() != 4) { - stringstream strm1; - strm1<size(); - string temp_str1 = strm1.str(); - block_info = v[0] + ' ' + v[1] + ' ' + temp_str1 + ' ' + v[2]; - } // update global load if (iteration_length < iteration_length_threshold){ - if (v[0] == "Malloc"){ + if (dev_opt_info.operation_type == "Malloc"){ if (global_load.size()>0){ global_load.push_back(global_load[global_load.size()-1]+block_ptr->size()); } else { global_load.push_back(block_ptr->size()); } - } else if (v[0] == "Free"){ + } else if (dev_opt_info.operation_type == "Free"){ global_load.push_back(global_load[global_load.size()-1]-block_ptr->size()); } else { global_load.push_back(global_load[global_load.size()-1]); @@ -1037,6 +1013,15 @@ void SwapGPU::Append(string block_info){ } //append into vec_block + stringstream strm1; + strm1<