
Commit

vd2: swap+pool
new class of pool: SwapPool
key APIs: PoolOpt(), Malloc(), Free()
PoolOpt() takes in Malloc/Free (M/F) sequences, including those induced by swapping
cross-iteration variables and the last-iteration case are handled

records the M/F sequence for one iteration once the swap plan is done
junzhezhang committed Sep 19, 2018
1 parent 383fffe commit 2fb3f02
Showing 7 changed files with 376 additions and 225 deletions.
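
For reference, the M/F records that PoolOpt() takes in are the plain strings built by SwapGPU::Malloc and SwapGPU::Free below, one operation per line: "Malloc <ptr> <size>" and "Free <ptr>". A two-record sample (the pointer value is illustrative):

Malloc 0x7f3a64000000 1048576
Free 0x7f3a64000000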
4 changes: 2 additions & 2 deletions examples/cifar10/train.py
@@ -130,7 +130,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
dev = device.get_default_device()
else:
print('Using GPU')
- dev = device.create_cuda_gpu_on(1)
+ dev = device.create_cuda_gpu_on(0)

net.to_device(dev)
opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
@@ -153,7 +153,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
fileTimeLog.write('Epoch %d: ' % epoch)
fileTimeLog.write(str(int(round(time.time()*1000))))
fileTimeLog.write('\n')
- for b in range(15): #num_train_batch):
+ for b in range(20): #num_train_batch):
print ("start of iteration %d: " %b)
#time.sleep(1)
fileTimeLog.write('iteration %d: ' % b)
1 change: 1 addition & 0 deletions include/singa/core/device.h
@@ -365,6 +365,7 @@ class SwapGPU : public Device {
//vec_block
vector<string>vec_block; //iteration 0-3
vector<string>vec_block_fresh; //iteration 4 5 6
+ vector<string>vec_block_mf; //itr 8 9 10
vector<double>global_load; // from beginning
vector<double>origin_load; //vec_load 3 itr. TODO(junzhe) to delete vec_load, global_load after use.
vector<onePieceMsg>vec_run;
36 changes: 27 additions & 9 deletions include/singa/core/memory.h
@@ -50,6 +50,8 @@ class DeviceMemPool {
virtual void Malloc(void** ptr, const size_t size) = 0;
virtual void Free(void* ptr) = 0;
virtual void Append(string blockInfo) = 0;

+ virtual void PoolOpt(vector<string> &vec_mf) = 0;

virtual void SwapOut(void* data_) = 0;
virtual void SwapIn(void* data_) = 0;
@@ -74,7 +76,9 @@ class CnMemPool : public DeviceMemPool {

void Malloc(void** ptr, const size_t size);
void Free(void* ptr);
void Append(string blockInfo){}

+ void PoolOpt(vector<string> &vec_mf) override {}

void SwapOut(void* data_) override {}
void SwapIn(void* data_) override {}
@@ -102,7 +106,9 @@ class CudaMemPool : public DeviceMemPool {
public:
void Malloc(void** ptr, const size_t size) override;
void Free(void* ptr) override;
void Append(string blockInfo){}

+ void PoolOpt(vector<string> &vec_mf) override {}

void SwapOut(void* data_) override {}
void SwapIn(void* data_) override {}
@@ -134,9 +140,11 @@ class SmartMemPool: public DeviceMemPool {
void getMaxLoad(void);
std::pair<size_t, size_t> GetMemUsage() override;
void Append(string blockInfo);

+ void PoolOpt(vector<string> &vec_mf) override {}

void SwapOut(void* data_) override {}
void SwapIn(void* data_) override {}
protected:
void Init();
private:
@@ -196,19 +204,22 @@ struct SwapMeta{
void* d_ptr; //not used for
};

-class Swap : public DeviceMemPool {
+class SwapPool : public DeviceMemPool {
public:
- Swap(const MemPoolConf &conf); //constructor
+ SwapPool(const MemPoolConf &conf); //constructor
//TODO(junzhe) in Singa, void Malloc( void**, size_t); change to cudaMalloc and cudaFree.
void Malloc(void** ptr, const size_t size);
void Free(void* ptr);
- ~Swap();
+ ~SwapPool();
void getMaxLoad(void);
std::pair<size_t, size_t> GetMemUsage() override;
void Append(string blockInfo);

void SwapOut(void* data_);
void SwapIn(void* data_);

+ //PoolOpt() constructs the pool based on the M/F info gathered after SwapPool is constructed.
+ void PoolOpt(vector<string> &vec_mf);
protected:
void Init();
private:
@@ -219,8 +230,15 @@ class Swap : public DeviceMemPool {
std::mutex mtx_;
vector<string> vec_block;
size_t swapLimit = 1<<23; //8MB
- map<void*,swapLookUpElement>Table_id2LookUpElement; //old TODO(junzhe) remove
- map<void*,pair<SwapMeta,SwapMeta>>Table_Meta;
+ int poolFlag = 0;
+ int pc = 0;
+ int maxLen_mf = 0;
+ void* ptrPool = nullptr;
+ map<void*,int>Table_p2r; //ptr -> arrival idx, for table lookup during Free
+ map<int,lookUpElement>Table_r2v; //r -> vertex
+ vector<pair<int,lookUpElement>>Vec_r2Ver; //no longer needed, replaced by Table_r2v TODO(junzhe)
+ // map<void*,swapLookUpElement>Table_id2LookUpElement; //old TODO(junzhe) remove
+ // map<void*,pair<SwapMeta,SwapMeta>>Table_Meta;
};

#endif
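
SwapPool::PoolOpt's body is not among the hunks shown here (it presumably lands in the seventh changed file, whose diff did not load). As a minimal sketch of the first step it implies, parsing vec_mf back into structured events the pool can replay when planning block reuse; MfEvent and ParseMf are hypothetical names, not the commit's:

#include <sstream>
#include <string>
#include <vector>
using namespace std;

struct MfEvent {
  bool is_malloc;   // true for a "Malloc ..." record, false for "Free ..."
  string ptr;       // pointer literal recorded at run time
  size_t size = 0;  // only present on Malloc records
};

// Turn one iteration of recorded M/F strings into structured events.
vector<MfEvent> ParseMf(const vector<string>& vec_mf) {
  vector<MfEvent> events;
  for (const string& line : vec_mf) {
    istringstream iss(line);
    MfEvent e;
    string op;
    iss >> op >> e.ptr;
    e.is_malloc = (op == "Malloc");
    if (e.is_malloc) iss >> e.size;
    events.push_back(e);
  }
  return events;
}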
2 changes: 1 addition & 1 deletion src/core/device/cuda_gpu.cc
@@ -48,7 +48,7 @@ const int kNumCudaStream = 1;
CudaGPU::CudaGPU(int id) : Device(id, kNumCudaStream) {
MemPoolConf conf;
conf.add_device(id);
- pool_ = std::make_shared<Swap>(conf);
+ pool_ = std::make_shared<CnMemPool>(conf);
Setup();
}

2 changes: 1 addition & 1 deletion src/core/device/platform.cc
@@ -128,7 +128,7 @@ Platform::CreateCudaGPUsOn(const vector<int> &devices, size_t init_size) {
conf.add_device(device);
CHECK_LE(bytes, Platform::GetGPUMemSize(device).first);
}
- auto pool = std::make_shared<CnMemPool>(conf);
+ auto pool = std::make_shared<SwapPool>(conf);

vector<shared_ptr<Device> > ret;
for (auto device : devices) {
89 changes: 86 additions & 3 deletions src/core/device/swap_gpu.cc
@@ -42,7 +42,6 @@ const cudaMemcpyKind copyKind[] = {cudaMemcpyHostToHost, cudaMemcpyHostToDevice,

///functions to be used
///Section for structs and respective sorting function:
- // onePieceMsg, onePairMsg, oneIterMsg, version 11/30 3pm



@@ -924,7 +923,8 @@ SwapGPU::SwapGPU(int id) : Device(id, kNumCudaStream) {

MemPoolConf conf;
conf.add_device(id);
- pool_ = std::make_shared<Swap>(conf);
+ //TODO(junzhe) note that SwapGPU used to be built with <Swap>; the switch to SwapPool doesn't change behavior here.
+ pool_ = std::make_shared<SwapPool>(conf);
Setup();

}
@@ -987,6 +987,26 @@ void* SwapGPU::Malloc(int size) {
if (size > 0) {
CUDA_CHECK(cudaSetDevice(id_));
pool_->Malloc((void**)&ptr, size);

///append vec_block_mf
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
&& ((gc - maxLen) >= three_more_globeCounter)){
string tempStr1 ="Malloc ";
stringstream strm2;
strm2<<ptr;
string tempStr2 = strm2.str();
stringstream strm3;
strm3<<size;
string tempStr3 = strm3.str();
string temp = tempStr1+tempStr2+" "+tempStr3;
vec_block_mf.push_back(temp);
}
//record mf semantics after swap plan done
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)){
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
file_mf_one_itr<<"Malloc "<<ptr<<" "<<size;
file_mf_one_itr<<endl;
}
// TODO(wangwei) remove the memset.
CUDA_CHECK(cudaMemset(ptr, 0, size));
}
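
The same five-line stringstream construction recurs in Free(), DeploySwap_exec(), and SwapIn_idx() below, differing only in the opcode and the optional size field. A hypothetical helper (MfEntry is not in the commit) could factor it out:

// Hypothetical helper: format one M/F record the way the blocks above do.
// Pass no size (or 0) for Free records, which carry no size field.
static std::string MfEntry(const std::string& op, void* ptr, size_t size = 0) {
  std::stringstream ss;
  ss << op << " " << ptr;           // e.g. "Malloc 0x7f3a64000000"
  if (size > 0) ss << " " << size;  // appended for Malloc records only
  return ss.str();
}

Each recording site would then reduce to vec_block_mf.push_back(MfEntry("Malloc", ptr, size)); or vec_block_mf.push_back(MfEntry("Free", ptr));.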
@@ -1000,6 +1020,21 @@ void SwapGPU::Free(void* ptr) {
if (ptr != nullptr) {
CUDA_CHECK(cudaSetDevice(id_));
pool_->Free(ptr);
///append vec_block_mf
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
&& ((gc - maxLen) >= three_more_globeCounter)){
string tempStr1 ="Free ";
stringstream strm2;
strm2<<ptr;
string tempStr2 = strm2.str();
string temp = tempStr1+tempStr2;
vec_block_mf.push_back(temp);
}

if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)){
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
file_mf_one_itr<<"Free "<<ptr<<endl;
}
}

//cout<<"free done"<<endl;
@@ -1115,6 +1150,21 @@ void SwapGPU::DeploySwap_exec(int r_gc){
last_meta.block_->update_data(nullptr);
// cout<<"to free data_"<<last_meta.data_<<endl;
pool_->Free(last_meta.data_);
///append vec_block_mf
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
&& ((gc - maxLen) >= three_more_globeCounter)){
string tempStr1 ="Free ";
stringstream strm2;
strm2<<last_meta.data_;
string tempStr2 = strm2.str();
string temp = tempStr1+tempStr2;
vec_block_mf.push_back(temp);
}

if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)){
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
file_mf_one_itr<<"Free "<<last_meta.data_<<" SwapOut(Sync)"<<endl;
}
last_meta.data_ = nullptr; //not really needed TODO(junzhe)
cout<<"----sync out "<<sync_idx<<endl;
Table_meta.find(sync_idx)->second = last_meta;
@@ -1213,8 +1263,24 @@ void SwapGPU::Append(string blockInfo){

//test moved from start of malloc/free to end of append, only gc+1 changed
Test_sched_switch_swap();
- //NOTE: this gc++ includes read/write and AppendLayer as well, in addition to malloc/free.
+ //NOTE: this gc includes read/write and AppendLayer as well, in addition to malloc/free.
gc++;
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) == three_more_globeCounter)){
cout<<"==================to call PoolOpt"<<endl;
fstream file_mf_8910("mf_8910.csv", ios::in|ios::out|ios::app);
for (int i = 0; i< vec_block_mf.size();i++){
file_mf_8910<<vec_block_mf[i]<<endl;
}
cout<<"len of vec_block_mf: "<<vec_block_mf.size()<<endl;
pool_->PoolOpt(vec_block_mf);
cout<<"==================to call PoolOpt done"<<endl;
}

if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
&& ((gc - three_more_globeCounter)%maxLen == 0)){
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
file_mf_one_itr<<"-----new itr------"<<endl;
}

}
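
Reading the gating conditions above together: maxLen is the number of appended operations per iteration, and three_more_globeCounter appears to mark the counter value three iterations after the swap plan takes effect. vec_block_mf therefore collects exactly three iterations of M/F records (the "itr 8 9 10" noted in device.h), and PoolOpt() fires once when that window closes. An equivalent, illustrative restatement of the predicates:

// Illustrative restatement of the windows used above; gc, maxLen, and
// three_more_globeCounter (TMG) are the SwapGPU members from this diff.
bool InMfRecordWindow(int gc, int maxLen, int TMG) {
  // three full iterations: TMG + maxLen <= gc < TMG + 4*maxLen
  return gc >= TMG + maxLen && gc < TMG + 4 * maxLen;
}
bool AtPoolOptTrigger(int gc, int maxLen, int TMG) {
  // one-shot: the first counter value past the recording window
  return gc == TMG + 4 * maxLen;
}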

Expand Down Expand Up @@ -1297,6 +1363,23 @@ void SwapGPU::SwapIn_idx(const int r_idx){
//cout<<"update block and data of r_idx: "<<r_idx<<' '<<meta.block_<<' '<<meta.data_<<endl;
void* ptr = nullptr;
pool_->Malloc((void**)&ptr, meta.size);
///append vec_block_mf
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
&& ((gc - maxLen) >= three_more_globeCounter)){
string tempStr1 ="Malloc ";
stringstream strm2;
strm2<<ptr;
string tempStr2 = strm2.str();
stringstream strm3;
strm3<<meta.size;
string tempStr3 = strm3.str();
string temp = tempStr1+tempStr2+" "+tempStr3;
vec_block_mf.push_back(temp);
}
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)){
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
file_mf_one_itr<<"Malloc "<<ptr<<" "<<meta.size<<" swapIn"<<endl;
}
//cout<<"expected results update_data:: "<<meta.block_<<" "<<ptr<<endl;
//cout<<"malloc due to swapIn ("<<r_idx<<") "<<ptr<<endl;
//void* to_rm_ptr = meta.data_;
