forked from PaddlePaddle/models
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcluster_train.sh
62 lines (57 loc) · 1.48 KB
/
cluster_train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash
#export GLOG_v=30
#export GLOG_logtostderr=1
# start pserver0
python cluster_train.py \
--train_dir train_data \
--model_dir cluster_model \
--vocab_path vocab.txt \
--batch_size 5 \
--is_local 0 \
--role pserver \
--endpoints 127.0.0.1:6000,127.0.0.1:6001 \
--current_endpoint 127.0.0.1:6000 \
--trainers 2 \
> pserver0.log 2>&1 &
# start pserver1
python cluster_train.py \
--train_dir train_data \
--model_dir cluster_model \
--vocab_path vocab.txt \
--batch_size 5 \
--is_local 0 \
--role pserver \
--endpoints 127.0.0.1:6000,127.0.0.1:6001 \
--current_endpoint 127.0.0.1:6001 \
--trainers 2 \
> pserver1.log 2>&1 &
# start trainer0
#CUDA_VISIBLE_DEVICES=1 python cluster_train.py \
python cluster_train.py \
--train_dir train_data \
--model_dir cluster_model \
--vocab_path vocab.txt \
--batch_size 5 \
--print_batch 10 \
--use_cuda 0 \
--is_local 0 \
--role trainer \
--endpoints 127.0.0.1:6000,127.0.0.1:6001 \
--trainers 2 \
--trainer_id 0 \
> trainer0.log 2>&1 &
# start trainer1
#CUDA_VISIBLE_DEVICES=2 python cluster_train.py \
python cluster_train.py \
--train_dir train_data \
--model_dir cluster_model \
--vocab_path vocab.txt \
--batch_size 5 \
--print_batch 10 \
--use_cuda 0 \
--is_local 0 \
--role trainer \
--endpoints 127.0.0.1:6000,127.0.0.1:6001 \
--trainers 2 \
--trainer_id 1 \
> trainer1.log 2>&1 &