
Commit 1da2fa2

Add tensorboard logging
1 parent 02d6f65 commit 1da2fa2

7 files changed, +141 −32 lines changed

.gitignore

Lines changed: 70 additions & 2 deletions
@@ -1,2 +1,70 @@
-__pycache__
-*.pyc
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+*.c
+
+# logs
+runs/
+checkpoints/
+
+# other
+.DS_Store
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints

README.md

Lines changed: 6 additions & 0 deletions
@@ -22,6 +22,12 @@ Install most recent nightly build (version '0.1.10+2fd4d08' or later) of PyTorch
 pip install git+https://github.com/pytorch/pytorch
 `
 
+## Dependencies
+* pytorch
+* torchvision
+* universe (for now)
+* [tensorboard logger](https://github.com/TeamHG-Memex/tensorboard_logger)
+
 ## Results
 
 With 16 processes it converges for PongDeterministic-v3 in 15 minutes.
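
Of the listed dependencies, only the tensorboard logger is new in this commit. Assuming the package is published on PyPI under the same name (the linked repo's README suggests it is), it installs with a one-liner:

    pip install tensorboard_logger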

main.py

Lines changed: 28 additions & 9 deletions
@@ -4,18 +4,23 @@
 import os
 import sys
 import math
+import time
 
 import torch
 import torch.optim as optim
 import torch.multiprocessing as mp
 import torch.nn as nn
 import torch.nn.functional as F
+import tensorboard_logger as tb
+
+import my_optim
 from envs import create_atari_env
 from model import ActorCritic
 from train import train
 from test import test
 from utils import logger
-import my_optim
+from utils.shared_memory import SharedCounter
+
 
 logger = logger.getLogger('main')
 
@@ -41,16 +46,27 @@
                     help='environment to train on (default: PongDeterministic-v3)')
 parser.add_argument('--no-shared', default=False, metavar='O',
                     help='use an optimizer without shared momentum.')
-parser.add_argument('--max-iters', type=int, default=math.inf,
-                    help='maximum iterations per process.')
-
+parser.add_argument('--max-episode-count', type=int, default=math.inf,
+                    help='maximum number of episodes to run per process.')
 parser.add_argument('--debug', action='store_true', default=False,
                     help='run in a way its easier to debug')
+parser.add_argument('--short-description', default='no_descr',
+                    help='Short description of the run params (used in tensorboard)')
+
+def setup_loggings(args):
+    logger.debug('CONFIGURATION: {}'.format(args))
+
+    cur_path = os.path.dirname(os.path.realpath(__file__))
+    args.summ_base_dir = (cur_path+'/runs/{}/{}({})').format(args.env_name,
+        time.strftime('%d.%m-%H.%M'), args.short_description)
+    logger.info('logging run logs to {}'.format(args.summ_base_dir))
+    tb.configure(args.summ_base_dir)
 
 if __name__ == '__main__':
     args = parser.parse_args()
-
+    setup_loggings(args)
     torch.manual_seed(args.seed)
+
     env = create_atari_env(args.env_name)
     shared_model = ActorCritic(
         env.observation_space.shape[0], env.action_space)
@@ -61,20 +77,23 @@
     else:
         optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
         optimizer.share_memory()
-
+
+    gl_step_cnt = SharedCounter()
 
     if not args.debug:
         processes = []
 
-        p = mp.Process(target=test, args=(args.num_processes, args, shared_model))
+        p = mp.Process(target=test, args=(args.num_processes, args,
+                                          shared_model, gl_step_cnt))
        p.start()
        processes.append(p)
        for rank in range(0, args.num_processes):
-            p = mp.Process(target=train, args=(rank, args, shared_model, optimizer))
+            p = mp.Process(target=train, args=(rank, args, shared_model,
+                                               gl_step_cnt, optimizer))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
    else: ## debug is enabled
        # run only one process in a main, easier to debug
-        train(0, args, shared_model, optimizer)
+        train(0, args, shared_model, gl_step_cnt, optimizer)
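
The commit leans on just two tensorboard_logger calls: one `tb.configure(logdir)` per process to pick the event-file directory, and `tb.log_value(name, value, step)` for each scalar. A minimal standalone sketch of that API (the `runs/demo` path and the `loss` series are illustrative, not from the commit):

    import tensorboard_logger as tb

    # One configure() call per process: event files are written under this
    # directory and show up in TensorBoard as a single run.
    tb.configure('runs/demo')  # illustrative path, not from the commit

    for step in range(1, 101):
        loss = 1.0 / step  # dummy scalar, for illustration only
        tb.log_value('loss', loss, step)  # (series name, value, global step)

Running `tensorboard --logdir runs` then picks up every run directory, which is exactly the layout `setup_loggings` builds in `summ_base_dir`: one subdirectory per environment, timestamp, and `--short-description`.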

model.py

Lines changed: 0 additions & 4 deletions
@@ -44,12 +44,8 @@ def __init__(self, num_inputs, action_space):
         self.lstm = nn.LSTMCell(32 * 3 * 3, 256)
 
         num_outputs = action_space.n
-
         self.critic_linear = nn.Linear(256, 1)
         self.actor_linear = nn.Linear(256, num_outputs)
-        #self.critic_linear = nn.Linear(288, 1)
-        #self.actor_linear = nn.Linear(288, num_outputs)
-
         self.apply(weights_init)
         self.actor_linear.weight.data = normalized_columns_initializer(
             self.actor_linear.weight.data, 0.01)

test.py

Lines changed: 14 additions & 3 deletions
@@ -1,21 +1,23 @@
 import math
 import os
 import sys
+import time
 
 import torch
 import torch.nn.functional as F
 import torch.optim as optim
+import tensorboard_logger as tb
+
 from envs import create_atari_env
 from model import ActorCritic
 from torch.autograd import Variable
 from torchvision import datasets, transforms
-import time
 from collections import deque
 from utils import logger
 
 logger = logger.getLogger('test')
 
-def test(rank, args, shared_model):
+def test(rank, args, shared_model, gl_step_cnt):
     torch.manual_seed(args.seed + rank)
 
     env = create_atari_env(args.env_name)
@@ -32,6 +34,8 @@ def test(rank, args, shared_model):
 
     start_time = time.time()
 
+    local_episode_num = 0
+
     # a quick hack to prevent the agent from stucking
     actions = deque(maxlen=100)
     episode_length = 0
@@ -61,10 +65,17 @@
             done = True
 
         if done:
+            passed_time = time.time() - start_time
+            local_episode_num += 1
+            global_step_count = gl_step_cnt.get_value()
+
             logger.info("Time {}, episode reward {}, episode length {}".format(
                 time.strftime("%Hh %Mm %Ss",
-                              time.gmtime(time.time() - start_time)),
+                              time.gmtime(passed_time)),
                 reward_sum, episode_length))
+            tb.log_value('steps_second', global_step_count / passed_time, global_step_count)
+            tb.log_value('reward', reward_sum, global_step_count)
+
             reward_sum = 0
             episode_length = 0
             actions.clear()
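
Two scalars are logged once per test episode, both indexed by the global step count that the train processes accumulate in the shared counter: `reward` is the undiscounted episode return, and `steps_second` is `global_step_count / passed_time`, i.e. the average environment steps per second across all training processes since startup, not an instantaneous rate.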

train.py

Lines changed: 10 additions & 14 deletions
@@ -1,12 +1,11 @@
 import math
 import os
 import sys
-import resource
-import gc
 
 import torch
 import torch.nn.functional as F
 import torch.optim as optim
+
 from envs import create_atari_env
 from model import ActorCritic
 from torch.autograd import Variable
@@ -21,7 +20,7 @@ def ensure_shared_grads(model, shared_model):
             return
         shared_param._grad = param.grad
 
-def train(rank, args, shared_model, optimizer=None):
+def train(rank, args, shared_model, gl_step_count, optimizer=None):
     torch.manual_seed(args.seed + rank)
 
     env = create_atari_env(args.env_name)
@@ -39,8 +38,7 @@ def train(rank, args, shared_model, optimizer=None):
     done = True
 
     episode_length = 0
-
-    iteration = 0
+    episode_count = 0
 
     while True:
 
@@ -49,17 +47,11 @@
         rewards = []
         entropies = []
 
-        if iteration == args.max_iters:
-            logger.info('Max iteration {} reached..'.format(args.max_iters))
+        if episode_count == args.max_episode_count:
+            logger.info('Maximum episode count {} reached..'.format(args.max_episode_count))
+            # TODO make sure test.py also exits when no train process is running
             break
 
-        if iteration % 200 == 0 and rank == 0:
-            mem_used = int(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
-            mem_used_mb = mem_used / 1024
-            logger.info('Memory usage of one proc: {} (mb)'.format(mem_used_mb))
-
-
-        iteration += 1
         episode_length += 1
 
         # Sync with the shared model
@@ -89,6 +81,7 @@
 
             if done:
                 episode_length = 0
+                episode_count += 1
                 state = env.reset()
 
             state = torch.from_numpy(state)
@@ -99,6 +92,9 @@
             if done:
                 break
 
+        # increment global step count
+        gl_step_count.increment_by(step)
+
         R = torch.zeros(1, 1)
         if not done:
             value, _, _ = model((Variable(state.unsqueeze(0)), (hx, cx)))
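
Each trainer now bumps the shared counter once per rollout by `step`, the final index of the rollout loop, so the test process can read aggregate training progress without any extra synchronization beyond the counter's own lock.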

utils/shared_memory.py

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+from multiprocessing import Value, Lock
+
+class SharedCounter:
+    def __init__(self):
+        self.lock = Lock()
+        self.n = Value('i', 0)
+
+    def increment_by(self, k):
+        with self.lock:
+            self.n.value += k
+
+    def get_value(self):
+        return self.n.value
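
A quick sanity-check sketch of the counter's semantics (hypothetical, not part of the commit; assumes it runs from the repo root so `utils.shared_memory` is importable): several processes increment the same counter and no updates are lost.

    # Hypothetical usage sketch, not part of the commit.
    from multiprocessing import Process

    from utils.shared_memory import SharedCounter

    def worker(counter, n):
        for _ in range(n):
            counter.increment_by(1)  # the Lock makes the read-modify-write atomic

    if __name__ == '__main__':
        cnt = SharedCounter()  # create before forking so children share it
        procs = [Process(target=worker, args=(cnt, 1000)) for _ in range(4)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()
        print(cnt.get_value())  # 4000: no lost updates

Note that `Value('i', 0)` already carries its own lock (reachable via `n.get_lock()`); the explicit `Lock` here is simply an equivalent, readable way to make `+= k` atomic, since a bare `.value += k` is not.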
