
Commit a1ab7a1

rusu24edward authored and Miffyli committed
Updated Monitor example to include plotting (#533)
* Added the actual plotting code to the examples
* Updated monitor example to actually include plot
* Adding plotting to monitor example
* Update changelog.rst
* Set timesteps back to original value
* Fixed indentation
* Just create a temp folder to work with Windows
* Added results plotter
* Update changelog.rst
1 parent a71f9db commit a1ab7a1
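
The plotting that this commit wires into the Monitor example essentially comes down to the snippet below. This is a minimal sketch only, assuming a Monitor log directory at tmp/ that a training run has already filled; the paths, timestep budget, and plot title simply mirror the updated example.

    import matplotlib.pyplot as plt

    from stable_baselines import results_plotter

    log_dir = "tmp/"   # directory the Monitor wrapper writes its episode logs to
    time_steps = 1e5   # training budget, passed through to the plotter as in the example

    # Plot the episode rewards recorded by Monitor against training timesteps
    results_plotter.plot_results([log_dir], time_steps,
                                 results_plotter.X_TIMESTEPS, "DDPG LunarLander")
    plt.show()

results_plotter.X_TIMESTEPS selects timesteps as the x-axis; load_results and ts2xy, which the example's callback already uses, live in the same stable_baselines.results_plotter module.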

File tree

2 files changed (+31, -26 lines)

docs/guide/examples.rst (+30 -26)

@@ -163,38 +163,38 @@ If your callback returns False, training is aborted early.
 from stable_baselines.results_plotter import load_results, ts2xy
 from stable_baselines import DDPG
 from stable_baselines.ddpg import AdaptiveParamNoiseSpec
+from stable_baselines import results_plotter


 best_mean_reward, n_steps = -np.inf, 0

 def callback(_locals, _globals):
-    """
-    Callback called at each step (for DQN an others) or after n steps (see ACER or PPO2)
-    :param _locals: (dict)
-    :param _globals: (dict)
-    """
-    global n_steps, best_mean_reward
-    # Print stats every 1000 calls
-    if (n_steps + 1) % 1000 == 0:
-        # Evaluate policy training performance
-        x, y = ts2xy(load_results(log_dir), 'timesteps')
-        if len(x) > 0:
-            mean_reward = np.mean(y[-100:])
-            print(x[-1], 'timesteps')
-            print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))
-
-            # New best model, you could save the agent here
-            if mean_reward > best_mean_reward:
-                best_mean_reward = mean_reward
-                # Example for saving best model
-                print("Saving new best model")
-                _locals['self'].save(log_dir + 'best_model.pkl')
-    n_steps += 1
-    return True
-
+    """
+    Callback called at each step (for DQN an others) or after n steps (see ACER or PPO2)
+    :param _locals: (dict)
+    :param _globals: (dict)
+    """
+    global n_steps, best_mean_reward
+    # Print stats every 1000 calls
+    if (n_steps + 1) % 1000 == 0:
+        # Evaluate policy training performance
+        x, y = ts2xy(load_results(log_dir), 'timesteps')
+        if len(x) > 0:
+            mean_reward = np.mean(y[-100:])
+            print(x[-1], 'timesteps')
+            print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))
+
+            # New best model, you could save the agent here
+            if mean_reward > best_mean_reward:
+                best_mean_reward = mean_reward
+                # Example for saving best model
+                print("Saving new best model")
+                _locals['self'].save(log_dir + 'best_model.pkl')
+    n_steps += 1
+    return True

 # Create log dir
-log_dir = "/tmp/gym/"
+log_dir = "tmp/"
 os.makedirs(log_dir, exist_ok=True)

 # Create and wrap the environment
@@ -206,7 +206,11 @@ If your callback returns False, training is aborted early.
 # Because we use parameter noise, we should use a MlpPolicy with layer normalization
 model = DDPG(LnMlpPolicy, env, param_noise=param_noise, verbose=0)
 # Train the agent
-model.learn(total_timesteps=int(1e5), callback=callback)
+time_steps = 1e5
+model.learn(total_timesteps=int(time_steps), callback=callback)
+
+results_plotter.plot_results([log_dir], time_steps, results_plotter.X_TIMESTEPS, "DDPG LunarLander")
+plt.show()


 Atari Games
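
For finer control over the figure than results_plotter.plot_results offers, the same Monitor logs can also be loaded and plotted by hand with the load_results and ts2xy helpers the example already imports. The sketch below is one possible way to do that; the 50-episode smoothing window is an arbitrary choice for illustration, not something taken from this commit.

    import numpy as np
    import matplotlib.pyplot as plt

    from stable_baselines.results_plotter import load_results, ts2xy

    log_dir = "tmp/"

    # Load the Monitor CSV files from log_dir and convert them to
    # (timesteps, episode reward) arrays, just as the callback does
    x, y = ts2xy(load_results(log_dir), 'timesteps')

    # Smooth the per-episode rewards with a simple moving average
    window = 50
    if len(y) >= window:
        y = np.convolve(y, np.ones(window) / window, mode='valid')
        x = x[window - 1:]

    plt.plot(x, y)
    plt.xlabel('Timesteps')
    plt.ylabel('Episode reward (smoothed)')
    plt.title('DDPG LunarLander')
    plt.show()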

docs/misc/changelog.rst (+1)

@@ -38,6 +38,7 @@ Others:

 Documentation:
 ^^^^^^^^^^^^^^
+- Add plotting to the Monitor example (@rusu24edward)
 - Add Snake Game AI project (@pedrohbtp)
 - Add note on the support Tensorflow versions.
 - Remove unnecessary steps required for Windows installation.