@@ -163,38 +163,38 @@ If your callback returns False, training is aborted early.
from stable_baselines.results_plotter import load_results, ts2xy
from stable_baselines import DDPG
from stable_baselines.ddpg import AdaptiveParamNoiseSpec
+ from stable_baselines import results_plotter

best_mean_reward, n_steps = -np.inf, 0

def callback(_locals, _globals):
-     """
-     Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
-     :param _locals: (dict)
-     :param _globals: (dict)
-     """
-     global n_steps, best_mean_reward
-     # Print stats every 1000 calls
-     if (n_steps + 1) % 1000 == 0:
-         # Evaluate policy training performance
-         x, y = ts2xy(load_results(log_dir), 'timesteps')
-         if len(x) > 0:
-             mean_reward = np.mean(y[-100:])
-             print(x[-1], 'timesteps')
-             print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))
-
-             # New best model, you could save the agent here
-             if mean_reward > best_mean_reward:
-                 best_mean_reward = mean_reward
-                 # Example for saving best model
-                 print("Saving new best model")
-                 _locals['self'].save(log_dir + 'best_model.pkl')
-     n_steps += 1
-     return True
-
+     """
+     Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
+     :param _locals: (dict)
+     :param _globals: (dict)
+     """
+     global n_steps, best_mean_reward
+     # Print stats every 1000 calls
+     if (n_steps + 1) % 1000 == 0:
+         # Evaluate policy training performance
+         x, y = ts2xy(load_results(log_dir), 'timesteps')
+         if len(x) > 0:
+             mean_reward = np.mean(y[-100:])
+             print(x[-1], 'timesteps')
+             print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))
+
+             # New best model, you could save the agent here
+             if mean_reward > best_mean_reward:
+                 best_mean_reward = mean_reward
+                 # Example for saving best model
+                 print("Saving new best model")
+                 _locals['self'].save(log_dir + 'best_model.pkl')
+     n_steps += 1
+     return True

# Create log dir
- log_dir = "/tmp/gym/"
+ log_dir = "tmp/"
os.makedirs(log_dir, exist_ok=True)

# Create and wrap the environment
@@ -206,7 +206,11 @@ If your callback returns False, training is aborted early.
# Because we use parameter noise, we should use a MlpPolicy with layer normalization
model = DDPG(LnMlpPolicy, env, param_noise=param_noise, verbose=0)
# Train the agent
- model.learn(total_timesteps=int(1e5), callback=callback)
+ time_steps = 1e5
+ model.learn(total_timesteps=int(time_steps), callback=callback)
+
+ results_plotter.plot_results([log_dir], time_steps, results_plotter.X_TIMESTEPS, "DDPG LunarLander")
+ plt.show()

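The note repeated in both hunk headers ("If your callback returns False, training is aborted early") is easy to miss, since the callback in the example above always returns True. The sketch below is not part of this commit; it shows how that behaviour can be used for early stopping with the same Stable Baselines 2.x callback signature. The MountainCarContinuous-v0 environment, the -200 reward threshold, and the early_stop_callback name are arbitrary choices for illustration only.

import os

import gym
import numpy as np

from stable_baselines import DDPG
from stable_baselines.ddpg.policies import LnMlpPolicy
from stable_baselines.bench import Monitor
from stable_baselines.results_plotter import load_results, ts2xy

log_dir = "tmp/"
os.makedirs(log_dir, exist_ok=True)


def early_stop_callback(_locals, _globals):
    """
    Stop training once the mean reward over the last 100 episodes exceeds
    an arbitrary threshold. Returning False aborts training early.
    :param _locals: (dict)
    :param _globals: (dict)
    """
    x, y = ts2xy(load_results(log_dir), 'timesteps')
    if len(x) > 0 and np.mean(y[-100:]) > -200:  # illustrative threshold, not from the docs
        print("Reward threshold reached, stopping training early")
        return False  # returning False aborts training
    return True  # returning True lets training continue


env = Monitor(gym.make('MountainCarContinuous-v0'), log_dir, allow_early_resets=True)
model = DDPG(LnMlpPolicy, env, verbose=0)
model.learn(total_timesteps=int(1e5), callback=early_stop_callback)

In practice you would throttle the check, as the callback in the commit does (every 1000 calls), rather than reading the Monitor logs on every step.
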
Atari Games