Skip to content

Commit 8e3744d

Browse files
vwxyzjn and araffin authored
Support experiment tracking with W&B (#213)
* Support experiment tracking with W&B
* Quick fix
* Fix CI
* Update train.py (Co-authored-by: Antonin RAFFIN <[email protected]>)
* fix CI
* Add documentation
* Update CHANGELOG.md (Co-authored-by: Antonin RAFFIN <[email protected]>)
* Address comments
* Update (Co-authored-by: Antonin RAFFIN <[email protected]>)
1 parent 0e00446 commit 8e3744d

File tree

6 files changed

+60
-7
lines changed

6 files changed

+60
-7
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,5 @@ rl-trained_agents/
1313
htmlcov/
1414
git_rewrite_commit_history.sh
1515
.vscode/
16+
wandb
17+
runs

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
- Upgrade to Stable-Baselines3 (SB3) >= 1.4.1a1
55
- Upgrade to sb3-contrib >= 1.4.1a1
66
- Upgraded to gym 0.21
7+
- Support experiment tracking via Weights and Biases (@vwxyzjn)
78

89
### New Features
910

README.md

+14
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,20 @@ Note that the default hyperparameters used in the zoo when tuning are not always
196196

197197
When working with continuous actions, we recommend to enable [gSDE](https://arxiv.org/abs/2005.05719) by uncommenting lines in [utils/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py).
198198

199+
200+
## Experiment tracking
201+
202+
We support tracking experiment data such as learning curves and hyperparameters via [Weights and Biases](https://wandb.ai).
203+
204+
The following command
205+
```
206+
python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3
207+
```
208+
209+
yields a tracked experiment at this [URL](https://wandb.ai/openrlbenchmark/sb3/runs/1b65ldmh).
210+
211+
212+
199213
## Env normalization
200214
201215
In the hyperparameter file, `normalize: True` means that the training environment will be wrapped in a [VecNormalize](https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/vec_env/vec_normalize.py#L13) wrapper.

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ cloudpickle>=1.5.0
1313
plotly
1414
panda-gym==1.1.1 # tmp fix: until compatibility with panda-gym v2
1515
rliable>=1.0.5
16+
wandb

train.py

+40-5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import difflib
33
import importlib
44
import os
5+
import time
56
import uuid
67

78
import gym
@@ -114,6 +115,14 @@
114115
help="Overwrite hyperparameter (e.g. learning_rate:0.01 train_freq:10)",
115116
)
116117
parser.add_argument("-uuid", "--uuid", action="store_true", default=False, help="Ensure that the run has a unique ID")
118+
parser.add_argument(
119+
"--track",
120+
action="store_true",
121+
default=False,
122+
help="if toggled, this experiment will be tracked with Weights and Biases",
123+
)
124+
parser.add_argument("--wandb-project-name", type=str, default="sb3", help="the wandb's project name")
125+
parser.add_argument("--wandb-entity", type=str, default=None, help="the entity (team) of wandb's project")
117126
args = parser.parse_args()
118127

119128
# Going through custom gym packages to let them register in the global registory
@@ -153,6 +162,26 @@
153162
print("=" * 10, env_id, "=" * 10)
154163
print(f"Seed: {args.seed}")
155164

165+
if args.track:
166+
try:
167+
import wandb
168+
except ImportError:
169+
raise ImportError(
170+
"if you want to use Weights & Biases to track experiment, please install W&B via `pip install wandb`"
171+
)
172+
173+
run_name = f"{args.env}__{args.algo}__{args.seed}__{int(time.time())}"
174+
run = wandb.init(
175+
name=run_name,
176+
project=args.wandb_project_name,
177+
entity=args.wandb_entity,
178+
config=vars(args),
179+
sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
180+
monitor_gym=True, # auto-upload the videos of agents playing the game
181+
save_code=True, # optional
182+
)
183+
args.tensorboard_log = f"runs/{run_name}"
184+
156185
exp_manager = ExperimentManager(
157186
args,
158187
args.algo,
@@ -188,11 +217,17 @@
188217
)
189218

190219
# Prepare experiment and launch hyperparameter optimization if needed
191-
model = exp_manager.setup_experiment()
220+
results = exp_manager.setup_experiment()
221+
if results is not None:
222+
model, saved_hyperparams = results
223+
if args.track:
224+
# we need to save the loaded hyperparameters
225+
args.saved_hyperparams = saved_hyperparams
226+
run.config.setdefaults(vars(args))
192227

193-
# Normal training
194-
if model is not None:
195-
exp_manager.learn(model)
196-
exp_manager.save_trained_model(model)
228+
# Normal training
229+
if model is not None:
230+
exp_manager.learn(model)
231+
exp_manager.save_trained_model(model)
197232
else:
198233
exp_manager.hyperparameters_optimization()

utils/exp_manager.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def __init__(
153153
)
154154
self.params_path = f"{self.save_path}/{self.env_id}"
155155

156-
def setup_experiment(self) -> Optional[BaseAlgorithm]:
156+
def setup_experiment(self) -> Optional[Tuple[BaseAlgorithm, Dict[str, Any]]]:
157157
"""
158158
Read hyperparameters, pre-process them (create schedules, wrappers, callbacks, action noise objects)
159159
create the environment and possibly the model.
@@ -187,7 +187,7 @@ def setup_experiment(self) -> Optional[BaseAlgorithm]:
187187
)
188188

189189
self._save_config(saved_hyperparams)
190-
return model
190+
return model, saved_hyperparams
191191

192192
def learn(self, model: BaseAlgorithm) -> None:
193193
"""

0 commit comments

Comments (0)