Added ppo hyperparams for 10 mujoco envs (#155)

SonSang · araffin · web-flow · commit 0955752a5db6 · 2021-09-01T21:24:36.000+02:00
Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
diff --git a/hyperparams/ppo.yml b/hyperparams/ppo.yml
@@ -343,3 +343,185 @@ CarRacing-v0:
   policy_kwargs: "dict(log_std_init=-2,
                        ortho_init=False,
                        )"
+
+# Tuned
+# 10 mujoco envs
+Ant-v2:
+  policy: 'MlpPolicy'  # run setup
+  n_envs: 1
+  n_timesteps: !!float 1e7
+  n_steps: 512  # rollout and mini-batch schedule
+  batch_size: 32
+  n_epochs: 10
+  learning_rate: 1.90609e-05  # policy update
+  clip_range: 0.1
+  max_grad_norm: 0.6
+  gamma: 0.98  # discounting / GAE
+  gae_lambda: 0.8
+  ent_coef: 4.9646e-07  # loss weights
+  vf_coef: 0.677239
+
+HalfCheetah-v2:
+  policy: 'MlpPolicy'  # run setup
+  n_envs: 1
+  n_timesteps: !!float 1e6
+  n_steps: 512  # rollout and mini-batch schedule
+  batch_size: 64
+  n_epochs: 20
+  learning_rate: 2.0633e-05  # policy update
+  clip_range: 0.1
+  max_grad_norm: 0.8
+  gamma: 0.98  # discounting / GAE
+  gae_lambda: 0.92
+  ent_coef: 0.000401762  # loss weights
+  vf_coef: 0.58096
+  policy_kwargs: "dict(
+                    log_std_init=-2,
+                    ortho_init=False,
+                    activation_fn=nn.ReLU,
+                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
+                  )"
+
+Hopper-v2:
+  policy: 'MlpPolicy'  # run setup
+  n_envs: 1
+  n_timesteps: !!float 1e6
+  n_steps: 512  # rollout and mini-batch schedule
+  batch_size: 32
+  n_epochs: 5
+  learning_rate: 9.80828e-05  # policy update
+  clip_range: 0.2
+  max_grad_norm: 0.7
+  gamma: 0.999  # discounting / GAE
+  gae_lambda: 0.99
+  ent_coef: 0.00229519  # loss weights
+  vf_coef: 0.835671
+  policy_kwargs: "dict(
+                    log_std_init=-2,
+                    ortho_init=False,
+                    activation_fn=nn.ReLU,
+                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
+                  )"
+
+HumanoidStandup-v2:
+  policy: 'MlpPolicy'  # run setup
+  n_envs: 1
+  n_timesteps: !!float 1e7
+  n_steps: 512  # rollout and mini-batch schedule
+  batch_size: 32
+  n_epochs: 20
+  learning_rate: 2.55673e-05  # policy update
+  clip_range: 0.3
+  max_grad_norm: 0.7
+  gamma: 0.99  # discounting / GAE
+  gae_lambda: 0.9
+  ent_coef: 3.62109e-06  # loss weights
+  vf_coef: 0.430793
+  policy_kwargs: "dict(
+                    log_std_init=-2,
+                    ortho_init=False,
+                    activation_fn=nn.ReLU,
+                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
+                  )"
+
+Humanoid-v2:
+  policy: 'MlpPolicy'  # run setup
+  n_envs: 1
+  n_timesteps: !!float 1e7
+  n_steps: 512  # rollout and mini-batch schedule
+  batch_size: 256
+  n_epochs: 5
+  learning_rate: 3.56987e-05  # policy update
+  clip_range: 0.3
+  max_grad_norm: 2
+  gamma: 0.95  # discounting / GAE
+  gae_lambda: 0.9
+  ent_coef: 0.00238306  # loss weights
+  vf_coef: 0.431892
+  policy_kwargs: "dict(
+                    log_std_init=-2,
+                    ortho_init=False,
+                    activation_fn=nn.ReLU,
+                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
+                  )"
+
+InvertedDoublePendulum-v2:  # PPO settings, no policy_kwargs override
+  n_envs: 1
+  policy: 'MlpPolicy'
+  n_timesteps: !!float 1e6
+  batch_size: 128  # capped at n_steps * n_envs; 512 exceeded the rollout
+  n_steps: 128  # rollout holds n_steps * n_envs = 128 transitions
+  gamma: 0.98
+  learning_rate: 0.000155454
+  ent_coef: 1.05057e-06
+  clip_range: 0.4
+  n_epochs: 10
+  gae_lambda: 0.8
+  max_grad_norm: 0.5
+  vf_coef: 0.695929
+
+InvertedPendulum-v2:  # PPO settings, no policy_kwargs override
+  n_envs: 1
+  policy: 'MlpPolicy'
+  n_timesteps: !!float 1e6
+  batch_size: 32  # capped at n_steps * n_envs; 64 exceeded the rollout
+  n_steps: 32  # rollout holds n_steps * n_envs = 32 transitions
+  gamma: 0.999
+  learning_rate: 0.000222425
+  ent_coef: 1.37976e-07
+  clip_range: 0.4
+  n_epochs: 5
+  gae_lambda: 0.9
+  max_grad_norm: 0.3
+  vf_coef: 0.19816
+
+Reacher-v2:
+  policy: 'MlpPolicy'  # run setup
+  n_envs: 1
+  n_timesteps: !!float 1e6
+  n_steps: 512  # rollout and mini-batch schedule
+  batch_size: 32
+  n_epochs: 5
+  learning_rate: 0.000104019  # policy update
+  clip_range: 0.3
+  max_grad_norm: 0.9
+  gamma: 0.9  # discounting / GAE
+  gae_lambda: 1.0
+  ent_coef: 7.52585e-08  # loss weights
+  vf_coef: 0.950368
+
+Swimmer-v2:
+  policy: 'MlpPolicy'  # run setup
+  n_envs: 1
+  n_timesteps: !!float 1e6
+  n_steps: 512  # rollout and mini-batch schedule
+  batch_size: 32
+  n_epochs: 10
+  learning_rate: 5.49717e-05  # policy update
+  clip_range: 0.3
+  max_grad_norm: 0.6
+  gamma: 0.9999  # discounting / GAE
+  gae_lambda: 0.95
+  ent_coef: 0.0554757  # loss weights
+  vf_coef: 0.38782
+  policy_kwargs: "dict(
+                    log_std_init=-2,
+                    ortho_init=False,
+                    activation_fn=nn.ReLU,
+                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
+                  )"
+
+Walker2d-v2:
+  policy: 'MlpPolicy'  # run setup
+  n_envs: 1
+  n_timesteps: !!float 1e6
+  n_steps: 512  # rollout and mini-batch schedule
+  batch_size: 32
+  n_epochs: 20
+  learning_rate: 5.05041e-05  # policy update
+  clip_range: 0.1
+  max_grad_norm: 1
+  gamma: 0.99  # discounting / GAE
+  gae_lambda: 0.95
+  ent_coef: 0.000585045  # loss weights
+  vf_coef: 0.871923