Skip to content

Commit 0955752

Browse files
SonSang and araffin authored
Added ppo hyperparams for 10 mujoco envs (#155)
Co-authored-by: Antonin RAFFIN <[email protected]>
1 parent 6cac948 commit 0955752

File tree

1 file changed

+182
-0
lines changed

1 file changed

+182
-0
lines changed

hyperparams/ppo.yml

+182
Original file line numberDiff line numberDiff line change
@@ -343,3 +343,185 @@ CarRacing-v0:
343343
policy_kwargs: "dict(log_std_init=-2,
344344
ortho_init=False,
345345
)"
346+
347+
# Tuned
348+
# 10 mujoco envs
349+
Ant-v2:
350+
n_envs: 1
351+
policy: 'MlpPolicy'
352+
n_timesteps: !!float 1e7
353+
batch_size: 32
354+
n_steps: 512
355+
gamma: 0.98
356+
learning_rate: 1.90609e-05
357+
ent_coef: 4.9646e-07
358+
clip_range: 0.1
359+
n_epochs: 10
360+
gae_lambda: 0.8
361+
max_grad_norm: 0.6
362+
vf_coef: 0.677239
363+
364+
HalfCheetah-v2:
365+
n_envs: 1
366+
policy: 'MlpPolicy'
367+
n_timesteps: !!float 1e6
368+
batch_size: 64
369+
n_steps: 512
370+
gamma: 0.98
371+
learning_rate: 2.0633e-05
372+
ent_coef: 0.000401762
373+
clip_range: 0.1
374+
n_epochs: 20
375+
gae_lambda: 0.92
376+
max_grad_norm: 0.8
377+
vf_coef: 0.58096
378+
policy_kwargs: "dict(
379+
log_std_init=-2,
380+
ortho_init=False,
381+
activation_fn=nn.ReLU,
382+
net_arch=[dict(pi=[256, 256], vf=[256, 256])]
383+
)"
384+
385+
Hopper-v2:
386+
n_envs: 1
387+
policy: 'MlpPolicy'
388+
n_timesteps: !!float 1e6
389+
batch_size: 32
390+
n_steps: 512
391+
gamma: 0.999
392+
learning_rate: 9.80828e-05
393+
ent_coef: 0.00229519
394+
clip_range: 0.2
395+
n_epochs: 5
396+
gae_lambda: 0.99
397+
max_grad_norm: 0.7
398+
vf_coef: 0.835671
399+
policy_kwargs: "dict(
400+
log_std_init=-2,
401+
ortho_init=False,
402+
activation_fn=nn.ReLU,
403+
net_arch=[dict(pi=[256, 256], vf=[256, 256])]
404+
)"
405+
406+
HumanoidStandup-v2:
407+
n_envs: 1
408+
policy: 'MlpPolicy'
409+
n_timesteps: !!float 1e7
410+
batch_size: 32
411+
n_steps: 512
412+
gamma: 0.99
413+
learning_rate: 2.55673e-05
414+
ent_coef: 3.62109e-06
415+
clip_range: 0.3
416+
n_epochs: 20
417+
gae_lambda: 0.9
418+
max_grad_norm: 0.7
419+
vf_coef: 0.430793
420+
policy_kwargs: "dict(
421+
log_std_init=-2,
422+
ortho_init=False,
423+
activation_fn=nn.ReLU,
424+
net_arch=[dict(pi=[256, 256], vf=[256, 256])]
425+
)"
426+
427+
Humanoid-v2:
428+
n_envs: 1
429+
policy: 'MlpPolicy'
430+
n_timesteps: !!float 1e7
431+
batch_size: 256
432+
n_steps: 512
433+
gamma: 0.95
434+
learning_rate: 3.56987e-05
435+
ent_coef: 0.00238306
436+
clip_range: 0.3
437+
n_epochs: 5
438+
gae_lambda: 0.9
439+
max_grad_norm: 2
440+
vf_coef: 0.431892
441+
policy_kwargs: "dict(
442+
log_std_init=-2,
443+
ortho_init=False,
444+
activation_fn=nn.ReLU,
445+
net_arch=[dict(pi=[256, 256], vf=[256, 256])]
446+
)"
447+
448+
InvertedDoublePendulum-v2:
449+
n_envs: 1
450+
policy: 'MlpPolicy'
451+
n_timesteps: !!float 1e6
452+
batch_size: 512
453+
n_steps: 128
454+
gamma: 0.98
455+
learning_rate: 0.000155454
456+
ent_coef: 1.05057e-06
457+
clip_range: 0.4
458+
n_epochs: 10
459+
gae_lambda: 0.8
460+
max_grad_norm: 0.5
461+
vf_coef: 0.695929
462+
463+
InvertedPendulum-v2:
464+
n_envs: 1
465+
policy: 'MlpPolicy'
466+
n_timesteps: !!float 1e6
467+
batch_size: 64
468+
n_steps: 32
469+
gamma: 0.999
470+
learning_rate: 0.000222425
471+
ent_coef: 1.37976e-07
472+
clip_range: 0.4
473+
n_epochs: 5
474+
gae_lambda: 0.9
475+
max_grad_norm: 0.3
476+
vf_coef: 0.19816
477+
478+
Reacher-v2:
479+
n_envs: 1
480+
policy: 'MlpPolicy'
481+
n_timesteps: !!float 1e6
482+
batch_size: 32
483+
n_steps: 512
484+
gamma: 0.9
485+
learning_rate: 0.000104019
486+
ent_coef: 7.52585e-08
487+
clip_range: 0.3
488+
n_epochs: 5
489+
gae_lambda: 1.0
490+
max_grad_norm: 0.9
491+
vf_coef: 0.950368
492+
493+
Swimmer-v2:
494+
n_envs: 1
495+
policy: 'MlpPolicy'
496+
n_timesteps: !!float 1e6
497+
batch_size: 32
498+
n_steps: 512
499+
gamma: 0.9999
500+
learning_rate: 5.49717e-05
501+
ent_coef: 0.0554757
502+
clip_range: 0.3
503+
n_epochs: 10
504+
gae_lambda: 0.95
505+
max_grad_norm: 0.6
506+
vf_coef: 0.38782
507+
policy_kwargs: "dict(
508+
log_std_init=-2,
509+
ortho_init=False,
510+
activation_fn=nn.ReLU,
511+
net_arch=[dict(pi=[256, 256], vf=[256, 256])]
512+
)"
513+
514+
Walker2d-v2:
515+
n_envs: 1
516+
policy: 'MlpPolicy'
517+
n_timesteps: !!float 1e6
518+
batch_size: 32
519+
n_steps: 512
520+
gamma: 0.99
521+
learning_rate: 5.05041e-05
522+
ent_coef: 0.000585045
523+
clip_range: 0.1
524+
n_epochs: 20
525+
gae_lambda: 0.95
526+
max_grad_norm: 1
527+
vf_coef: 0.871923

0 commit comments

Comments
 (0)