1
- from typing import Any , ClassVar , Dict , Optional , Type , TypeVar , Union
1
+ from typing import Any , ClassVar , Optional , TypeVar , Union
2
2
3
3
import torch as th
4
4
from gymnasium import spaces
@@ -57,15 +57,15 @@ class A2C(OnPolicyAlgorithm):
57
57
:param _init_setup_model: Whether or not to build the network at the creation of the instance
58
58
"""
59
59
60
- policy_aliases : ClassVar [Dict [str , Type [BasePolicy ]]] = {
60
+ policy_aliases : ClassVar [dict [str , type [BasePolicy ]]] = {
61
61
"MlpPolicy" : ActorCriticPolicy ,
62
62
"CnnPolicy" : ActorCriticCnnPolicy ,
63
63
"MultiInputPolicy" : MultiInputActorCriticPolicy ,
64
64
}
65
65
66
66
def __init__ (
67
67
self ,
68
- policy : Union [str , Type [ActorCriticPolicy ]],
68
+ policy : Union [str , type [ActorCriticPolicy ]],
69
69
env : Union [GymEnv , str ],
70
70
learning_rate : Union [float , Schedule ] = 7e-4 ,
71
71
n_steps : int = 5 ,
@@ -78,12 +78,12 @@ def __init__(
78
78
use_rms_prop : bool = True ,
79
79
use_sde : bool = False ,
80
80
sde_sample_freq : int = - 1 ,
81
- rollout_buffer_class : Optional [Type [RolloutBuffer ]] = None ,
82
- rollout_buffer_kwargs : Optional [Dict [str , Any ]] = None ,
81
+ rollout_buffer_class : Optional [type [RolloutBuffer ]] = None ,
82
+ rollout_buffer_kwargs : Optional [dict [str , Any ]] = None ,
83
83
normalize_advantage : bool = False ,
84
84
stats_window_size : int = 100 ,
85
85
tensorboard_log : Optional [str ] = None ,
86
- policy_kwargs : Optional [Dict [str , Any ]] = None ,
86
+ policy_kwargs : Optional [dict [str , Any ]] = None ,
87
87
verbose : int = 0 ,
88
88
seed : Optional [int ] = None ,
89
89
device : Union [th .device , str ] = "auto" ,
0 commit comments