22import warnings
33from typing import Any , Dict , List , Optional , Tuple
44
5- import gym
5+ import gymnasium as gym
66import numpy as np
77from ray .rllib .env .env_context import EnvContext
88from ray .rllib .utils .spaces .repeated import Repeated
@@ -61,17 +61,17 @@ def __init__(self, env_config: EnvContext) -> None:
6161 self .action_space = gym .spaces .Discrete (self .max_num_children )
6262 logger .debug ("leaving graphenv construction" )
6363
64- def reset (self ) -> Dict [str , np .ndarray ]:
64+ def reset (self , * , seed = None , options = None ) -> Tuple [ Dict [str , np .ndarray ], Dict ]:
6565 """Reset this state to the root vertex. It is possible for state.root to
6666 return different root vertices on each call.
6767
6868 Returns:
6969 Dict[str, np.ndarray]: Observation of the root vertex.
7070 """
7171 self .state = self .state .root
72- return self .make_observation ()
72+ return self .make_observation (), self . state . info
7373
74- def step (self , action : int ) -> Tuple [Dict [str , np .ndarray ], float , bool , dict ]:
74+ def step (self , action : int ) -> Tuple [Dict [str , np .ndarray ], float , bool , bool , dict ]:
7575 """Steps the environment to a new state by taking an action. In the
7676 case of GraphEnv, the action specifies which next vertex to move to and
7777 this method advances the environment to that vertex.
@@ -86,7 +86,8 @@ def step(self, action: int) -> Tuple[Dict[str, np.ndarray], float, bool, dict]:
8686 Tuple[Dict[str, np.ndarray], float, bool, dict]: Tuple of:
8787 a dictionary of the new state's observation,
8888 the reward received by moving to the new state's vertex,
89- a bool which is true iff the new stae is a terminal vertex,
89+ a bool which is true iff the new state is a terminal vertex,
90+                a bool which is true if the search is truncated,
9091 a dictionary of debugging information related to this call
9192 """
9293
@@ -115,10 +116,17 @@ def step(self, action: int) -> Tuple[Dict[str, np.ndarray], float, bool, dict]:
115116 RuntimeWarning ,
116117 )
117118
119+ # In RLlib 2.3, the config options "no_done_at_end", "horizon", and "soft_horizon" are no longer supported
120+ # according to the migration guide https://docs.google.com/document/d/1lxYK1dI5s0Wo_jmB6V6XiP-_aEBsXDykXkD1AXRase4/edit#
121+ # Instead, wrap your gymnasium environment with a TimeLimit wrapper,
122+ # which will set truncated according to the number of timesteps
123+ # see https://gymnasium.farama.org/api/wrappers/misc_wrappers/#gymnasium.wrappers.TimeLimit
124+ truncated = False
118125 result = (
119126 self .make_observation (),
120127 self .state .reward ,
121128 self .state .terminal ,
129+ truncated ,
122130 self .state .info ,
123131 )
124132 logger .debug (
0 commit comments