Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Gymnasium v1.1.0 #2095

Merged
merged 4 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/guide/vec_envs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ Consider the following example for a custom env:
print(vec_env.env_method("get_wrapper_attr", "mu"))
# Change `mu` attribute via the setter
vec_env.env_method("set_mu", "mu", 0.1)
# If the variable exists, you can also use `set_wrapper_attr` to set it
assert vec_env.has_attr("mu")
vec_env.env_method("set_wrapper_attr", "mu", 0.1)


In this example ``env.mu`` cannot be accessed/changed directly because the environment is wrapped in a ``VecEnv`` and may additionally be wrapped by other wrappers (see `GH#1573 <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ for a longer explanation).
Expand Down
6 changes: 5 additions & 1 deletion docs/misc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Changelog
==========

Release 2.6.0a1 (WIP)
Release 2.6.0a2 (WIP)
--------------------------


Expand All @@ -14,6 +14,7 @@ New Features:
^^^^^^^^^^^^^
- Added ``has_attr`` method for ``VecEnv`` to check if an attribute exists
- Added ``LogEveryNTimesteps`` callback to dump logs every N timesteps (note: you need to pass ``log_interval=None`` to avoid any interference)
- Added Gymnasium v1.1 support

Bug Fixes:
^^^^^^^^^^
Expand All @@ -36,11 +37,14 @@ Others:
^^^^^^^
- Updated black from v24 to v25
- Improved error messages when checking Box space equality (loading ``VecNormalize``)
- Updated test to reflect how ``set_wrapper_attr`` should be used now

Documentation:
^^^^^^^^^^^^^^
- Clarify the use of Gym wrappers with ``make_vec_env`` in the section on Vectorized Environments (@pstahlhofen)
- Updated callback doc for ``EveryNTimesteps``
- Added doc on how to set env attributes via ``VecEnv`` calls


Release 2.5.0 (2025-01-27)
--------------------------
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
packages=[package for package in find_packages() if package.startswith("stable_baselines3")],
package_data={"stable_baselines3": ["py.typed", "version.txt"]},
install_requires=[
"gymnasium>=0.29.1,<1.1.0",
"gymnasium>=0.29.1,<1.2.0",
"numpy>=1.20,<3.0",
"torch>=2.3,<3.0",
# For saving models
Expand Down
6 changes: 3 additions & 3 deletions stable_baselines3/common/off_policy_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ def dump_logs(self) -> None:
self.logger.record("time/time_elapsed", int(time_elapsed), exclude="tensorboard")
self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
if self.use_sde:
self.logger.record("train/std", (self.actor.get_std()).mean().item())
self.logger.record("train/std", (self.actor.get_std()).mean().item()) # type: ignore[operator]

if len(self.ep_success_buffer) > 0:
self.logger.record("rollout/success_rate", safe_mean(self.ep_success_buffer))
Expand Down Expand Up @@ -544,14 +544,14 @@ def collect_rollouts(
assert train_freq.unit == TrainFrequencyUnit.STEP, "You must use only one env when doing episodic training."

if self.use_sde:
self.actor.reset_noise(env.num_envs)
self.actor.reset_noise(env.num_envs) # type: ignore[operator]

callback.on_rollout_start()
continue_training = True
while should_collect_more_steps(train_freq, num_collected_steps, num_collected_episodes):
if self.use_sde and self.sde_sample_freq > 0 and num_collected_steps % self.sde_sample_freq == 0:
# Sample a new noise matrix
self.actor.reset_noise(env.num_envs)
self.actor.reset_noise(env.num_envs) # type: ignore[operator]

# Select action randomly or according to policy
actions, buffer_actions = self._sample_action(learning_starts, action_noise, env.num_envs)
Expand Down
1 change: 1 addition & 0 deletions stable_baselines3/sac/sac.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ def train(self, gradient_steps: int, batch_size: int = 64) -> None:
# so we don't change it with other losses
# see https://github.com/rail-berkeley/softlearning/issues/60
ent_coef = th.exp(self.log_ent_coef.detach())
assert isinstance(self.target_entropy, float)
ent_coef_loss = -(self.log_ent_coef * (log_prob + self.target_entropy).detach()).mean()
ent_coef_losses.append(ent_coef_loss.item())
else:
Expand Down
2 changes: 1 addition & 1 deletion stable_baselines3/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.6.0a1
2.6.0a2
56 changes: 31 additions & 25 deletions tests/test_vec_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ def test_vecenv_custom_calls(vec_env_class, vec_env_wrapper):
"""Test access to methods/attributes of vectorized environments"""

def make_env():
return CustomGymEnv(spaces.Box(low=np.zeros(2), high=np.ones(2)))
# Wrap the env to check that get_attr and set_attr are working properly
return Monitor(CustomGymEnv(spaces.Box(low=np.zeros(2), high=np.ones(2))))

vec_env = vec_env_class([make_env for _ in range(N_ENVS)])

Expand Down Expand Up @@ -133,21 +134,23 @@ def make_env():

assert not vec_env.has_attr("dummy2")
# Set the value on the original env
# Note: doesn't work anymore with gym >= 1.1,
# the value needs to exist beforehand
# `set_wrapper_attr` doesn't exist before v1.0
if gym.__version__ > "1":
vec_env.env_method("set_wrapper_attr", "dummy2", 2)
assert vec_env.get_attr("dummy2") == [2] * N_ENVS
if vec_env_class == DummyVecEnv:
assert vec_env.envs[0].unwrapped.dummy2 == 2
# if vec_env_class == DummyVecEnv:
# assert vec_env.envs[0].unwrapped.dummy2 == 2

env_method_results = vec_env.env_method("custom_method", 1, indices=None, dim_1=2)
setattr_results = []
# Set current_step to an arbitrary value
# Set new variable dummy1 of the last wrapper to an arbitrary value
for env_idx in range(N_ENVS):
setattr_results.append(vec_env.set_attr("current_step", env_idx, indices=env_idx))
setattr_results.append(vec_env.set_attr("dummy1", env_idx, indices=env_idx))
# Retrieve the value for each environment
assert vec_env.has_attr("current_step")
getattr_results = vec_env.get_attr("current_step")
assert vec_env.has_attr("dummy1")
getattr_results = vec_env.get_attr("dummy1")

assert len(env_method_results) == N_ENVS
assert len(setattr_results) == N_ENVS
Expand All @@ -165,28 +168,31 @@ def make_env():
assert len(env_method_subset) == 2

# Test to change value for all the environments
setattr_result = vec_env.set_attr("current_step", 42, indices=None)
getattr_result = vec_env.get_attr("current_step")
setattr_result = vec_env.set_attr("dummy1", 42, indices=None)
getattr_result = vec_env.get_attr("dummy1")
assert setattr_result is None
assert getattr_result == [42 for _ in range(N_ENVS)]

# Additional tests for setattr that does not affect all the environments
vec_env.reset()
setattr_result = vec_env.set_attr("current_step", 12, indices=[0, 1])
getattr_result = vec_env.get_attr("current_step")
getattr_result_subset = vec_env.get_attr("current_step", indices=[0, 1])
assert setattr_result is None
assert getattr_result == [12 for _ in range(2)] + [0 for _ in range(N_ENVS - 2)]
assert getattr_result_subset == [12, 12]
assert vec_env.get_attr("current_step", indices=[0, 2]) == [12, 0]

vec_env.reset()
# Change value only for first and last environment
setattr_result = vec_env.set_attr("current_step", 12, indices=[0, -1])
getattr_result = vec_env.get_attr("current_step")
assert setattr_result is None
assert getattr_result == [12] + [0 for _ in range(N_ENVS - 2)] + [12]
assert vec_env.get_attr("current_step", indices=[-1]) == [12]
# Since gym >= 0.29, set_attr only sets the attribute on the last wrapper
# but `set_wrapper_attr` doesn't exist before v1.0
if gym.__version__ > "1":
setattr_result = vec_env.env_method("set_wrapper_attr", "current_step", 12, indices=[0, 1])
getattr_result = vec_env.get_attr("current_step")
getattr_result_subset = vec_env.get_attr("current_step", indices=[0, 1])
assert setattr_result == [True, True]
assert getattr_result == [12 for _ in range(2)] + [0 for _ in range(N_ENVS - 2)]
assert getattr_result_subset == [12, 12]
assert vec_env.get_attr("current_step", indices=[0, 2]) == [12, 0]

vec_env.reset()
# Change value only for first and last environment
setattr_result = vec_env.env_method("set_wrapper_attr", "current_step", 12, indices=[0, -1])
getattr_result = vec_env.get_attr("current_step")
assert setattr_result == [True, True]
assert getattr_result == [12] + [0 for _ in range(N_ENVS - 2)] + [12]
assert vec_env.get_attr("current_step", indices=[-1]) == [12]

# Checks that options are correctly passed
assert vec_env.get_attr("current_options")[0] is None
Expand Down Expand Up @@ -281,7 +287,7 @@ def test_vecenv_terminal_obs(vec_env_class, vec_env_wrapper):
("discrete", spaces.Discrete(2)),
("multidiscrete", spaces.MultiDiscrete([2, 3])),
("multibinary", spaces.MultiBinary(3)),
("continuous", spaces.Box(low=np.zeros(2), high=np.ones(2))),
("continuous", spaces.Box(low=np.zeros(2, dtype=np.float32), high=np.ones(2, dtype=np.float32))),
]
)

Expand Down