Skip to content

Commit 55d6f18

Browse files
Add support for Gymnasium v1.1.0 (#2095)
* Add support for Gymnasium v1.1.0
* Update tests to reflect new set_wrapper_attr behavior
* Fix mypy issues
* Update doc and changelog

---------

Co-authored-by: Antonin RAFFIN <[email protected]>
1 parent fa21bce commit 55d6f18

File tree

7 files changed

+45
-31
lines changed

7 files changed

+45
-31
lines changed

docs/guide/vec_envs.rst

+3
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ Consider the following example for a custom env:
148148
print(vec_env.env_method("get_wrapper_attr", "mu"))
149149
# Change `mu` attribute via the setter
150150
vec_env.env_method("set_mu", "mu", 0.1)
151+
# If the variable exists, you can also use `set_wrapper_attr` to set it
152+
assert vec_env.has_attr("mu")
153+
vec_env.env_method("set_wrapper_attr", "mu", 0.1)
151154
152155
153156
In this example ``env.mu`` cannot be accessed/changed directly because it is wrapped in a ``VecEnv`` and because it could be wrapped with other wrappers (see `GH#1573 <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ for a longer explanation).

docs/misc/changelog.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Changelog
44
==========
55

6-
Release 2.6.0a1 (WIP)
6+
Release 2.6.0a2 (WIP)
77
--------------------------
88

99

@@ -14,6 +14,7 @@ New Features:
1414
^^^^^^^^^^^^^
1515
- Added ``has_attr`` method for ``VecEnv`` to check if an attribute exists
1616
- Added ``LogEveryNTimesteps`` callback to dump logs every N timesteps (note: you need to pass ``log_interval=None`` to avoid any interference)
17+
- Added Gymnasium v1.1 support
1718

1819
Bug Fixes:
1920
^^^^^^^^^^
@@ -36,11 +37,14 @@ Others:
3637
^^^^^^^
3738
- Updated black from v24 to v25
3839
- Improved error messages when checking Box space equality (loading ``VecNormalize``)
40+
- Updated test to reflect how ``set_wrapper_attr`` should be used now
3941

4042
Documentation:
4143
^^^^^^^^^^^^^^
4244
- Clarify the use of Gym wrappers with ``make_vec_env`` in the section on Vectorized Environments (@pstahlhofen)
4345
- Updated callback doc for ``EveryNTimesteps``
46+
- Added doc on how to set env attributes via ``VecEnv`` calls
47+
4448

4549
Release 2.5.0 (2025-01-27)
4650
--------------------------

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
packages=[package for package in find_packages() if package.startswith("stable_baselines3")],
7777
package_data={"stable_baselines3": ["py.typed", "version.txt"]},
7878
install_requires=[
79-
"gymnasium>=0.29.1,<1.1.0",
79+
"gymnasium>=0.29.1,<1.2.0",
8080
"numpy>=1.20,<3.0",
8181
"torch>=2.3,<3.0",
8282
# For saving models

stable_baselines3/common/off_policy_algorithm.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ def dump_logs(self) -> None:
423423
self.logger.record("time/time_elapsed", int(time_elapsed), exclude="tensorboard")
424424
self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
425425
if self.use_sde:
426-
self.logger.record("train/std", (self.actor.get_std()).mean().item())
426+
self.logger.record("train/std", (self.actor.get_std()).mean().item()) # type: ignore[operator]
427427

428428
if len(self.ep_success_buffer) > 0:
429429
self.logger.record("rollout/success_rate", safe_mean(self.ep_success_buffer))
@@ -544,14 +544,14 @@ def collect_rollouts(
544544
assert train_freq.unit == TrainFrequencyUnit.STEP, "You must use only one env when doing episodic training."
545545

546546
if self.use_sde:
547-
self.actor.reset_noise(env.num_envs)
547+
self.actor.reset_noise(env.num_envs) # type: ignore[operator]
548548

549549
callback.on_rollout_start()
550550
continue_training = True
551551
while should_collect_more_steps(train_freq, num_collected_steps, num_collected_episodes):
552552
if self.use_sde and self.sde_sample_freq > 0 and num_collected_steps % self.sde_sample_freq == 0:
553553
# Sample a new noise matrix
554-
self.actor.reset_noise(env.num_envs)
554+
self.actor.reset_noise(env.num_envs) # type: ignore[operator]
555555

556556
# Select action randomly or according to policy
557557
actions, buffer_actions = self._sample_action(learning_starts, action_noise, env.num_envs)

stable_baselines3/sac/sac.py

+1
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ def train(self, gradient_steps: int, batch_size: int = 64) -> None:
228228
# so we don't change it with other losses
229229
# see https://github.com/rail-berkeley/softlearning/issues/60
230230
ent_coef = th.exp(self.log_ent_coef.detach())
231+
assert isinstance(self.target_entropy, float)
231232
ent_coef_loss = -(self.log_ent_coef * (log_prob + self.target_entropy).detach()).mean()
232233
ent_coef_losses.append(ent_coef_loss.item())
233234
else:

stable_baselines3/version.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.6.0a1
1+
2.6.0a2

tests/test_vec_envs.py

+31-25
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ def test_vecenv_custom_calls(vec_env_class, vec_env_wrapper):
9797
"""Test access to methods/attributes of vectorized environments"""
9898

9999
def make_env():
100-
return CustomGymEnv(spaces.Box(low=np.zeros(2), high=np.ones(2)))
100+
# Wrap the env to check that get_attr and set_attr are working properly
101+
return Monitor(CustomGymEnv(spaces.Box(low=np.zeros(2), high=np.ones(2))))
101102

102103
vec_env = vec_env_class([make_env for _ in range(N_ENVS)])
103104

@@ -133,21 +134,23 @@ def make_env():
133134

134135
assert not vec_env.has_attr("dummy2")
135136
# Set the value on the original env
137+
# Note: doesn't work anymore with gym >= 1.1,
138+
# the value needs to exist beforehand
136139
# `set_wrapper_attr` doesn't exist before v1.0
137140
if gym.__version__ > "1":
138141
vec_env.env_method("set_wrapper_attr", "dummy2", 2)
139142
assert vec_env.get_attr("dummy2") == [2] * N_ENVS
140-
if vec_env_class == DummyVecEnv:
141-
assert vec_env.envs[0].unwrapped.dummy2 == 2
143+
# if vec_env_class == DummyVecEnv:
144+
# assert vec_env.envs[0].unwrapped.dummy2 == 2
142145

143146
env_method_results = vec_env.env_method("custom_method", 1, indices=None, dim_1=2)
144147
setattr_results = []
145-
# Set current_step to an arbitrary value
148+
# Set new variable dummy1 of the last wrapper to an arbitrary value
146149
for env_idx in range(N_ENVS):
147-
setattr_results.append(vec_env.set_attr("current_step", env_idx, indices=env_idx))
150+
setattr_results.append(vec_env.set_attr("dummy1", env_idx, indices=env_idx))
148151
# Retrieve the value for each environment
149-
assert vec_env.has_attr("current_step")
150-
getattr_results = vec_env.get_attr("current_step")
152+
assert vec_env.has_attr("dummy1")
153+
getattr_results = vec_env.get_attr("dummy1")
151154

152155
assert len(env_method_results) == N_ENVS
153156
assert len(setattr_results) == N_ENVS
@@ -165,28 +168,31 @@ def make_env():
165168
assert len(env_method_subset) == 2
166169

167170
# Test to change value for all the environments
168-
setattr_result = vec_env.set_attr("current_step", 42, indices=None)
169-
getattr_result = vec_env.get_attr("current_step")
171+
setattr_result = vec_env.set_attr("dummy1", 42, indices=None)
172+
getattr_result = vec_env.get_attr("dummy1")
170173
assert setattr_result is None
171174
assert getattr_result == [42 for _ in range(N_ENVS)]
172175

173176
# Additional tests for setattr that does not affect all the environments
174177
vec_env.reset()
175-
setattr_result = vec_env.set_attr("current_step", 12, indices=[0, 1])
176-
getattr_result = vec_env.get_attr("current_step")
177-
getattr_result_subset = vec_env.get_attr("current_step", indices=[0, 1])
178-
assert setattr_result is None
179-
assert getattr_result == [12 for _ in range(2)] + [0 for _ in range(N_ENVS - 2)]
180-
assert getattr_result_subset == [12, 12]
181-
assert vec_env.get_attr("current_step", indices=[0, 2]) == [12, 0]
182-
183-
vec_env.reset()
184-
# Change value only for first and last environment
185-
setattr_result = vec_env.set_attr("current_step", 12, indices=[0, -1])
186-
getattr_result = vec_env.get_attr("current_step")
187-
assert setattr_result is None
188-
assert getattr_result == [12] + [0 for _ in range(N_ENVS - 2)] + [12]
189-
assert vec_env.get_attr("current_step", indices=[-1]) == [12]
178+
# Since gym >= 0.29, set_attr only sets the attribute on the last wrapper
179+
# but `set_wrapper_attr` doesn't exist before v1.0
180+
if gym.__version__ > "1":
181+
setattr_result = vec_env.env_method("set_wrapper_attr", "current_step", 12, indices=[0, 1])
182+
getattr_result = vec_env.get_attr("current_step")
183+
getattr_result_subset = vec_env.get_attr("current_step", indices=[0, 1])
184+
assert setattr_result == [True, True]
185+
assert getattr_result == [12 for _ in range(2)] + [0 for _ in range(N_ENVS - 2)]
186+
assert getattr_result_subset == [12, 12]
187+
assert vec_env.get_attr("current_step", indices=[0, 2]) == [12, 0]
188+
189+
vec_env.reset()
190+
# Change value only for first and last environment
191+
setattr_result = vec_env.env_method("set_wrapper_attr", "current_step", 12, indices=[0, -1])
192+
getattr_result = vec_env.get_attr("current_step")
193+
assert setattr_result == [True, True]
194+
assert getattr_result == [12] + [0 for _ in range(N_ENVS - 2)] + [12]
195+
assert vec_env.get_attr("current_step", indices=[-1]) == [12]
190196

191197
# Checks that options are correctly passed
192198
assert vec_env.get_attr("current_options")[0] is None
@@ -281,7 +287,7 @@ def test_vecenv_terminal_obs(vec_env_class, vec_env_wrapper):
281287
("discrete", spaces.Discrete(2)),
282288
("multidiscrete", spaces.MultiDiscrete([2, 3])),
283289
("multibinary", spaces.MultiBinary(3)),
284-
("continuous", spaces.Box(low=np.zeros(2), high=np.ones(2))),
290+
("continuous", spaces.Box(low=np.zeros(2, dtype=np.float32), high=np.ones(2, dtype=np.float32))),
285291
]
286292
)
287293

0 commit comments

Comments
 (0)