
Commit 0ec37d8

Merge branch 'feat/mps-support' into feat/mps-support
2 parents: 4c03a25 + 9489b1a


70 files changed: +681 -618 lines changed

.github/workflows/ci.yml

+14 -11

@@ -20,8 +20,13 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
-
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        include:
+          # Default version
+          - gymnasium-version: "1.0.0"
+          # Add a new config to test gym<1.0
+          - python-version: "3.10"
+            gymnasium-version: "0.29.1"
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
@@ -37,15 +42,15 @@ jobs:
           # See https://github.com/astral-sh/uv/issues/1497
           uv pip install --system torch==2.3.1+cpu --index https://download.pytorch.org/whl/cpu

-          # Install Atari Roms
-          uv pip install --system autorom
-          wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64
-          base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz
-          AutoROM --accept-license --source-file Roms.tar.gz
-
-          uv pip install --system .[extra_no_roms,tests,docs]
+          uv pip install --system .[extra,tests,docs]
           # Use headless version
           uv pip install --system opencv-python-headless
+      - name: Install specific version of gym
+        run: |
+          uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
+          uv pip install --system "numpy<2"
+        # Only run for python 3.10, downgrade gym to 0.29.1, numpy<2
+        if: matrix.gymnasium-version != '1.0.0'
       - name: Lint with ruff
         run: |
           make lint
@@ -58,8 +63,6 @@ jobs:
       - name: Type check
         run: |
           make type
-        # Do not run for python 3.8 (mypy internal error)
-        if: matrix.python-version != '3.8'
       - name: Test with pytest
         run: |
           make pytest

README.md

+2 -2

@@ -100,10 +100,10 @@ It provides a minimal number of features compared to SB3 but can be much faster

 ## Installation

-**Note:** Stable-Baselines3 supports PyTorch >= 1.13
+**Note:** Stable-Baselines3 supports PyTorch >= 2.3

 ### Prerequisites
-Stable Baselines3 requires Python 3.8+.
+Stable Baselines3 requires Python 3.9+.

 #### Windows

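The bumped minimums above are easy to check before installing. A small sanity-check sketch (not from the README; the version parsing is illustrative only):

```python
import sys

import torch

# Stable-Baselines3 now requires Python 3.9+ and PyTorch >= 2.3.
assert sys.version_info >= (3, 9), "Python 3.9+ required"
torch_major, torch_minor = (int(p) for p in torch.__version__.split(".")[:2])
assert (torch_major, torch_minor) >= (2, 3), "PyTorch >= 2.3 required"
print(f"Python {sys.version_info.major}.{sys.version_info.minor}, torch {torch.__version__}: OK")
```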
docs/conda_env.yml

+3 -3

@@ -8,12 +8,12 @@ dependencies:
   - python=3.11
   - pytorch=2.5.0=py3.11_cpu_0
   - pip:
-    - gymnasium>=0.28.1,<0.30
+    - gymnasium>=0.29.1,<1.1.0
     - cloudpickle
     - opencv-python-headless
     - pandas
-    - numpy>=1.20,<2.0
+    - numpy>=1.20,<3.0
     - matplotlib
-    - sphinx>=5,<8
+    - sphinx>=5,<9
     - sphinx_rtd_theme>=1.3.0
     - sphinx_copybutton

docs/conf.py

+1 -2

@@ -14,7 +14,6 @@
 import datetime
 import os
 import sys
-from typing import Dict

 # We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
 # PyEnchant.
@@ -151,7 +150,7 @@ def setup(app):

 # -- Options for LaTeX output ------------------------------------------------

-latex_elements: Dict[str, str] = {
+latex_elements: dict[str, str] = {
     # The paper size ('letterpaper' or 'a4paper').
     #
     # 'papersize': 'letterpaper',

docs/guide/install.rst

+1 -1

@@ -7,7 +7,7 @@ Installation
 Prerequisites
 -------------

-Stable-Baselines3 requires python 3.8+ and PyTorch >= 1.13
+Stable-Baselines3 requires python 3.9+ and PyTorch >= 2.3

 Windows
 ~~~~~~~

docs/index.rst

+2 -0

@@ -20,6 +20,8 @@ RL Baselines3 Zoo provides a collection of pre-trained agents, scripts for train

 SB3 Contrib (experimental RL code, latest algorithms): https://github.com/Stable-Baselines-Team/stable-baselines3-contrib

+SBX (SB3 + Jax): https://github.com/araffin/sbx
+
 Main Features
 --------------

docs/misc/changelog.rst

+44 -7

@@ -3,10 +3,45 @@
 Changelog
 ==========

-Release 2.4.0a10 (WIP)
+Release 2.5.0a0 (WIP)
 --------------------------

-**New algorithm: CrossQ in SB3 Contrib**
+Breaking Changes:
+^^^^^^^^^^^^^^^^^
+- Increased minimum required version of PyTorch to 2.3.0
+- Removed support for Python 3.8
+
+New Features:
+^^^^^^^^^^^^^
+- Added support for NumPy v2.0: ``VecNormalize`` now casts normalized rewards to float32; the bit flipping env was also updated to avoid overflow issues
+- Added official support for Python 3.12
+
+Bug Fixes:
+^^^^^^^^^^
+
+`SB3-Contrib`_
+^^^^^^^^^^^^^^
+
+`RL Zoo`_
+^^^^^^^^^
+
+`SBX`_ (SB3 + Jax)
+^^^^^^^^^^^^^^^^^^
+
+Deprecations:
+^^^^^^^^^^^^^
+
+Others:
+^^^^^^^
+
+Documentation:
+^^^^^^^^^^^^^^
+
+
+Release 2.4.0 (2024-11-18)
+--------------------------
+
+**New algorithm: CrossQ in SB3 Contrib, Gymnasium v1.0 support**

 .. note::

@@ -18,18 +53,20 @@ Release 2.4.0a10 (WIP)
 .. warning::

     Stable-Baselines3 (SB3) v2.4.0 will be the last one supporting Python 3.8 (end of life in October 2024)
-    and PyTorch < 2.0.
-    We highly recommend you upgrade to Python >= 3.9 and PyTorch >= 2.0.
+    and PyTorch < 2.3.
+    We highly recommend you upgrade to Python >= 3.9 and PyTorch >= 2.3 (compatible with NumPy v2).


 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
+- Increased minimum required version of Gymnasium to 0.29.1

 New Features:
 ^^^^^^^^^^^^^
 - Added support for ``pre_linear_modules`` and ``post_linear_modules`` in ``create_mlp`` (useful for adding normalization layers, like in DroQ or CrossQ)
 - Enabled np.ndarray logging for TensorBoardOutputFormat as histogram (see GH#1634) (@iwishwasaneagle)
 - Updated env checker to warn users when using multi-dim array to define `MultiDiscrete` spaces
+- Added support for Gymnasium v1.0

 Bug Fixes:
 ^^^^^^^^^^
@@ -57,6 +94,7 @@ Bug Fixes:
 `SBX`_ (SB3 + Jax)
 ^^^^^^^^^^^^^^^^^^
 - Added CNN support for DQN
+- Bug fix for SAC and related algorithms: optimize the log of the entropy coefficient to be consistent with SB3

 Deprecations:
 ^^^^^^^^^^^^^
@@ -69,14 +107,13 @@ Others:
 - Added a warning to recommend using CPU with on policy algorithms (A2C/PPO) and ``MlpPolicy``
 - Switched to uv to download packages faster on GitHub CI
 - Updated dependencies for Read the Docs
-
-Bug Fixes:
-^^^^^^^^^^
+- Removed unnecessary ``copy_obs_dict`` method for ``SubprocVecEnv``, removed the use of ordered dict and renamed ``flatten_obs`` to ``stack_obs``

 Documentation:
 ^^^^^^^^^^^^^^
 - Updated PPO doc to recommend using CPU with ``MlpPolicy``
 - Clarified documentation about planned features and citing software
+- Added a note about the fact that we optimize the log of the entropy coefficient for SAC

 Release 2.3.2 (2024-04-27)
 --------------------------

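The NumPy v2.0 entry in the 2.5.0a0 notes above concerns reward dtypes: `VecNormalize` keeps its running return statistics in float64, so dividing float32 rewards by the float64 standard deviation promotes the result, and the normalized reward is now cast back. A hedged illustration of the mechanism (not the actual SB3 code):

```python
import numpy as np

# Illustrative only: running statistics are float64 for numerical accuracy,
# so array/array division promotes the float32 rewards to float64.
rewards = np.ones(4, dtype=np.float32)
running_var = np.array(4.0, dtype=np.float64)  # running variance of returns
normalized = np.clip(rewards / np.sqrt(running_var + 1e-8), -10.0, 10.0)
print(normalized.dtype)  # float64: silently promoted
normalized = normalized.astype(np.float32)  # the fix: keep rewards float32
```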
docs/modules/dqn.rst

+1 -0

@@ -25,6 +25,7 @@ Notes

 - Original paper: https://arxiv.org/abs/1312.5602
 - Further reference: https://www.nature.com/articles/nature14236
+- Tutorial "From Tabular Q-Learning to DQN": https://github.com/araffin/rlss23-dqn-tutorial

 .. note::
     This implementation provides only vanilla Deep Q-Learning and has no extensions such as Double-DQN, Dueling-DQN and Prioritized Experience Replay.

docs/modules/sac.rst

+3 -0

@@ -35,6 +35,9 @@ Notes
     which is the equivalent to the inverse of reward scale in the original SAC paper.
     The main reason is that it avoids having too high errors when updating the Q functions.

+.. note::
+    When automatically adjusting the temperature (alpha/entropy coefficient), we optimize the logarithm of the entropy coefficient instead of the entropy coefficient itself. This is consistent with the original implementation and has proven to be more stable
+    (see issues `GH#36 <https://github.com/DLR-RM/stable-baselines3/issues/36>`_, `#55 <https://github.com/araffin/sbx/issues/55>`_ and others).

 .. note::

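A minimal sketch of that temperature update (illustrative, not SB3's exact code; the target entropy and the batch of log-probabilities are placeholders). Optimizing `log(alpha)` keeps the coefficient strictly positive without any clamping, and gradient steps act multiplicatively on `alpha`, which is what makes it more stable:

```python
import torch as th

# Sketch of SAC's automatic temperature adjustment in log space.
log_ent_coef = th.zeros(1, requires_grad=True)  # alpha starts at exp(0) = 1
optimizer = th.optim.Adam([log_ent_coef], lr=3e-4)

target_entropy = -4.0  # usual heuristic: -dim(action_space); placeholder value
log_prob = th.tensor([-3.2, -4.5, -4.1])  # hypothetical batch of action log-probs

# Push alpha up when policy entropy is below target, down otherwise.
ent_coef_loss = -(log_ent_coef * (log_prob + target_entropy).detach()).mean()
optimizer.zero_grad()
ent_coef_loss.backward()
optimizer.step()

ent_coef = log_ent_coef.detach().exp()  # alpha used to scale the entropy bonus
```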
pyproject.toml

+2 -3

@@ -1,8 +1,8 @@
 [tool.ruff]
 # Same as Black.
 line-length = 127
-# Assume Python 3.8
-target-version = "py38"
+# Assume Python 3.9
+target-version = "py39"

 [tool.ruff.lint]
 # See https://beta.ruff.rs/docs/rules/
@@ -18,7 +18,6 @@ ignore = ["B028", "RUF013"]
 # ClassVar, implicit optional check not needed for tests
 "./tests/*.py" = ["RUF012", "RUF013"]

-
 [tool.ruff.lint.mccabe]
 # Unlike Flake8, default to a complexity level of 10.
 max-complexity = 15

setup.py

+21 -32

@@ -70,39 +70,15 @@

 """  # noqa:E501

-# Atari Games download is sometimes problematic:
-# https://github.com/Farama-Foundation/AutoROM/issues/39
-# That's why we define extra packages without it.
-extra_no_roms = [
-    # For render
-    "opencv-python",
-    "pygame",
-    # Tensorboard support
-    "tensorboard>=2.9.1",
-    # Checking memory taken by replay buffer
-    "psutil",
-    # For progress bar callback
-    "tqdm",
-    "rich",
-    # For atari games,
-    "shimmy[atari]~=1.3.0",
-    "pillow",
-]
-
-extra_packages = extra_no_roms + [  # noqa: RUF005
-    # For atari roms,
-    "autorom[accept-rom-license]~=0.6.1",
-]
-

 setup(
     name="stable_baselines3",
     packages=[package for package in find_packages() if package.startswith("stable_baselines3")],
     package_data={"stable_baselines3": ["py.typed", "version.txt"]},
     install_requires=[
-        "gymnasium>=0.28.1,<0.30",
-        "numpy>=1.20,<2.0",  # PyTorch not compatible https://github.com/pytorch/pytorch/issues/107302
-        "torch>=1.13",
+        "gymnasium>=0.29.1,<1.1.0",
+        "numpy>=1.20,<3.0",
+        "torch>=2.3,<3.0",
         # For saving models
         "cloudpickle",
         # For reading logs
@@ -125,16 +101,29 @@
             "black>=24.2.0,<25",
         ],
         "docs": [
-            "sphinx>=5,<8",
+            "sphinx>=5,<9",
             "sphinx-autobuild",
             "sphinx-rtd-theme>=1.3.0",
             # For spelling
             "sphinxcontrib.spelling",
             # Copy button for code snippets
             "sphinx_copybutton",
         ],
-        "extra": extra_packages,
-        "extra_no_roms": extra_no_roms,
+        "extra": [
+            # For render
+            "opencv-python",
+            "pygame",
+            # Tensorboard support
+            "tensorboard>=2.9.1",
+            # Checking memory taken by replay buffer
+            "psutil",
+            # For progress bar callback
+            "tqdm",
+            "rich",
+            # For atari games,
+            "ale-py>=0.9.0",
+            "pillow",
+        ],
     },
     description="Pytorch version of Stable Baselines, implementations of reinforcement learning algorithms.",
     author="Antonin Raffin",
@@ -146,7 +135,7 @@
     long_description=long_description,
     long_description_content_type="text/markdown",
     version=__version__,
-    python_requires=">=3.8",
+    python_requires=">=3.9",
     # PyPI package information.
     project_urls={
         "Code": "https://github.com/DLR-RM/stable-baselines3",
@@ -158,10 +147,10 @@
     },
     classifiers=[
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
     ],
 )

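Since `ale-py>=0.9.0` bundles the Atari ROMs, the AutoROM download step (also removed from CI above) is no longer needed; installing the `extra` extras is enough. A hedged usage sketch, assuming Gymnasium v1.0 (where `gymnasium.register_envs` is available):

```python
import ale_py
import gymnasium as gym

# ale-py >= 0.9 ships the ROMs with the package; under Gymnasium v1.0 the
# ALE/* environment ids are registered explicitly from the plugin module.
gym.register_envs(ale_py)

env = gym.make("ALE/Breakout-v5")
obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()
```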
stable_baselines3/a2c/a2c.py

+6 -6

@@ -1,4 +1,4 @@
-from typing import Any, ClassVar, Dict, Optional, Type, TypeVar, Union
+from typing import Any, ClassVar, Optional, TypeVar, Union

 import torch as th
 from gymnasium import spaces
@@ -57,15 +57,15 @@ class A2C(OnPolicyAlgorithm):
     :param _init_setup_model: Whether or not to build the network at the creation of the instance
     """

-    policy_aliases: ClassVar[Dict[str, Type[BasePolicy]]] = {
+    policy_aliases: ClassVar[dict[str, type[BasePolicy]]] = {
         "MlpPolicy": ActorCriticPolicy,
         "CnnPolicy": ActorCriticCnnPolicy,
         "MultiInputPolicy": MultiInputActorCriticPolicy,
     }

     def __init__(
         self,
-        policy: Union[str, Type[ActorCriticPolicy]],
+        policy: Union[str, type[ActorCriticPolicy]],
         env: Union[GymEnv, str],
         learning_rate: Union[float, Schedule] = 7e-4,
         n_steps: int = 5,
@@ -78,12 +78,12 @@ def __init__(
         use_rms_prop: bool = True,
         use_sde: bool = False,
         sde_sample_freq: int = -1,
-        rollout_buffer_class: Optional[Type[RolloutBuffer]] = None,
-        rollout_buffer_kwargs: Optional[Dict[str, Any]] = None,
+        rollout_buffer_class: Optional[type[RolloutBuffer]] = None,
+        rollout_buffer_kwargs: Optional[dict[str, Any]] = None,
         normalize_advantage: bool = False,
         stats_window_size: int = 100,
         tensorboard_log: Optional[str] = None,
-        policy_kwargs: Optional[Dict[str, Any]] = None,
+        policy_kwargs: Optional[dict[str, Any]] = None,
         verbose: int = 0,
         seed: Optional[int] = None,
         device: Union[th.device, str] = "auto",

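The annotation changes here (and in the other touched modules) follow directly from dropping Python 3.8: since Python 3.9, PEP 585 makes builtin containers subscriptable, so `typing.Dict` and `typing.Type` can be replaced by plain `dict` and `type`. A small standalone illustration (not SB3 code):

```python
from typing import Optional

# PEP 585 (Python 3.9+): builtins work directly as generics in annotations,
# so `from typing import Dict, Type` is no longer needed.
def resolve_policy(aliases: dict[str, type], name: str, default: Optional[type] = None) -> Optional[type]:
    """Look up a class by its alias, falling back to a default."""
    return aliases.get(name, default)

registry: dict[str, type] = {"mlp": object}
assert resolve_policy(registry, "mlp") is object
assert resolve_policy(registry, "cnn") is None
```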
stable_baselines3/common/atari_wrappers.py

+2 -2

@@ -1,4 +1,4 @@
-from typing import Dict, SupportsFloat
+from typing import SupportsFloat

 import gymnasium as gym
 import numpy as np
@@ -64,7 +64,7 @@ def reset(self, **kwargs) -> AtariResetReturn:
         noops = self.unwrapped.np_random.integers(1, self.noop_max + 1)
         assert noops > 0
         obs = np.zeros(0)
-        info: Dict = {}
+        info: dict = {}
         for _ in range(noops):
             obs, _, terminated, truncated, info = self.env.step(self.noop_action)
             if terminated or truncated:
