4 changes: 2 additions & 2 deletions docs/component/rl/framework.rst
@@ -3,7 +3,7 @@ The Framework of QlibRL

QlibRL contains a full set of components that cover the entire lifecycle of an RL pipeline, including building the simulator of the market, shaping states & actions, training policies (strategies), and backtesting strategies in the simulated environment.

-QlibRL is basically implemented with the support of Tianshou and Gym frameworks. The high-level structure of QlibRL is demonstrated below:
+QlibRL is basically implemented with the support of Tianshou and Gymnasium frameworks. The high-level structure of QlibRL is demonstrated below:

.. image:: ../../_static/img/QlibRL_framework.png
:width: 600
@@ -15,7 +15,7 @@ EnvWrapper
------------
EnvWrapper is the complete encapsulation of the simulated environment. It receives actions from the outside (policy/strategy/agent), simulates the changes in the market, and then returns rewards and updated states, thus forming an interaction loop.

-In QlibRL, EnvWrapper is a subclass of gym.Env, so it implements all necessary interfaces of gym.Env. Any classes or pipelines that accept gym.Env should also accept EnvWrapper. Developers do not need to implement their own EnvWrapper to build their own environment. Instead, they only need to implement 4 components of the EnvWrapper:
+In QlibRL, EnvWrapper is a subclass of gymnasium.Env, so it implements all necessary interfaces of gymnasium.Env. Any classes or pipelines that accept gymnasium.Env should also accept EnvWrapper. Developers do not need to implement their own EnvWrapper to build their own environment. Instead, they only need to implement 4 components of the EnvWrapper:

- `Simulator`
The simulator is the core component responsible for the environment simulation. Developers can implement all the logic directly related to the environment simulation in the Simulator in any way they like. QlibRL already provides two Simulator implementations for single-asset trading: 1) ``SingleAssetOrderExecution``, which is built on Qlib's backtest toolkits and therefore accounts for many practical trading details, but is slow; 2) ``SimpleSingleAssetOrderExecution``, which is built on a simplified trading simulator that ignores many details (e.g., trading limitations, rounding) but is quite fast.
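
For orientation, the interface contract referenced above can be shown with a minimal sketch (a hypothetical `ToyTradingEnv`, not part of this PR or of QlibRL). The behavioral difference that motivates this migration is that Gymnasium's `reset()` returns an `(obs, info)` pair and `step()` returns a 5-tuple with separate `terminated`/`truncated` flags, where classic gym returned a bare observation and a 4-tuple:

```python
import gymnasium as gym
import numpy as np
from gymnasium import spaces


class ToyTradingEnv(gym.Env):
    """Hypothetical minimal env; EnvWrapper must expose this same interface."""

    def __init__(self) -> None:
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=(4,), dtype=np.float32)
        self.action_space = spaces.Discrete(3)  # e.g. sell / hold / buy

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)  # seeds self.np_random
        return np.zeros(4, dtype=np.float32), {}  # Gymnasium: (obs, info)

    def step(self, action):
        obs = self.observation_space.sample()
        terminated = True   # the MDP itself ended the episode
        truncated = False   # no external time limit was hit
        return obs, 0.0, terminated, truncated, {}  # Gymnasium: 5-tuple
```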
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -45,7 +45,7 @@ dependencies = [
"pymongo",
"loguru",
"lightgbm",
"gym",
"gymnasium",
"cvxpy",
"joblib",
"matplotlib",
4 changes: 2 additions & 2 deletions qlib/rl/interpreter.py
@@ -5,9 +5,9 @@

from typing import Any, Generic, TypeVar

-import gym
+import gymnasium as gym
import numpy as np
-from gym import spaces
+from gymnasium import spaces

from qlib.typehint import final
from .simulator import ActType, StateType
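
As a hedged illustration of what these imports are used for: an interpreter declares the space its observations live in via `gymnasium.spaces`. The shape and bounds below are made-up placeholders, not QlibRL's actual ones:

```python
import numpy as np
from gymnasium import spaces

# Hypothetical observation space: 30 time steps x 6 features, unbounded floats.
obs_space = spaces.Box(low=-np.inf, high=np.inf, shape=(30, 6), dtype=np.float32)

sample = obs_space.sample()        # draws a random observation of the declared shape/dtype
assert obs_space.contains(sample)  # spaces can validate membership
```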
2 changes: 1 addition & 1 deletion qlib/rl/order_execution/interpreter.py
@@ -8,7 +8,7 @@

import numpy as np
import pandas as pd
-from gym import spaces
+from gymnasium import spaces

from qlib.constant import EPS
from qlib.rl.data.base import ProcessedDataProvider
4 changes: 2 additions & 2 deletions qlib/rl/order_execution/policy.py
@@ -6,11 +6,11 @@
from pathlib import Path
from typing import Any, Dict, Generator, Iterable, Optional, OrderedDict, Tuple, cast

-import gym
+import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
-from gym.spaces import Discrete
+from gymnasium.spaces import Discrete
from tianshou.data import Batch, ReplayBuffer, to_torch
from tianshou.policy import BasePolicy, PPOPolicy, DQNPolicy
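
One note on the aliased import: binding `gymnasium` to the name `gym` keeps the rest of the module source-compatible, because existing references such as `gym.Space` now resolve to Gymnasium types without further edits. A standalone sketch (not QlibRL code):

```python
import gymnasium as gym
from gymnasium.spaces import Discrete

action_space = Discrete(4)                  # e.g. four discretized execution actions
assert isinstance(action_space, gym.Space)  # the alias resolves to gymnasium.Space
print(action_space.n, action_space.sample())
```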

4 changes: 2 additions & 2 deletions qlib/rl/utils/env_wrapper.py
@@ -6,8 +6,8 @@
import weakref
from typing import Any, Callable, cast, Dict, Generic, Iterable, Iterator, Optional, Tuple

-import gym
-from gym import Space
+import gymnasium as gym
+from gymnasium import Space

from qlib.rl.aux_info import AuxiliaryInfoCollector
from qlib.rl.interpreter import ActionInterpreter, ObsType, PolicyActType, StateInterpreter
2 changes: 1 addition & 1 deletion qlib/rl/utils/finite_env.py
@@ -13,7 +13,7 @@
from contextlib import contextmanager
from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Type, Union, cast

-import gym
+import gymnasium as gym
import numpy as np
from tianshou.env import BaseVectorEnv, DummyVectorEnv, ShmemVectorEnv, SubprocVectorEnv
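
For context on why `gymnasium` and the Tianshou vector envs are imported together: Tianshou builds vectorized environments from callables that each construct one `gymnasium.Env`. A minimal sketch, assuming a Gymnasium-era Tianshou release (the factory below is illustrative, not QlibRL's):

```python
import gymnasium as gym
from tianshou.env import DummyVectorEnv


def make_env() -> gym.Env:
    # Any gymnasium.Env works here, e.g. a QlibRL EnvWrapper.
    return gym.make("CartPole-v1")


# Four independent copies in-process; Shmem/Subproc variants parallelize instead.
vector_env = DummyVectorEnv([make_env for _ in range(4)])
obs, info = vector_env.reset()  # batched reset; Gymnasium-style (obs, info) return
```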

2 changes: 1 addition & 1 deletion scripts/collect_info.py
@@ -45,7 +45,7 @@ def qlib(self):
"pymongo",
"loguru",
"lightgbm",
"gym",
"gymnasium",
"cvxpy",
"joblib",
"matplotlib",
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/rl/test_finite_env.py
@@ -3,7 +3,7 @@

from collections import Counter

-import gym
+import gymnasium as gym
import numpy as np
from tianshou.data import Batch, Collector
from tianshou.policy import BasePolicy
4 changes: 2 additions & 2 deletions tests/rl/test_logger.py
@@ -7,10 +7,10 @@
import re
from typing import Any, Tuple

-import gym
+import gymnasium as gym
import numpy as np
import pandas as pd
-from gym import spaces
+from gymnasium import spaces
from tianshou.data import Collector, Batch
from tianshou.policy import BasePolicy

2 changes: 1 addition & 1 deletion tests/rl/test_trainer.py
@@ -7,7 +7,7 @@

import torch
import torch.nn as nn
-from gym import spaces
+from gymnasium import spaces
from tianshou.policy import PPOPolicy

from qlib.config import C