From 0bc1a2fdb4db9f2154657f2ec4f2e033d8f3dd96 Mon Sep 17 00:00:00 2001
From: Lixuyizhi
Date: Sun, 28 Sep 2025 10:03:20 +0800
Subject: [PATCH] fix: resolve backtest module crash caused by Gym's incompatibility with NumPy 2.0+

---
 docs/component/rl/framework.rst        | 4 ++--
 pyproject.toml                         | 2 +-
 qlib/rl/interpreter.py                 | 4 ++--
 qlib/rl/order_execution/interpreter.py | 2 +-
 qlib/rl/order_execution/policy.py      | 4 ++--
 qlib/rl/utils/env_wrapper.py           | 4 ++--
 qlib/rl/utils/finite_env.py            | 2 +-
 scripts/collect_info.py                | 2 +-
 {qlib/tests => tests}/__init__.py      | 0
 {qlib/tests => tests}/config.py        | 0
 {qlib/tests => tests}/data.py          | 0
 tests/rl/test_finite_env.py            | 2 +-
 tests/rl/test_logger.py                | 4 ++--
 tests/rl/test_trainer.py               | 2 +-
 14 files changed, 16 insertions(+), 16 deletions(-)
 rename {qlib/tests => tests}/__init__.py (100%)
 rename {qlib/tests => tests}/config.py (100%)
 rename {qlib/tests => tests}/data.py (100%)

diff --git a/docs/component/rl/framework.rst b/docs/component/rl/framework.rst
index 1a15450532..e65bffae3e 100644
--- a/docs/component/rl/framework.rst
+++ b/docs/component/rl/framework.rst
@@ -3,7 +3,7 @@ The Framework of QlibRL
 
 QlibRL contains a full set of components that cover the entire lifecycle of an RL pipeline, including building the simulator of the market, shaping states & actions, training policies (strategies), and backtesting strategies in the simulated environment.
 
-QlibRL is basically implemented with the support of Tianshou and Gym frameworks. The high-level structure of QlibRL is demonstrated below:
+QlibRL is implemented primarily with the support of the Tianshou and Gymnasium frameworks. The high-level structure of QlibRL is demonstrated below:
 
 .. image:: ../../_static/img/QlibRL_framework.png
    :width: 600
@@ -15,7 +15,7 @@ EnvWrapper
 ------------
 EnvWrapper is the complete capsulation of the simulated environment. It receives actions from outside (policy/strategy/agent), simulates the changes in the market, and then replies rewards and updated states, thus forming an interaction loop.
 
-In QlibRL, EnvWrapper is a subclass of gym.Env, so it implements all necessary interfaces of gym.Env. Any classes or pipelines that accept gym.Env should also accept EnvWrapper. Developers do not need to implement their own EnvWrapper to build their own environment. Instead, they only need to implement 4 components of the EnvWrapper:
+In QlibRL, EnvWrapper is a subclass of gymnasium.Env, so it implements all necessary interfaces of gymnasium.Env. Any classes or pipelines that accept gymnasium.Env should also accept EnvWrapper. Developers do not need to implement their own EnvWrapper to build their own environment. Instead, they only need to implement 4 components of the EnvWrapper:
 
 - `Simulator`
   The simulator is the core component responsible for the environment simulation. Developers could implement all the logic that is directly related to the environment simulation in the Simulator in any way they like. In QlibRL, there are already two implementations of Simulator for single asset trading: 1) ``SingleAssetOrderExecution``, which is built based on Qlib's backtest toolkits and hence considers a lot of practical trading details but is slow. 2) ``SimpleSingleAssetOrderExecution``, which is built based on a simplified trading simulator, which ignores a lot of details (e.g. trading limitations, rounding) but is quite fast.
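For reviewers unfamiliar with the rename: Gymnasium is the maintained fork of OpenAI Gym, and its `Env` interface differs from legacy Gym in two visible ways — `reset()` returns an `(obs, info)` pair, and `step()` returns a five-tuple that splits the old `done` flag into `terminated` and `truncated`. A minimal sketch of the interface that `EnvWrapper` now inherits is below; `ToyEnv` and its spaces are illustrative stand-ins, not Qlib code and not part of this patch.

```python
import gymnasium as gym
import numpy as np
from gymnasium import spaces


class ToyEnv(gym.Env):
    """Illustrative gymnasium.Env subclass; not part of this patch."""

    def __init__(self) -> None:
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
        self.action_space = spaces.Discrete(2)
        self._step_count = 0

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)  # seeds self.np_random
        self._step_count = 0
        return np.zeros(4, dtype=np.float32), {}  # Gymnasium: (obs, info), not a bare obs

    def step(self, action):
        self._step_count += 1
        obs = self.observation_space.sample()
        terminated = self._step_count >= 10  # episode ended by its own dynamics
        truncated = False                    # episode cut off externally (e.g. time limit)
        return obs, 1.0, terminated, truncated, {}  # five-tuple, not Gym's four-tuple
```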
diff --git a/pyproject.toml b/pyproject.toml
index fe48d090c0..2d43a9a98a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,7 +45,7 @@ dependencies = [
     "pymongo",
     "loguru",
     "lightgbm",
-    "gym",
+    "gymnasium",
     "cvxpy",
     "joblib",
     "matplotlib",
diff --git a/qlib/rl/interpreter.py b/qlib/rl/interpreter.py
index 5c9cc26c4e..61717087e5 100644
--- a/qlib/rl/interpreter.py
+++ b/qlib/rl/interpreter.py
@@ -5,9 +5,9 @@
 
 from typing import Any, Generic, TypeVar
 
-import gym
+import gymnasium as gym
 import numpy as np
-from gym import spaces
+from gymnasium import spaces
 
 from qlib.typehint import final
 from .simulator import ActType, StateType
diff --git a/qlib/rl/order_execution/interpreter.py b/qlib/rl/order_execution/interpreter.py
index 01b0811530..5ab98a737a 100644
--- a/qlib/rl/order_execution/interpreter.py
+++ b/qlib/rl/order_execution/interpreter.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 import pandas as pd
-from gym import spaces
+from gymnasium import spaces
 
 from qlib.constant import EPS
 from qlib.rl.data.base import ProcessedDataProvider
diff --git a/qlib/rl/order_execution/policy.py b/qlib/rl/order_execution/policy.py
index a46b587aa1..53bc51ef2c 100644
--- a/qlib/rl/order_execution/policy.py
+++ b/qlib/rl/order_execution/policy.py
@@ -6,11 +6,11 @@
 
 from pathlib import Path
 from typing import Any, Dict, Generator, Iterable, Optional, OrderedDict, Tuple, cast
 
-import gym
+import gymnasium as gym
 import numpy as np
 import torch
 import torch.nn as nn
-from gym.spaces import Discrete
+from gymnasium.spaces import Discrete
 from tianshou.data import Batch, ReplayBuffer, to_torch
 from tianshou.policy import BasePolicy, PPOPolicy, DQNPolicy
diff --git a/qlib/rl/utils/env_wrapper.py b/qlib/rl/utils/env_wrapper.py
index e863b709a1..51634b6e64 100644
--- a/qlib/rl/utils/env_wrapper.py
+++ b/qlib/rl/utils/env_wrapper.py
@@ -6,8 +6,8 @@
 import weakref
 from typing import Any, Callable, cast, Dict, Generic, Iterable, Iterator, Optional, Tuple
 
-import gym
-from gym import Space
+import gymnasium as gym
+from gymnasium import Space
 
 from qlib.rl.aux_info import AuxiliaryInfoCollector
 from qlib.rl.interpreter import ActionInterpreter, ObsType, PolicyActType, StateInterpreter
diff --git a/qlib/rl/utils/finite_env.py b/qlib/rl/utils/finite_env.py
index 87f0900e16..a1faa4b23e 100644
--- a/qlib/rl/utils/finite_env.py
+++ b/qlib/rl/utils/finite_env.py
@@ -13,7 +13,7 @@
 
 from contextlib import contextmanager
 from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Type, Union, cast
 
-import gym
+import gymnasium as gym
 import numpy as np
 from tianshou.env import BaseVectorEnv, DummyVectorEnv, ShmemVectorEnv, SubprocVectorEnv
diff --git a/scripts/collect_info.py b/scripts/collect_info.py
index 9e7a6395ef..6cae858798 100644
--- a/scripts/collect_info.py
+++ b/scripts/collect_info.py
@@ -45,7 +45,7 @@ def qlib(self):
         "pymongo",
         "loguru",
         "lightgbm",
-        "gym",
+        "gymnasium",
         "cvxpy",
         "joblib",
         "matplotlib",
diff --git a/qlib/tests/__init__.py b/tests/__init__.py
similarity index 100%
rename from qlib/tests/__init__.py
rename to tests/__init__.py
diff --git a/qlib/tests/config.py b/tests/config.py
similarity index 100%
rename from qlib/tests/config.py
rename to tests/config.py
diff --git a/qlib/tests/data.py b/tests/data.py
similarity index 100%
rename from qlib/tests/data.py
rename to tests/data.py
diff --git a/tests/rl/test_finite_env.py b/tests/rl/test_finite_env.py
index d6f2a2ec95..579ddcae96 100644
--- a/tests/rl/test_finite_env.py
+++ b/tests/rl/test_finite_env.py
@@ -3,7 +3,7 @@
 
 from collections import Counter
 
-import gym
+import gymnasium as gym
 import numpy as np
 from tianshou.data import Batch, Collector
 from tianshou.policy import BasePolicy
diff --git a/tests/rl/test_logger.py b/tests/rl/test_logger.py
index e100e5046b..4429b58359 100644
--- a/tests/rl/test_logger.py
+++ b/tests/rl/test_logger.py
@@ -7,10 +7,10 @@
 
 import re
 from typing import Any, Tuple
 
-import gym
+import gymnasium as gym
 import numpy as np
 import pandas as pd
-from gym import spaces
+from gymnasium import spaces
 from tianshou.data import Collector, Batch
 from tianshou.policy import BasePolicy
diff --git a/tests/rl/test_trainer.py b/tests/rl/test_trainer.py
index f842d9781b..e541c5c71d 100644
--- a/tests/rl/test_trainer.py
+++ b/tests/rl/test_trainer.py
@@ -7,7 +7,7 @@
 
 import torch
 import torch.nn as nn
-from gym import spaces
+from gymnasium import spaces
 from tianshou.policy import PPOPolicy
 
 from qlib.config import C
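On the motivation: the subject line attributes the backtest crash to Gym's incompatibility with NumPy 2.0+. A likely mechanism (my inference, not stated in the diff) is that legacy Gym references NumPy aliases such as `np.bool8`, which NumPy 2.0 removed, so importing or stepping old `gym` environments raises `AttributeError` under NumPy 2.0+. The migration also changes the shape of consumer code; a sketch of a Gymnasium-style rollout loop is below, where `"CartPole-v1"` is a placeholder environment for illustration — the patch itself only migrates imports and touches no environment IDs.

```python
import gymnasium as gym

env = gym.make("CartPole-v1")          # placeholder env, not used by this patch
obs, info = env.reset(seed=0)          # reset now yields (obs, info)
done = False
while not done:
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated     # old Gym code checked a single `done` flag
env.close()
```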
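A design note on the dependency swap: if keeping the old `gym` name importable had mattered for downstream users, one alternative (not taken here) would be a guarded import, as sketched below. Dropping `gym` outright, as this patch does, is simpler and avoids carrying an unmaintained dependency.

```python
# Hypothetical fallback shim, shown only as a design alternative to this patch.
try:
    import gymnasium as gym
except ImportError:  # environments without Gymnasium installed
    import gym  # type: ignore[no-redef]  # legacy Gym; breaks on NumPy 2.0+
```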