From 0bc1a2fdb4db9f2154657f2ec4f2e033d8f3dd96 Mon Sep 17 00:00:00 2001
From: Lixuyizhi
Date: Sun, 28 Sep 2025 10:03:20 +0800
Subject: [PATCH] fix: resolve backtest module crash caused by Gym's incompatibility with NumPy 2.0+

---
 docs/component/rl/framework.rst        | 4 ++--
 pyproject.toml                         | 2 +-
 qlib/rl/interpreter.py                 | 4 ++--
 qlib/rl/order_execution/interpreter.py | 2 +-
 qlib/rl/order_execution/policy.py      | 4 ++--
 qlib/rl/utils/env_wrapper.py           | 4 ++--
 qlib/rl/utils/finite_env.py            | 2 +-
 scripts/collect_info.py                | 2 +-
 {qlib/tests => tests}/__init__.py      | 0
 {qlib/tests => tests}/config.py        | 0
 {qlib/tests => tests}/data.py          | 0
 tests/rl/test_finite_env.py            | 2 +-
 tests/rl/test_logger.py                | 4 ++--
 tests/rl/test_trainer.py               | 2 +-
 14 files changed, 16 insertions(+), 16 deletions(-)
 rename {qlib/tests => tests}/__init__.py (100%)
 rename {qlib/tests => tests}/config.py (100%)
 rename {qlib/tests => tests}/data.py (100%)

diff --git a/docs/component/rl/framework.rst b/docs/component/rl/framework.rst
index 1a15450532..e65bffae3e 100644
--- a/docs/component/rl/framework.rst
+++ b/docs/component/rl/framework.rst
@@ -3,7 +3,7 @@ The Framework of QlibRL
 
 QlibRL contains a full set of components that cover the entire lifecycle of an RL pipeline, including building the simulator of the market, shaping states & actions, training policies (strategies), and backtesting strategies in the simulated environment.
 
-QlibRL is basically implemented with the support of Tianshou and Gym frameworks. The high-level structure of QlibRL is demonstrated below:
+QlibRL is implemented primarily with the support of the Tianshou and Gymnasium frameworks. The high-level structure of QlibRL is demonstrated below:
 
 .. image:: ../../_static/img/QlibRL_framework.png
    :width: 600
@@ -15,7 +15,7 @@ EnvWrapper
 ------------
 EnvWrapper is the complete capsulation of the simulated environment. It receives actions from outside (policy/strategy/agent), simulates the changes in the market, and then replies rewards and updated states, thus forming an interaction loop.
 
-In QlibRL, EnvWrapper is a subclass of gym.Env, so it implements all necessary interfaces of gym.Env. Any classes or pipelines that accept gym.Env should also accept EnvWrapper. Developers do not need to implement their own EnvWrapper to build their own environment. Instead, they only need to implement 4 components of the EnvWrapper:
+In QlibRL, EnvWrapper is a subclass of gymnasium.Env, so it implements all necessary interfaces of gymnasium.Env. Any classes or pipelines that accept gymnasium.Env should also accept EnvWrapper. Developers do not need to implement their own EnvWrapper to build their own environment. Instead, they only need to implement 4 components of the EnvWrapper:
 
 - `Simulator`
   The simulator is the core component responsible for the environment simulation. Developers could implement all the logic that is directly related to the environment simulation in the Simulator in any way they like. In QlibRL, there are already two implementations of Simulator for single asset trading: 1) ``SingleAssetOrderExecution``, which is built based on Qlib's backtest toolkits and hence considers a lot of practical trading details but is slow. 2) ``SimpleSingleAssetOrderExecution``, which is built based on a simplified trading simulator, which ignores a lot of details (e.g. trading limitations, rounding) but is quite fast.
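For reviewers unfamiliar with the rename: Gymnasium is the maintained fork of OpenAI Gym, and its `Env` interface differs from legacy Gym in two visible ways — `reset()` returns an `(obs, info)` pair, and `step()` returns a five-tuple that splits the old `done` flag into `terminated` and `truncated`. A minimal sketch of the interface that `EnvWrapper` now inherits is below; `ToyEnv` and its spaces are illustrative stand-ins, not Qlib code and not part of this patch.

```python
import gymnasium as gym
import numpy as np
from gymnasium import spaces


class ToyEnv(gym.Env):
    """Illustrative gymnasium.Env subclass; not part of this patch."""

    def __init__(self) -> None:
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
        self.action_space = spaces.Discrete(2)
        self._step_count = 0

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)  # seeds self.np_random
        self._step_count = 0
        return np.zeros(4, dtype=np.float32), {}  # Gymnasium: (obs, info), not a bare obs

    def step(self, action):
        self._step_count += 1
        obs = self.observation_space.sample()
        terminated = self._step_count >= 10  # episode ended by its own dynamics
        truncated = False                    # episode cut off externally (e.g. time limit)
        return obs, 1.0, terminated, truncated, {}  # five-tuple, not Gym's four-tuple
```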
diff --git a/pyproject.toml b/pyproject.toml
index fe48d090c0..2d43a9a98a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,7 +45,7 @@ dependencies = [
     "pymongo",
     "loguru",
     "lightgbm",
-    "gym",
+    "gymnasium",
     "cvxpy",
     "joblib",
     "matplotlib",
diff --git a/qlib/rl/interpreter.py b/qlib/rl/interpreter.py
index 5c9cc26c4e..61717087e5 100644
--- a/qlib/rl/interpreter.py
+++ b/qlib/rl/interpreter.py
@@ -5,9 +5,9 @@
 
 from typing import Any, Generic, TypeVar
 
-import gym
+import gymnasium as gym
 import numpy as np
-from gym import spaces
+from gymnasium import spaces
 
 from qlib.typehint import final
 from .simulator import ActType, StateType
diff --git a/qlib/rl/order_execution/interpreter.py b/qlib/rl/order_execution/interpreter.py
index 01b0811530..5ab98a737a 100644
--- a/qlib/rl/order_execution/interpreter.py
+++ b/qlib/rl/order_execution/interpreter.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 import pandas as pd
-from gym import spaces
+from gymnasium import spaces
 
 from qlib.constant import EPS
 from qlib.rl.data.base import ProcessedDataProvider
diff --git a/qlib/rl/order_execution/policy.py b/qlib/rl/order_execution/policy.py
index a46b587aa1..53bc51ef2c 100644
--- a/qlib/rl/order_execution/policy.py
+++ b/qlib/rl/order_execution/policy.py
@@ -6,11 +6,11 @@
 
 from pathlib import Path
 from typing import Any, Dict, Generator, Iterable, Optional, OrderedDict, Tuple, cast
 
-import gym
+import gymnasium as gym
 import numpy as np
 import torch
 import torch.nn as nn
-from gym.spaces import Discrete
+from gymnasium.spaces import Discrete
 from tianshou.data import Batch, ReplayBuffer, to_torch
 from tianshou.policy import BasePolicy, PPOPolicy, DQNPolicy
diff --git a/qlib/rl/utils/env_wrapper.py b/qlib/rl/utils/env_wrapper.py
index e863b709a1..51634b6e64 100644
--- a/qlib/rl/utils/env_wrapper.py
+++ b/qlib/rl/utils/env_wrapper.py
@@ -6,8 +6,8 @@
 import weakref
 from typing import Any, Callable, cast, Dict, Generic, Iterable, Iterator, Optional, Tuple
 
-import gym
-from gym import Space
+import gymnasium as gym
+from gymnasium import Space
 
 from qlib.rl.aux_info import AuxiliaryInfoCollector
 from qlib.rl.interpreter import ActionInterpreter, ObsType, PolicyActType, StateInterpreter
diff --git a/qlib/rl/utils/finite_env.py b/qlib/rl/utils/finite_env.py
index 87f0900e16..a1faa4b23e 100644
--- a/qlib/rl/utils/finite_env.py
+++ b/qlib/rl/utils/finite_env.py
@@ -13,7 +13,7 @@
 
 from contextlib import contextmanager
 from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Type, Union, cast
 
-import gym
+import gymnasium as gym
 import numpy as np
 from tianshou.env import BaseVectorEnv, DummyVectorEnv, ShmemVectorEnv, SubprocVectorEnv
diff --git a/scripts/collect_info.py b/scripts/collect_info.py
index 9e7a6395ef..6cae858798 100644
--- a/scripts/collect_info.py
+++ b/scripts/collect_info.py
@@ -45,7 +45,7 @@ def qlib(self):
         "pymongo",
         "loguru",
         "lightgbm",
-        "gym",
+        "gymnasium",
         "cvxpy",
         "joblib",
         "matplotlib",
diff --git a/qlib/tests/__init__.py b/tests/__init__.py
similarity index 100%
rename from qlib/tests/__init__.py
rename to tests/__init__.py
diff --git a/qlib/tests/config.py b/tests/config.py
similarity index 100%
rename from qlib/tests/config.py
rename to tests/config.py
diff --git a/qlib/tests/data.py b/tests/data.py
similarity index 100%
rename from qlib/tests/data.py
rename to tests/data.py
diff --git a/tests/rl/test_finite_env.py b/tests/rl/test_finite_env.py
index d6f2a2ec95..579ddcae96 100644
--- a/tests/rl/test_finite_env.py
+++ b/tests/rl/test_finite_env.py
@@ -3,7 +3,7 @@
 
 from collections import Counter
 
-import gym
+import gymnasium as gym
 import numpy as np
 from tianshou.data import Batch, Collector
 from tianshou.policy import BasePolicy
diff --git a/tests/rl/test_logger.py b/tests/rl/test_logger.py
index e100e5046b..4429b58359 100644
--- a/tests/rl/test_logger.py
+++ b/tests/rl/test_logger.py
@@ -7,10 +7,10 @@
 
 import re
 from typing import Any, Tuple
 
-import gym
+import gymnasium as gym
 import numpy as np
 import pandas as pd
-from gym import spaces
+from gymnasium import spaces
 from tianshou.data import Collector, Batch
 from tianshou.policy import BasePolicy
diff --git a/tests/rl/test_trainer.py b/tests/rl/test_trainer.py
index f842d9781b..e541c5c71d 100644
--- a/tests/rl/test_trainer.py
+++ b/tests/rl/test_trainer.py
@@ -7,7 +7,7 @@
 
 import torch
 import torch.nn as nn
-from gym import spaces
+from gymnasium import spaces
 from tianshou.policy import PPOPolicy
 
 from qlib.config import C
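On the motivation: the subject line attributes the backtest crash to Gym's incompatibility with NumPy 2.0+. A likely mechanism (my inference, not stated in the diff) is that legacy Gym references NumPy aliases such as `np.bool8`, which NumPy 2.0 removed, so importing or stepping old `gym` environments raises `AttributeError` under NumPy 2.0+. The migration also changes the shape of consumer code; a sketch of a Gymnasium-style rollout loop is below, where `"CartPole-v1"` is a placeholder environment for illustration — the patch itself only migrates imports and touches no environment IDs.

```python
import gymnasium as gym

env = gym.make("CartPole-v1")          # placeholder env, not used by this patch
obs, info = env.reset(seed=0)          # reset now yields (obs, info)
done = False
while not done:
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated     # old Gym code checked a single `done` flag
env.close()
```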
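A design note on the dependency swap: if keeping the old `gym` name importable had mattered for downstream users, one alternative (not taken here) would be a guarded import, as sketched below. Dropping `gym` outright, as this patch does, is simpler and avoids carrying an unmaintained dependency.

```python
# Hypothetical fallback shim, shown only as a design alternative to this patch.
try:
    import gymnasium as gym
except ImportError:  # environments without Gymnasium installed
    import gym  # type: ignore[no-redef]  # legacy Gym; breaks on NumPy 2.0+
```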