Skip to content

Commit e0cda88

Browse files
authored
[python-package] remove uses of deprecated NumPy random number generation APIs, require 'numpy>=1.17.0' (#6468)
1 parent ebac9e8 commit e0cda88

File tree

10 files changed, with 223 additions and 218 deletions.

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ python-package/lightgbm/VERSION.txt
405405

406406
# R build artefacts
407407
**/autom4te.cache/
408-
conftest*
408+
R-package/conftest*
409409
R-package/config.status
410410
!R-package/data/agaricus.test.rda
411411
!R-package/data/agaricus.train.rda

docs/Python-Intro.rst

+9-5
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,9 @@ Many of the examples in this page use functionality from ``numpy``. To run the e
5959

6060
.. code:: python
6161
62-
data = np.random.rand(500, 10) # 500 entities, each contains 10 features
63-
label = np.random.randint(2, size=500) # binary target
62+
rng = np.random.default_rng()
63+
data = rng.uniform(size=(500, 10)) # 500 entities, each contains 10 features
64+
label = rng.integers(low=0, high=2, size=(500, )) # binary target
6465
train_data = lgb.Dataset(data, label=label)
6566
6667
**To load a scipy.sparse.csr\_matrix array into Dataset:**
@@ -139,15 +140,17 @@ It doesn't need to convert to one-hot encoding, and is much faster than one-hot
139140

140141
.. code:: python
141142
142-
w = np.random.rand(500, )
143+
rng = np.random.default_rng()
144+
w = rng.uniform(size=(500, ))
143145
train_data = lgb.Dataset(data, label=label, weight=w)
144146
145147
or
146148

147149
.. code:: python
148150
149151
train_data = lgb.Dataset(data, label=label)
150-
w = np.random.rand(500, )
152+
rng = np.random.default_rng()
153+
w = rng.uniform(size=(500, ))
151154
train_data.set_weight(w)
152155
153156
And you can use ``Dataset.set_init_score()`` to set initial score, and ``Dataset.set_group()`` to set group/query data for ranking tasks.
@@ -249,7 +252,8 @@ A model that has been trained or loaded can perform predictions on datasets:
249252
.. code:: python
250253
251254
# 7 entities, each contains 10 features
252-
data = np.random.rand(7, 10)
255+
rng = np.random.default_rng()
256+
data = rng.uniform(size=(7, 10))
253257
ypred = bst.predict(data)
254258
255259
If early stopping is enabled during training, you can get predictions from the best iteration with ``bst.best_iteration``:

examples/python-guide/logistic_regression.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@
2222
#################
2323
# Simulate some binary data with a single categorical and
2424
# single continuous predictor
25-
np.random.seed(0)
25+
rng = np.random.default_rng(seed=0)
2626
N = 1000
2727
X = pd.DataFrame({"continuous": range(N), "categorical": np.repeat([0, 1, 2, 3, 4], N / 5)})
2828
CATEGORICAL_EFFECTS = [-1, -1, -2, -2, 2]
2929
LINEAR_TERM = np.array(
3030
[-0.5 + 0.01 * X["continuous"][k] + CATEGORICAL_EFFECTS[X["categorical"][k]] for k in range(X.shape[0])]
31-
) + np.random.normal(0, 1, X.shape[0])
31+
) + rng.normal(loc=0, scale=1, size=X.shape[0])
3232
TRUE_PROB = expit(LINEAR_TERM)
33-
Y = np.random.binomial(1, TRUE_PROB, size=N)
33+
Y = rng.binomial(n=1, p=TRUE_PROB, size=N)
3434
DATA = {
3535
"X": X,
3636
"probability_labels": TRUE_PROB,
@@ -65,10 +65,9 @@ def experiment(objective, label_type, data):
6565
result : dict
6666
Experiment summary stats.
6767
"""
68-
np.random.seed(0)
6968
nrounds = 5
7069
lgb_data = data[f"lgb_with_{label_type}_labels"]
71-
params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1}
70+
params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1, "seed": 123}
7271
time_zero = time.time()
7372
gbm = lgb.train(params, lgb_data, num_boost_round=nrounds)
7473
y_fitted = gbm.predict(data["X"])

python-package/lightgbm/compat.py

-12
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,6 @@ def __init__(self, *args: Any, **kwargs: Any):
3737

3838
concat = None
3939

40-
"""numpy"""
41-
try:
42-
from numpy.random import Generator as np_random_Generator
43-
except ImportError:
44-
45-
class np_random_Generator: # type: ignore
46-
"""Dummy class for np.random.Generator."""
47-
48-
def __init__(self, *args: Any, **kwargs: Any):
49-
pass
50-
51-
5240
"""matplotlib"""
5341
try:
5442
import matplotlib # noqa: F401

python-package/lightgbm/sklearn.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
_LGBMModelBase,
4242
_LGBMRegressorBase,
4343
dt_DataTable,
44-
np_random_Generator,
4544
pd_DataFrame,
4645
)
4746
from .engine import train
@@ -476,7 +475,7 @@ def __init__(
476475
colsample_bytree: float = 1.0,
477476
reg_alpha: float = 0.0,
478477
reg_lambda: float = 0.0,
479-
random_state: Optional[Union[int, np.random.RandomState, "np.random.Generator"]] = None,
478+
random_state: Optional[Union[int, np.random.RandomState, np.random.Generator]] = None,
480479
n_jobs: Optional[int] = None,
481480
importance_type: str = "split",
482481
**kwargs: Any,
@@ -739,7 +738,7 @@ def _process_params(self, stage: str) -> Dict[str, Any]:
739738

740739
if isinstance(params["random_state"], np.random.RandomState):
741740
params["random_state"] = params["random_state"].randint(np.iinfo(np.int32).max)
742-
elif isinstance(params["random_state"], np_random_Generator):
741+
elif isinstance(params["random_state"], np.random.Generator):
743742
params["random_state"] = int(params["random_state"].integers(np.iinfo(np.int32).max))
744743
if self._n_classes > 2:
745744
for alias in _ConfigAliases.get("num_class"):

python-package/pyproject.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ classifiers = [
1919
"Topic :: Scientific/Engineering :: Artificial Intelligence"
2020
]
2121
dependencies = [
22-
"numpy",
22+
"numpy>=1.17.0",
2323
"scipy"
2424
]
2525
description = "LightGBM Python Package"
@@ -156,6 +156,8 @@ select = [
156156
"E",
157157
# pyflakes
158158
"F",
159+
# NumPy-specific rules
160+
"NPY",
159161
# pylint
160162
"PL",
161163
# flake8-return: unnecessary assignment before return

tests/python_package_test/conftest.py

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import numpy as np
2+
import pytest
3+
4+
5+
@pytest.fixture(scope="function")
6+
def rng():
7+
return np.random.default_rng()
8+
9+
10+
@pytest.fixture(scope="function")
11+
def rng_fixed_seed():
12+
return np.random.default_rng(seed=42)

0 commit comments

Comments
 (0)