From c62b23f019b59e0963454c798d2a88e426657448 Mon Sep 17 00:00:00 2001
From: Ben Frederickson
Date: Fri, 29 Sep 2023 12:00:27 -0700
Subject: [PATCH] Use np.random.Generator for rng (#694)

* Use np.random.Generator for rng

Replace np.random.RandomState with np.random.Generator for generating
random numbers. RandomState is marked as a legacy API and will receive
no further improvements. The Generator API also gives us some extra
features, such as the ability to specify dtypes directly rather than
converting after the fact.

* spelling

* windows fix
---
 implicit/cpu/als.py                       |  6 +++---
 implicit/cpu/bpr.pyx                      |  8 ++++----
 implicit/cpu/lmf.pyx                      |  6 +++---
 implicit/evaluation.pyx                   |  2 +-
 implicit/gpu/bpr.py                       |  8 ++++----
 implicit/gpu/matrix_factorization_base.py |  6 +++++-
 implicit/utils.py                         | 13 +++++++------
 pyproject.toml                            |  2 --
 setup.py                                  |  2 +-
 tests/als_test.py                         |  2 +-
 tests/bpr_test.py                         |  2 +-
 tests/gpu_test.py                         |  9 +++++----
 12 files changed, 35 insertions(+), 31 deletions(-)
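
For reviewers less familiar with the Generator API, here is a minimal,
illustrative sketch (not part of the patch) of the call-for-call mapping the
diffs below rely on; the variable names, shapes and seeds are made up for the
example:

    import numpy as np

    rng = np.random.default_rng(42)   # replaces np.random.RandomState(42)

    # RandomState.rand(m, n).astype(dtype)      -> Generator.random((m, n), dtype=...)
    factors = rng.random((3, 4), dtype=np.float32) * 0.01

    # RandomState.randint(low, high, size=n)    -> Generator.integers(low, high, size=n, dtype=...)
    seeds = rng.integers(0, 2**31, size=8, dtype="long")

    # RandomState.normal(size=...).astype(...)  -> Generator.standard_normal(size=..., dtype=...)
    noise = rng.standard_normal(size=(3, 4), dtype=np.float32)

    # RandomState.random_sample(n)              -> Generator.random(n)
    uniform = rng.random(8)
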
diff --git a/implicit/cpu/als.py b/implicit/cpu/als.py
index 2c8472fa..1ec7a919 100644
--- a/implicit/cpu/als.py
+++ b/implicit/cpu/als.py
@@ -45,7 +45,7 @@ class AlternatingLeastSquares(MatrixFactorizationBase):
     num_threads : int, optional
         The number of threads to use for fitting the model and batch recommend calls.
         Specifying 0 means to default to the number of cores on the machine.
-    random_state : int, numpy.random.RandomState or None, optional
+    random_state : int, numpy.random.RandomState, np.random.Generator or None, optional
         The random state for seeding the initial item and user factors.
         Default is None.
@@ -141,9 +141,9 @@ def fit(self, user_items, show_progress=True, callback=None):
         s = time.time()
         # Initialize the variables randomly if they haven't already been set
         if self.user_factors is None:
-            self.user_factors = random_state.rand(users, self.factors).astype(self.dtype) * 0.01
+            self.user_factors = random_state.random((users, self.factors), dtype=self.dtype) * 0.01
         if self.item_factors is None:
-            self.item_factors = random_state.rand(items, self.factors).astype(self.dtype) * 0.01
+            self.item_factors = random_state.random((items, self.factors), dtype=self.dtype) * 0.01
 
         log.debug("Initialized factors in %s", time.time() - s)
diff --git a/implicit/cpu/bpr.pyx b/implicit/cpu/bpr.pyx
index 2349b260..16dd2a69 100644
--- a/implicit/cpu/bpr.pyx
+++ b/implicit/cpu/bpr.pyx
@@ -93,7 +93,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
     num_threads : int, optional
         The number of threads to use for fitting the model and batch recommend calls.
         Specifying 0 means to default to the number of cores on the machine.
-    random_state : int, RandomState or None, optional
+    random_state : int, RandomState, Generator or None, optional
         The random state for seeding the initial item and user factors.
         Default is None.
@@ -156,7 +156,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
         # Note: the final dimension is for the item bias term - which is set to a 1 for all users
         # this simplifies interfacing with approximate nearest neighbours libraries etc
         if self.item_factors is None:
-            self.item_factors = (rs.rand(items, self.factors + 1).astype(self.dtype) - .5)
+            self.item_factors = (rs.random((items, self.factors + 1), dtype=self.dtype) - .5)
             self.item_factors /= self.factors
 
             # set factors to all zeros for items without any ratings
@@ -164,7 +164,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
             self.item_factors[item_counts == 0] = np.zeros(self.factors + 1)
 
         if self.user_factors is None:
-            self.user_factors = (rs.rand(users, self.factors + 1).astype(self.dtype) - .5)
+            self.user_factors = (rs.random((users, self.factors + 1), dtype=self.dtype) - .5)
             self.user_factors /= self.factors
 
             # set factors to all zeros for users without any ratings
@@ -183,7 +183,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
             num_threads = multiprocessing.cpu_count()
 
         # initialize RNG's, one per thread. Also pass the seeds for each thread's RNG
-        cdef long[:] rng_seeds = rs.randint(0, 2**31, size=num_threads)
+        cdef long[:] rng_seeds = rs.integers(0, 2**31, size=num_threads, dtype="long")
         cdef RNGVector rng = RNGVector(num_threads, len(user_items.data) - 1, rng_seeds)
 
         log.debug("Running %i BPR training epochs", self.iterations)
diff --git a/implicit/cpu/lmf.pyx b/implicit/cpu/lmf.pyx
index 4c45c48c..7349470c 100644
--- a/implicit/cpu/lmf.pyx
+++ b/implicit/cpu/lmf.pyx
@@ -151,14 +151,14 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):
         # user_factors[-2] = user bias, item factors[-1] = item bias
         # This significantly simplifies both training, and serving
         if self.item_factors is None:
-            self.item_factors = rs.normal(size=(items, self.factors + 2)).astype(np.float32)
+            self.item_factors = rs.standard_normal(size=(items, self.factors + 2), dtype=np.float32)
             self.item_factors[:, -1] = 1.0
 
             # set factors to all zeros for items without any ratings
             self.item_factors[item_counts == 0] = np.zeros(self.factors + 2)
 
         if self.user_factors is None:
-            self.user_factors = rs.normal(size=(users, self.factors + 2)).astype(np.float32)
+            self.user_factors = rs.standard_normal(size=(users, self.factors + 2), dtype=np.float32)
             self.user_factors[:, -2] = 1.0
 
             # set factors to all zeros for users without any ratings
@@ -173,7 +173,7 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):
             num_threads = multiprocessing.cpu_count()
 
         # initialize RNG's, one per thread. Also pass the seeds for each thread's RNG
-        cdef long[:] rng_seeds = rs.randint(0, 2**31, size=num_threads)
+        cdef long[:] rng_seeds = rs.integers(0, 2**31, size=num_threads, dtype="long")
         cdef RNGVector rng = RNGVector(num_threads, len(user_items.data) - 1, rng_seeds)
 
         log.debug("Running %i LMF training epochs", self.iterations)
diff --git a/implicit/evaluation.pyx b/implicit/evaluation.pyx
index bac38dc7..bd590401 100644
--- a/implicit/evaluation.pyx
+++ b/implicit/evaluation.pyx
@@ -30,7 +30,7 @@ def train_test_split(ratings, train_percentage=0.8, random_state=None):
     ratings = ratings.tocoo()
     random_state = check_random_state(random_state)
 
-    random_index = random_state.random_sample(len(ratings.data))
+    random_index = random_state.random(len(ratings.data))
     train_index = random_index < train_percentage
     test_index = random_index >= train_percentage
diff --git a/implicit/gpu/bpr.py b/implicit/gpu/bpr.py
index 44a16ec3..cffc471e 100644
--- a/implicit/gpu/bpr.py
+++ b/implicit/gpu/bpr.py
@@ -33,7 +33,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
         When sampling negative items, check if the randomly picked negative
         item has actually been liked by the user. This check increases the
         time needed to train but usually leads to better predictions.
-    random_state : int, RandomState or None, optional
+    random_state : int, RandomState, Generator or None, optional
         The random state for seeding the initial item and user factors.
         Default is None.
@@ -103,7 +103,7 @@ def fit(self, user_items, show_progress=True, callback=None):
         # Note: the final dimension is for the item bias term - which is set to a 1 for all users
         # this simplifies interfacing with approximate nearest neighbours libraries etc
         if self.item_factors is None:
-            item_factors = rs.rand(items, self.factors + 1).astype("float32") - 0.5
+            item_factors = rs.random((items, self.factors + 1), "float32") - 0.5
             item_factors /= self.factors
 
             # set factors to all zeros for items without any ratings
@@ -112,7 +112,7 @@ def fit(self, user_items, show_progress=True, callback=None):
             self.item_factors = implicit.gpu.Matrix(item_factors)
 
         if self.user_factors is None:
-            user_factors = rs.rand(users, self.factors + 1).astype("float32") - 0.5
+            user_factors = rs.random((users, self.factors + 1), "float32") - 0.5
             user_factors /= self.factors
 
             # set factors to all zeros for users without any ratings
@@ -142,7 +142,7 @@ def fit(self, user_items, show_progress=True, callback=None):
                     Y,
                     self.learning_rate,
                     self.regularization,
-                    rs.randint(2**31),
+                    rs.integers(2**31),
                     self.verify_negative_samples,
                 )
                 progress.update(1)
diff --git a/implicit/gpu/matrix_factorization_base.py b/implicit/gpu/matrix_factorization_base.py
index 057f74af..4b5933ca 100644
--- a/implicit/gpu/matrix_factorization_base.py
+++ b/implicit/gpu/matrix_factorization_base.py
@@ -239,7 +239,7 @@ def check_random_state(random_state):
 
     Parameters
    ----------
-    random_state : int, None or RandomState
+    random_state : int, None, np.random.RandomState or np.random.Generator
         The existing RandomState. If None, or an int, will be
         used to seed a new curand RandomState generator
     """
@@ -247,6 +247,10 @@
         # we need to convert from numpy random state our internal random state
         return implicit.gpu.RandomState(random_state.randint(2**31))
 
+    if isinstance(random_state, np.random.Generator):
+        # we need to convert from the numpy generator to our internal random state
+        return implicit.gpu.RandomState(random_state.integers(2**31))
+
     # otherwise try to initialize a new one, and let it fail through
     # on the numpy side if it doesn't work
     return implicit.gpu.RandomState(random_state or int(time.time()))
diff --git a/implicit/utils.py b/implicit/utils.py
index 3c213c29..4e86ee71 100644
--- a/implicit/utils.py
+++ b/implicit/utils.py
@@ -65,21 +65,22 @@ def check_blas_config():
 def check_random_state(random_state):
     """Validate the random state.
 
-    Check a random seed or existing numpy RandomState
-    and get back an initialized RandomState.
+    Check a random seed or existing numpy rng
+    and get back an initialized numpy.random.Generator.
 
     Parameters
     ----------
-    random_state : int, None or RandomState
+    random_state : int, None, np.random.RandomState or np.random.Generator
         The existing RandomState. If None, or an int, will be
         used to seed a new numpy RandomState.
     """
-    # if it's an existing random state, pass through
+    # backwards compatibility
     if isinstance(random_state, np.random.RandomState):
-        return random_state
+        return np.random.default_rng(random_state.randint(2**31))
+
     # otherwise try to initialize a new one, and let it fail through
     # on the numpy side if it doesn't work
-    return np.random.RandomState(random_state)
+    return np.random.default_rng(random_state)
 
 
 def augment_inner_product_matrix(factors):
diff --git a/pyproject.toml b/pyproject.toml
index 030dac37..5e95e224 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,5 @@ CUDACXX = "/usr/local/cuda/bin/nvcc"
 
 [tool.cibuildwheel.macos]
 archs = ["x86_64", "universal2", "arm64"]
-
-
 [tool.pytest.ini_options]
 filterwarnings = ['ignore::implicit.utils.ParameterWarning']
diff --git a/setup.py b/setup.py
index c348d9cd..c0b5e3ea 100644
--- a/setup.py
+++ b/setup.py
@@ -44,6 +44,6 @@ def exclude_non_implicit_cmake_files(cmake_manifest):
         "Collaborative Filtering, Recommender Systems"
     ),
     packages=find_packages(),
-    install_requires=["numpy", "scipy>=0.16", "tqdm>=4.27", "threadpoolctl"],
+    install_requires=["numpy>=1.17.0", "scipy>=0.16", "tqdm>=4.27", "threadpoolctl"],
     cmake_process_manifest_hook=exclude_non_implicit_cmake_files,
 )
diff --git a/tests/als_test.py b/tests/als_test.py
index a29c1056..c8526a83 100644
--- a/tests/als_test.py
+++ b/tests/als_test.py
@@ -179,7 +179,7 @@ def test_factorize(use_native, use_gpu, use_cg, dtype):
     reconstructed = rows.dot(cols.T)
     for i in range(counts.shape[0]):
         for j in range(counts.shape[1]):
-            assert pytest.approx(counts[i, j], abs=1e-4) == reconstructed[i, j], (
+            assert pytest.approx(counts[i, j], abs=1e-3) == reconstructed[i, j], (
                 "failed to reconstruct row=%s, col=%s,"
                 " value=%.5f, dtype=%s, cg=%s, native=%s gpu=%s"
                 % (i, j, reconstructed[i, j], dtype, use_cg, use_native, use_gpu)
diff --git a/tests/bpr_test.py b/tests/bpr_test.py
index 9c7f3eb3..a2b1b6cd 100644
--- a/tests/bpr_test.py
+++ b/tests/bpr_test.py
@@ -22,7 +22,7 @@ def _get_model(self):
             factors=3,
             regularization=0,
             use_gpu=True,
-            learning_rate=0.05,
+            learning_rate=0.1,
             random_state=42,
         )
 
diff --git a/tests/gpu_test.py b/tests/gpu_test.py
index 5012f7f5..4d208517 100644
--- a/tests/gpu_test.py
+++ b/tests/gpu_test.py
@@ -25,9 +25,10 @@ def test_topk_ascending(k, batch, temp_memory):
 def test_topk_random(k, batch, temp_memory):
     num_items = 1000
     factors = 10
-    np.random.seed(0)
-    items = np.random.uniform(size=(num_items, factors)).astype("float32")
-    queries = np.random.uniform(size=(batch, factors)).astype("float32")
+
+    rs = np.random.default_rng(0)
+    items = rs.random(size=(num_items, factors), dtype="float32")
+    queries = rs.random(size=(batch, factors), dtype="float32")
     _check_knn_queries(items, queries, k, max_temp_memory=temp_memory)
 
 
@@ -46,8 +47,8 @@ def _check_knn_queries(items, queries, k=5, max_temp_memory=500_000_000):
         exact_distances[r] = batch[r][exact_ids[r]]
 
     # make sure that we match
-    assert_array_equal(ids, exact_ids)
     assert_allclose(distances, exact_distances, rtol=1e-06)
+    assert_array_equal(ids, exact_ids)
 
 
 @pytest.mark.skipif(not implicit.gpu.HAS_CUDA, reason="needs cuda build")
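
Usage note (not part of the patch): a quick sketch of how the CPU-side helper
is expected to behave after the implicit/utils.py hunk above; the seed values
are arbitrary:

    import numpy as np
    from implicit.utils import check_random_state

    # Each of these now returns an np.random.Generator:
    check_random_state(None)                      # fresh, OS-seeded generator
    check_random_state(42)                        # same as np.random.default_rng(42)
    check_random_state(np.random.default_rng(7))  # Generators pass straight through default_rng
    check_random_state(np.random.RandomState(7))  # legacy RandomState is re-seeded into a Generator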