diff --git a/datasketch/minhash.py b/datasketch/minhash.py index 31422986..6fe9f81d 100644 --- a/datasketch/minhash.py +++ b/datasketch/minhash.py @@ -48,7 +48,7 @@ def __init__(self, num_perm=128, seed=1, hashobj=sha1, if hashvalues is not None: self.hashvalues = hashvalues else: - self.hashvalues = np.array([_max_hash for _ in range(num_perm)]) + self.hashvalues = np.array([_max_hash for _ in range(num_perm)], dtype=np.int) # Initalize permutation function parameters if permutations is not None: self.permutations = permutations @@ -88,7 +88,7 @@ def clear(self): ''' Clear the current state of the Minhash. ''' - self.hashvalues = np.array([_max_hash for _ in range(num_perm)]) + self.hashvalues = np.array([_max_hash for _ in range(num_perm)], dtype=np.int) def copy(self): ''' diff --git a/datasketch/weighted_minhash.py b/datasketch/weighted_minhash.py index 336218f7..0497e693 100644 --- a/datasketch/weighted_minhash.py +++ b/datasketch/weighted_minhash.py @@ -87,7 +87,7 @@ def minhash(self, v): raise ValueError("Input dimension mismatch, expecting %d" % self.dim) if not isinstance(v, np.ndarray): v = np.array(v) - hashvalues = np.zeros((self.sample_size, 2)) + hashvalues = np.zeros((self.sample_size, 2), dtype=np.int) for i in range(self.sample_size): t = np.floor((np.log(v) / self.rs[i]) + self.betas[i]) ln_y = (t - self.betas[i]) * self.rs[i] diff --git a/test/weighted_minhash_test.py b/test/weighted_minhash_test.py index 5046931f..f384909b 100644 --- a/test/weighted_minhash_test.py +++ b/test/weighted_minhash_test.py @@ -28,6 +28,7 @@ def test_minhash(self): self.assertIsInstance(m, WeightedMinHash) self.assertEqual(len(m.hashvalues), 4) self.assertEqual(len(m), 4) + self.assertTrue(m.hashvalues.dtype == np.int) if __name__ == "__main__": unittest.main()