diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7baeb59e..fd1768f0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,14 +48,14 @@ repos: # requirements.txt h5py>=2.10.0, wheel>=0.33.1, - numpy>=1.22.0, + numpy<2.0.0, pandas>=1.1.2, python-dateutil>=2.7.5, pytz>=2020.1, pyarrow>=1.0.1, chardet>=3.0.4, fastavro>=1.0.0.post1, - python-snappy>=0.5.4, + python-snappy>=0.7.1, charset-normalizer>=1.3.6, psutil>=4.0.0, scipy>=1.4.1, @@ -80,7 +80,7 @@ repos: # requirements-ml.txt scikit-learn>=0.23.2, - 'keras>=2.4.3,<3.0.0', + 'keras>=2.4.3,<=3.4.0', rapidfuzz>=2.6.1, "tensorflow>=2.6.4,<2.15.0; sys.platform != 'darwin'", "tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64'", @@ -108,7 +108,7 @@ repos: rev: "0.48" hooks: - id: check-manifest - additional_dependencies: ['h5py', 'wheel', 'future', 'numpy', 'pandas', + additional_dependencies: ['h5py', 'wheel', 'future', 'numpy<2.0.0', 'pandas', 'python-dateutil', 'pytz', 'pyarrow', 'chardet', 'fastavro', 'python-snappy', 'charset-normalizer', 'psutil', 'scipy', 'requests', 'networkx','typing-extensions', 'HLL', 'datasketches', 'boto3'] diff --git a/dataprofiler/__init__.py b/dataprofiler/__init__.py index 2e89d3e2..5f218bd8 100644 --- a/dataprofiler/__init__.py +++ b/dataprofiler/__init__.py @@ -20,22 +20,6 @@ from .validators.base_validators import Validator from .version import __version__ -try: - import snappy -except ImportError: - import warnings - - warnings.warn( - "Snappy must be installed to use parquet/avro datasets." - "\n\n" - "For macOS use Homebrew:\n" - "\t`brew install snappy`" - "\n\n" - "For linux use apt-get:\n`" - "\tsudo apt-get -y install libsnappy-dev`\n", - ImportWarning, - ) - def set_seed(seed=None): # also check it's an integer diff --git a/dataprofiler/tests/test_data_profiler.py b/dataprofiler/tests/test_data_profiler.py index ef7664ce..9ebdfa03 100644 --- a/dataprofiler/tests/test_data_profiler.py +++ b/dataprofiler/tests/test_data_profiler.py @@ -56,46 +56,6 @@ def test_data_profiling(self): self.assertIsNotNone(profile.profile) self.assertIsNotNone(profile.report()) - def test_no_snappy(self): - import importlib - import sys - import types - - orig_import = __import__ - # necessary for any wrapper around the library to test if snappy caught - # as an issue - - def reload_data_profiler(): - """Recursively reload modules.""" - sys_modules = sys.modules.copy() - for module_name, module in sys_modules.items(): - # Only reload top level of the dataprofiler - if "dataprofiler" in module_name and len(module_name.split(".")) < 3: - if isinstance(module, types.ModuleType): - importlib.reload(module) - - def import_mock(name, *args, **kwargs): - if name == "snappy": - raise ImportError("test") - return orig_import(name, *args, **kwargs) - - with mock.patch("builtins.__import__", side_effect=import_mock): - with self.assertWarns(ImportWarning) as w: - import dataprofiler - - reload_data_profiler() - - self.assertEqual( - str(w.warning), - "Snappy must be installed to use parquet/avro datasets." - "\n\n" - "For macOS use Homebrew:\n" - "\t`brew install snappy`" - "\n\n" - "For linux use apt-get:\n`" - "\tsudo apt-get -y install libsnappy-dev`\n", - ) - def test_no_tensorflow(self): import sys diff --git a/requirements-dev.txt b/requirements-dev.txt index f6343283..8c7c7868 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -check-manifest>=0.48 +check-manifest>=0.50 black>=24.3.0 isort==5.12.0 pre-commit==2.19.0 diff --git a/requirements-ml.txt b/requirements-ml.txt index 6da08b31..31f9ca63 100644 --- a/requirements-ml.txt +++ b/requirements-ml.txt @@ -1,5 +1,5 @@ scikit-learn>=0.23.2 -keras>=3.0.0 +keras<=3.4.0 rapidfuzz>=2.6.1 tensorflow>=2.16.0; sys.platform != 'darwin' tensorflow>=2.16.0; sys_platform == 'darwin' and platform_machine != 'arm64' diff --git a/requirements.txt b/requirements.txt index 152b5eb3..3ccc4c6f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,20 +1,21 @@ h5py>=2.10.0 wheel>=0.33.1 -numpy>=1.22.0 +numpy<2.0.0 pandas>=1.1.2 python-dateutil>=2.7.5 pytz>=2020.1 pyarrow>=1.0.1 chardet>=3.0.4 fastavro>=1.1.0 -python-snappy>=0.5.4 +python-snappy>=0.7.1 charset-normalizer>=1.3.6 psutil>=4.0.0 scipy>=1.10.0 -requests>=2.28.1 +requests==2.32.* networkx>=2.5.1 typing-extensions>=3.10.0.2 HLL>=2.0.3 datasketches>=4.1.0 packaging>=23.0 boto3>=1.28.61 +# adding comment to trigger mend check diff --git a/tox.ini b/tox.ini index 21d418e9..4ee6081b 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py39, py310, 311, pypi-description, manifest, precom +envlist = py39, py310, py311, pypi-description, manifest, precom [testenv]