Skip to content

Commit e9e7897

Browse files
armaan-dhillon, JGSweets, gliptak, taylorfturner, and SchadtJ
authored
Staging release 0.13.0 (#1165)
* refactor: Upgrade the models to use keras 3.0 (#1138) * Replace snappy with cramjam (#1091) * add downloads tile (#1085) * Replace snappy with cramjam * Delete test_no_snappy --------- Co-authored-by: Taylor Turner <[email protected]> * pre-commit fix (#1122) * Bug fix for float precision calculation using categorical data with trailing zeros. (#1125) * Revert "Bug fix for float precision calculation using categorical data with t…" (#1133) This reverts commit d3159bd. * refactor: move layers outside of class * refactor: update model to keras 3.0 * fix: manifest * fix: bugs in compile and train * fix: bug in load_from_library * fix: bugs in CharCNN * refactor: loading tf model labeler * fix: bug in data_labeler identification * fix: update model to use proper softmax layer names * fix: formatting * fix: remove unused line * refactor: drop support for 3.8 * fix: comments * fix: comment --------- Co-authored-by: Gábor Lipták <[email protected]> Co-authored-by: Taylor Turner <[email protected]> Co-authored-by: James Schadt <[email protected]> * Fix Tox (#1143) * tox new * update * update * update * update * update * update * update * update tox.ini * update * update * remove docs * empty retrigger * update (#1146) * Add Python 3.11 to GHA (#1090) * add downloads tile (#1085) * Add Python 3.11 to GHA * Replace snappy with cramjam (#1091) * add downloads tile (#1085) * Replace snappy with cramjam * Delete test_no_snappy --------- Co-authored-by: Taylor Turner <[email protected]> * Update dask modules * Install dask dataframe * Update dask modules in precommit * Correct copy/paste error * Try again to clear Unicode * Rolled back pre-commit dask version * Add py311 to tox * Bump dask to 2024.4.1 * Bump python-snappy 0.7.1 * Rewrite labeler test * Correct isort * Satisfy black * And flake8 * Synced with requirements --------- Co-authored-by: Taylor Turner <[email protected]> * [Vuln Fix]: Resolve mend vulnerabilities related to requests. 
(#1162) * resolved check-manifest issue * updating keras version pin to <=3.4.0 * adding comment in requirements.txt to trigger mend check --------- Co-authored-by: Armaan <[email protected]> --------- Co-authored-by: JGSweets <[email protected]> Co-authored-by: Gábor Lipták <[email protected]> Co-authored-by: Taylor Turner <[email protected]> Co-authored-by: James Schadt <[email protected]> Co-authored-by: Michael Davis <[email protected]>
1 parent 4545841 commit e9e7897

7 files changed

+11
-66
lines changed

.pre-commit-config.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ repos:
4848
# requirements.txt
4949
h5py>=2.10.0,
5050
wheel>=0.33.1,
51-
numpy>=1.22.0,
51+
numpy<2.0.0,
5252
pandas>=1.1.2,
5353
python-dateutil>=2.7.5,
5454
pytz>=2020.1,
5555
pyarrow>=1.0.1,
5656
chardet>=3.0.4,
5757
fastavro>=1.0.0.post1,
58-
python-snappy>=0.5.4,
58+
python-snappy>=0.7.1,
5959
charset-normalizer>=1.3.6,
6060
psutil>=4.0.0,
6161
scipy>=1.4.1,
@@ -80,7 +80,7 @@ repos:
8080

8181
# requirements-ml.txt
8282
scikit-learn>=0.23.2,
83-
'keras>=2.4.3,<3.0.0',
83+
'keras>=2.4.3,<=3.4.0',
8484
rapidfuzz>=2.6.1,
8585
"tensorflow>=2.6.4,<2.15.0; sys.platform != 'darwin'",
8686
"tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64'",
@@ -108,7 +108,7 @@ repos:
108108
rev: "0.48"
109109
hooks:
110110
- id: check-manifest
111-
additional_dependencies: ['h5py', 'wheel', 'future', 'numpy', 'pandas',
111+
additional_dependencies: ['h5py', 'wheel', 'future', 'numpy<2.0.0', 'pandas',
112112
'python-dateutil', 'pytz', 'pyarrow', 'chardet', 'fastavro',
113113
'python-snappy', 'charset-normalizer', 'psutil', 'scipy', 'requests',
114114
'networkx','typing-extensions', 'HLL', 'datasketches', 'boto3']

dataprofiler/__init__.py

-16
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,6 @@
2020
from .validators.base_validators import Validator
2121
from .version import __version__
2222

23-
try:
24-
import snappy
25-
except ImportError:
26-
import warnings
27-
28-
warnings.warn(
29-
"Snappy must be installed to use parquet/avro datasets."
30-
"\n\n"
31-
"For macOS use Homebrew:\n"
32-
"\t`brew install snappy`"
33-
"\n\n"
34-
"For linux use apt-get:\n`"
35-
"\tsudo apt-get -y install libsnappy-dev`\n",
36-
ImportWarning,
37-
)
38-
3923

4024
def set_seed(seed=None):
4125
# also check it's an integer

dataprofiler/tests/test_data_profiler.py

-40
Original file line numberDiff line numberDiff line change
@@ -56,46 +56,6 @@ def test_data_profiling(self):
5656
self.assertIsNotNone(profile.profile)
5757
self.assertIsNotNone(profile.report())
5858

59-
def test_no_snappy(self):
60-
import importlib
61-
import sys
62-
import types
63-
64-
orig_import = __import__
65-
# necessary for any wrapper around the library to test if snappy caught
66-
# as an issue
67-
68-
def reload_data_profiler():
69-
"""Recursively reload modules."""
70-
sys_modules = sys.modules.copy()
71-
for module_name, module in sys_modules.items():
72-
# Only reload top level of the dataprofiler
73-
if "dataprofiler" in module_name and len(module_name.split(".")) < 3:
74-
if isinstance(module, types.ModuleType):
75-
importlib.reload(module)
76-
77-
def import_mock(name, *args, **kwargs):
78-
if name == "snappy":
79-
raise ImportError("test")
80-
return orig_import(name, *args, **kwargs)
81-
82-
with mock.patch("builtins.__import__", side_effect=import_mock):
83-
with self.assertWarns(ImportWarning) as w:
84-
import dataprofiler
85-
86-
reload_data_profiler()
87-
88-
self.assertEqual(
89-
str(w.warning),
90-
"Snappy must be installed to use parquet/avro datasets."
91-
"\n\n"
92-
"For macOS use Homebrew:\n"
93-
"\t`brew install snappy`"
94-
"\n\n"
95-
"For linux use apt-get:\n`"
96-
"\tsudo apt-get -y install libsnappy-dev`\n",
97-
)
98-
9959
def test_no_tensorflow(self):
10060
import sys
10161

requirements-dev.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
check-manifest>=0.48
1+
check-manifest>=0.50
22
black>=24.3.0
33
isort==5.12.0
44
pre-commit==2.19.0

requirements-ml.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
scikit-learn>=0.23.2
2-
keras>=3.0.0
2+
keras<=3.4.0
33
rapidfuzz>=2.6.1
44
tensorflow>=2.16.0; sys.platform != 'darwin'
55
tensorflow>=2.16.0; sys_platform == 'darwin' and platform_machine != 'arm64'

requirements.txt

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
11
h5py>=2.10.0
22
wheel>=0.33.1
3-
numpy>=1.22.0
3+
numpy<2.0.0
44
pandas>=1.1.2
55
python-dateutil>=2.7.5
66
pytz>=2020.1
77
pyarrow>=1.0.1
88
chardet>=3.0.4
99
fastavro>=1.1.0
10-
python-snappy>=0.5.4
10+
python-snappy>=0.7.1
1111
charset-normalizer>=1.3.6
1212
psutil>=4.0.0
1313
scipy>=1.10.0
14-
requests>=2.28.1
14+
requests==2.32.*
1515
networkx>=2.5.1
1616
typing-extensions>=3.10.0.2
1717
HLL>=2.0.3
1818
datasketches>=4.1.0
1919
packaging>=23.0
2020
boto3>=1.28.61
21+
# adding comment to trigger mend check

tox.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[tox]
2-
envlist = py39, py310, 311, pypi-description, manifest, precom
2+
envlist = py39, py310, py311, pypi-description, manifest, precom
33

44

55
[testenv]

0 commit comments

Comments
 (0)