Bumping version to 0.0b19
igorborgest committed Jul 25, 2019
1 parent 9377338 commit 9ffaf74
Showing 12 changed files with 49 additions and 30 deletions.
2 changes: 1 addition & 1 deletion awswrangler/__version__.py
@@ -1,4 +1,4 @@
 __title__ = "awswrangler"
 __description__ = "Utility belt to handle data on AWS."
-__version__ = "0.0b18"
+__version__ = "0.0b19"
 __license__ = "Apache License 2.0"
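
Since this file is pure metadata, a quick smoke test is to import the package and check the attribute. A minimal sketch, assuming awswrangler/__init__.py re-exports the names defined in __version__.py (common for this layout, but an assumption here):

# Hypothetical smoke test; assumes the package re-exports __version__.
import awswrangler

assert awswrangler.__version__ == "0.0b19"
print(awswrangler.__title__)  # "awswrangler" (also an assumed re-export)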
16 changes: 16 additions & 0 deletions awswrangler/pandas.py
@@ -154,6 +154,22 @@ def _read_csv_iterator(
         logger.debug(f"total_size: {total_size}")
         if total_size <= 0:
             raise EmptyS3Object(metadata)
+        elif total_size <= max_result_size:
+            yield Pandas._read_csv_once(
+                client_s3=client_s3,
+                bucket_name=bucket_name,
+                key_path=key_path,
+                header=header,
+                names=names,
+                dtype=dtype,
+                sep=sep,
+                lineterminator=lineterminator,
+                quotechar=quotechar,
+                quoting=quoting,
+                escapechar=escapechar,
+                parse_dates=parse_dates,
+                infer_datetime_format=infer_datetime_format,
+                encoding=encoding)
         else:
             bounders = calculate_bounders(num_items=total_size,
                                           max_size=max_result_size)
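The new elif branch lets _read_csv_iterator short-circuit when the whole S3 object fits inside max_result_size: it yields one DataFrame from a single read instead of splitting the object into byte ranges via calculate_bounders. A self-contained sketch of that dispatch, with the read helpers from the real code reduced to returned byte ranges:

from typing import Iterator, List, Tuple

def split_ranges(total_size: int, max_size: int) -> List[Tuple[int, int]]:
    # Stand-in for calculate_bounders: byte ranges no larger than max_size.
    return [(start, min(start + max_size, total_size))
            for start in range(0, total_size, max_size)]

def read_plan(total_size: int, max_result_size: int) -> Iterator[Tuple[int, int]]:
    # Mirrors the branching added to _read_csv_iterator above.
    if total_size <= 0:
        raise ValueError("empty S3 object")  # EmptyS3Object in the real code
    elif total_size <= max_result_size:
        yield (0, total_size)  # single read, like _read_csv_once
    else:
        yield from split_ranges(total_size, max_result_size)  # chunked reads

print(list(read_plan(250, 100)))  # [(0, 100), (100, 200), (200, 250)]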
1 change: 0 additions & 1 deletion awswrangler/session.py
@@ -227,7 +227,6 @@ class SessionPrimitives:
     It is required to "share" the session attributes to other processes.
     That must be "pickable"!
     """
-
     def __init__(
             self,
             profile_name=None,
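The docstring states the constraint behind this class: a live boto3 session is not picklable, so only plain attributes are shipped to worker processes, which rebuild their own sessions. A minimal sketch of that pattern (class and worker here are illustrative, not awswrangler's actual implementation):

import multiprocessing as mp

class Primitives:
    # Plain, picklable bag of attributes; no live clients or sessions inside.
    def __init__(self, profile_name=None, region_name=None):
        self.profile_name = profile_name
        self.region_name = region_name

def worker(primitives):
    # Each process would rebuild boto3.Session(**vars(primitives)) here;
    # we just echo an attribute to keep the sketch dependency-free.
    return f"rebuilt session for region={primitives.region_name}"

if __name__ == "__main__":
    p = Primitives(region_name="us-east-1")
    with mp.Pool(2) as pool:
        print(pool.map(worker, [p, p]))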
6 changes: 3 additions & 3 deletions building/Dockerfile
@@ -1,12 +1,12 @@
FROM lambci/lambda:build-python3.6
FROM lambci/lambda:build-python3.7

RUN pip install --upgrade pip

ADD requirements.txt /root/
RUN pip install -r /root/requirements.txt
RUN pip install --upgrade -r /root/requirements.txt
RUN rm -rf /root/requirements.txt
ADD requirements-dev.txt /root/
RUN pip install -r /root/requirements-dev.txt
RUN pip install --upgrade -r /root/requirements-dev.txt
RUN rm -rf /root/requirements-dev.txt

ENTRYPOINT ["/bin/sh"]
2 changes: 0 additions & 2 deletions building/build-image.sh
@@ -2,6 +2,4 @@

 cp ../requirements.txt .
 cp ../requirements-dev.txt .
-pip install -r requirements.txt
-pip install -r requirements-dev.txt
 docker build -t awswrangler-building .
6 changes: 3 additions & 3 deletions building/build-lambda-layer.sh
@@ -6,7 +6,7 @@ cd ~
 # Clone desired Arrow version
 rm -rf arrow dist pyarrow*
 git clone \
-    --branch apache-arrow-0.14.0 \
+    --branch apache-arrow-0.14.1 \
     --single-branch \
     https://github.com/apache/arrow.git

@@ -18,7 +18,7 @@ yum install -y \
     flex \
     autoconf \
     python36-devel
-pip install six numpy pandas cython pytest cmake wheel
+pip install --upgrade six numpy pandas cython pytest cmake wheel

 # Build Arrow
 export ARROW_HOME=$(pwd)/dist
@@ -55,7 +55,7 @@ cp dist/pyarrow-*.whl ~
 popd

 # Extracting files
-pip install pyarrow-*whl -t pyarrow_files
+pip install pyarrow-*whl -t pyarrow_files

 # Go back to AWSWRANGLER directory
 cd /aws-data-wrangler/
4 changes: 2 additions & 2 deletions install-dev.sh
@@ -1,8 +1,8 @@
 #!/bin/bash

 pip install --upgrade pip
-pip install -r requirements.txt
-pip install -r requirements-dev.txt
+pip install --upgrade -r requirements.txt
+pip install --upgrade -r requirements-dev.txt
 cd testing
 ./build-image.sh
 cd ../building
8 changes: 4 additions & 4 deletions requirements-dev.txt
@@ -1,7 +1,7 @@
yapf>=0.27.0
flake8>=3.7.7
pytest>=4.3.1
cfn-lint>=0.22.0
yapf>=0.28.0
flake8>=3.7.8
pytest>=5.0.1
cfn-lint>=0.22.4
twine>=1.13.0
pyspark>=2.4.3
wheel>=0.33.4
8 changes: 4 additions & 4 deletions requirements.txt
@@ -1,6 +1,6 @@
-boto3>=1.9.164
-s3fs>=0.2.2
-pandas>=0.24.2
-pyarrow>=0.14.0
+boto3>=1.9.196
+pandas>=0.25.0
+s3fs>=0.3.1
+pyarrow>=0.14.1
 tenacity>=5.0.4
 pg8000>=1.13.2
8 changes: 4 additions & 4 deletions setup.py
@@ -22,10 +22,10 @@
         exclude=["tests"]),
     python_requires=">=3.6",
     install_requires=[
-        "pyarrow>=0.14.0",
-        "pandas>=0.24.2",
-        "boto3>=1.9.130",
-        "s3fs>=0.2.1",
+        "pyarrow>=0.14.1",
+        "pandas>=0.25.0",
+        "boto3>=1.9.196",
+        "s3fs>=0.3.1",
         "tenacity>=5.0.4",
         "pg8000>=1.13.2",
     ],
2 changes: 0 additions & 2 deletions testing/build-image.sh
@@ -2,6 +2,4 @@

 cp ../requirements.txt .
 cp ../requirements-dev.txt .
-pip install -r requirements.txt
-pip install -r requirements-dev.txt
 docker build -t awswrangler-testing .
16 changes: 12 additions & 4 deletions testing/test_awswrangler/test_pandas.py
@@ -183,9 +183,15 @@ def test_to_s3(
     assert factor * len(dataframe.index) == len(dataframe2.index)


-@pytest.mark.parametrize("sample, row_num", [("data_samples/micro.csv", 30),
-                                             ("data_samples/small.csv", 100)])
-def test_read_sql_athena_iterator(session, bucket, database, sample, row_num):
+@pytest.mark.parametrize("sample, row_num, max_result_size",
+                         [("data_samples/micro.csv", 30, 100),
+                          ("data_samples/small.csv", 100, 100),
+                          ("data_samples/micro.csv", 30, 500),
+                          ("data_samples/small.csv", 100, 500),
+                          ("data_samples/micro.csv", 30, 3000),
+                          ("data_samples/small.csv", 100, 3000)])
+def test_read_sql_athena_iterator(session, bucket, database, sample, row_num,
+                                  max_result_size):
     dataframe_sample = pandas.read_csv(sample)
     path = f"s3://{bucket}/test/"
     session.pandas.to_parquet(dataframe=dataframe_sample,
@@ -196,7 +202,9 @@ def test_read_sql_athena_iterator(session, bucket, database, sample, row_num,
     total_count = 0
     for counter in range(10):
         dataframe_iter = session.pandas.read_sql_athena(
-            sql="select * from test", database=database, max_result_size=200)
+            sql="select * from test",
+            database=database,
+            max_result_size=max_result_size)
         total_count = 0
         for dataframe in dataframe_iter:
             total_count += len(dataframe.index)
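The widened parametrization drives max_result_size below, near, and above each sample's size, so both the single-shot and the chunked code paths in pandas.py are exercised. Outside the test suite the call shape is the same; a usage sketch, assuming AWS credentials and an Athena database with a test table (names are placeholders):

import awswrangler

session = awswrangler.Session()  # credentials resolved from the environment
dataframe_iter = session.pandas.read_sql_athena(
    sql="select * from test",       # placeholder table
    database="my_database",         # placeholder database
    max_result_size=500)            # bytes per chunk; small values force iteration

total_count = 0
for dataframe in dataframe_iter:
    total_count += len(dataframe.index)
print(total_count)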
