Bumping version to 0.0b19
igorborgest committed Jul 25, 2019
1 parent 9377338 commit 9ffaf74
Showing 12 changed files with 49 additions and 30 deletions.
2 changes: 1 addition & 1 deletion awswrangler/__version__.py
@@ -1,4 +1,4 @@
 __title__ = "awswrangler"
 __description__ = "Utility belt to handle data on AWS."
-__version__ = "0.0b18"
+__version__ = "0.0b19"
 __license__ = "Apache License 2.0"
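
Since this file is pure metadata, a quick smoke test is to import the package and check the attribute. A minimal sketch, assuming awswrangler/__init__.py re-exports the names defined in __version__.py (common for this layout, but an assumption here):

# Hypothetical smoke test; assumes the package re-exports __version__.
import awswrangler

assert awswrangler.__version__ == "0.0b19"
print(awswrangler.__title__)  # "awswrangler" (also an assumed re-export)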
16 changes: 16 additions & 0 deletions awswrangler/pandas.py
@@ -154,6 +154,22 @@ def _read_csv_iterator(
         logger.debug(f"total_size: {total_size}")
         if total_size <= 0:
             raise EmptyS3Object(metadata)
+        elif total_size <= max_result_size:
+            yield Pandas._read_csv_once(
+                client_s3=client_s3,
+                bucket_name=bucket_name,
+                key_path=key_path,
+                header=header,
+                names=names,
+                dtype=dtype,
+                sep=sep,
+                lineterminator=lineterminator,
+                quotechar=quotechar,
+                quoting=quoting,
+                escapechar=escapechar,
+                parse_dates=parse_dates,
+                infer_datetime_format=infer_datetime_format,
+                encoding=encoding)
         else:
             bounders = calculate_bounders(num_items=total_size,
                                           max_size=max_result_size)
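The new elif branch lets _read_csv_iterator short-circuit when the whole S3 object fits inside max_result_size: it yields one DataFrame from a single read instead of splitting the object into byte ranges via calculate_bounders. A self-contained sketch of that dispatch, with the read helpers from the real code reduced to returned byte ranges:

from typing import Iterator, List, Tuple

def split_ranges(total_size: int, max_size: int) -> List[Tuple[int, int]]:
    # Stand-in for calculate_bounders: byte ranges no larger than max_size.
    return [(start, min(start + max_size, total_size))
            for start in range(0, total_size, max_size)]

def read_plan(total_size: int, max_result_size: int) -> Iterator[Tuple[int, int]]:
    # Mirrors the branching added to _read_csv_iterator above.
    if total_size <= 0:
        raise ValueError("empty S3 object")  # EmptyS3Object in the real code
    elif total_size <= max_result_size:
        yield (0, total_size)  # single read, like _read_csv_once
    else:
        yield from split_ranges(total_size, max_result_size)  # chunked reads

print(list(read_plan(250, 100)))  # [(0, 100), (100, 200), (200, 250)]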
1 change: 0 additions & 1 deletion awswrangler/session.py
@@ -227,7 +227,6 @@ class SessionPrimitives:
     It is required to "share" the session attributes to other processes.
     That must be "pickable"!
     """
-
     def __init__(
             self,
             profile_name=None,
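The docstring states the constraint behind this class: a live boto3 session is not picklable, so only plain attributes are shipped to worker processes, which rebuild their own sessions. A minimal sketch of that pattern (class and worker here are illustrative, not awswrangler's actual implementation):

import multiprocessing as mp

class Primitives:
    # Plain, picklable bag of attributes; no live clients or sessions inside.
    def __init__(self, profile_name=None, region_name=None):
        self.profile_name = profile_name
        self.region_name = region_name

def worker(primitives):
    # Each process would rebuild boto3.Session(**vars(primitives)) here;
    # we just echo an attribute to keep the sketch dependency-free.
    return f"rebuilt session for region={primitives.region_name}"

if __name__ == "__main__":
    p = Primitives(region_name="us-east-1")
    with mp.Pool(2) as pool:
        print(pool.map(worker, [p, p]))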
6 changes: 3 additions & 3 deletions building/Dockerfile
@@ -1,12 +1,12 @@
FROM lambci/lambda:build-python3.6
FROM lambci/lambda:build-python3.7

RUN pip install --upgrade pip

ADD requirements.txt /root/
RUN pip install -r /root/requirements.txt
RUN pip install --upgrade -r /root/requirements.txt
RUN rm -rf /root/requirements.txt
ADD requirements-dev.txt /root/
RUN pip install -r /root/requirements-dev.txt
RUN pip install --upgrade -r /root/requirements-dev.txt
RUN rm -rf /root/requirements-dev.txt

ENTRYPOINT ["/bin/sh"]
2 changes: 0 additions & 2 deletions building/build-image.sh
@@ -2,6 +2,4 @@

 cp ../requirements.txt .
 cp ../requirements-dev.txt .
-pip install -r requirements.txt
-pip install -r requirements-dev.txt
 docker build -t awswrangler-building .
6 changes: 3 additions & 3 deletions building/build-lambda-layer.sh
@@ -6,7 +6,7 @@ cd ~
 # Clone desired Arrow version
 rm -rf arrow dist pyarrow*
 git clone \
-    --branch apache-arrow-0.14.0 \
+    --branch apache-arrow-0.14.1 \
     --single-branch \
     https://github.com/apache/arrow.git

@@ -18,7 +18,7 @@ yum install -y \
     flex \
     autoconf \
     python36-devel
-pip install six numpy pandas cython pytest cmake wheel
+pip install --upgrade six numpy pandas cython pytest cmake wheel

 # Build Arrow
 export ARROW_HOME=$(pwd)/dist
@@ -55,7 +55,7 @@ cp dist/pyarrow-*.whl ~
 popd

 # Extracting files
-pip install pyarrow-*whl -t pyarrow_files
+pip install pyarrow-*whl -t pyarrow_files

 # Go back to AWSWRANGLER directory
 cd /aws-data-wrangler/
4 changes: 2 additions & 2 deletions install-dev.sh
@@ -1,8 +1,8 @@
 #!/bin/bash

 pip install --upgrade pip
-pip install -r requirements.txt
-pip install -r requirements-dev.txt
+pip install --upgrade -r requirements.txt
+pip install --upgrade -r requirements-dev.txt
 cd testing
 ./build-image.sh
 cd ../building
8 changes: 4 additions & 4 deletions requirements-dev.txt
@@ -1,7 +1,7 @@
yapf>=0.27.0
flake8>=3.7.7
pytest>=4.3.1
cfn-lint>=0.22.0
yapf>=0.28.0
flake8>=3.7.8
pytest>=5.0.1
cfn-lint>=0.22.4
twine>=1.13.0
pyspark>=2.4.3
wheel>=0.33.4
8 changes: 4 additions & 4 deletions requirements.txt
@@ -1,6 +1,6 @@
-boto3>=1.9.164
-s3fs>=0.2.2
-pandas>=0.24.2
-pyarrow>=0.14.0
+boto3>=1.9.196
+pandas>=0.25.0
+s3fs>=0.3.1
+pyarrow>=0.14.1
 tenacity>=5.0.4
 pg8000>=1.13.2
8 changes: 4 additions & 4 deletions setup.py
@@ -22,10 +22,10 @@
         exclude=["tests"]),
     python_requires=">=3.6",
     install_requires=[
-        "pyarrow>=0.14.0",
-        "pandas>=0.24.2",
-        "boto3>=1.9.130",
-        "s3fs>=0.2.1",
+        "pyarrow>=0.14.1",
+        "pandas>=0.25.0",
+        "boto3>=1.9.196",
+        "s3fs>=0.3.1",
         "tenacity>=5.0.4",
         "pg8000>=1.13.2",
     ],
2 changes: 0 additions & 2 deletions testing/build-image.sh
@@ -2,6 +2,4 @@

 cp ../requirements.txt .
 cp ../requirements-dev.txt .
-pip install -r requirements.txt
-pip install -r requirements-dev.txt
 docker build -t awswrangler-testing .
16 changes: 12 additions & 4 deletions testing/test_awswrangler/test_pandas.py
@@ -183,9 +183,15 @@ def test_to_s3(
     assert factor * len(dataframe.index) == len(dataframe2.index)


-@pytest.mark.parametrize("sample, row_num", [("data_samples/micro.csv", 30),
-                                             ("data_samples/small.csv", 100)])
-def test_read_sql_athena_iterator(session, bucket, database, sample, row_num):
+@pytest.mark.parametrize("sample, row_num, max_result_size",
+                         [("data_samples/micro.csv", 30, 100),
+                          ("data_samples/small.csv", 100, 100),
+                          ("data_samples/micro.csv", 30, 500),
+                          ("data_samples/small.csv", 100, 500),
+                          ("data_samples/micro.csv", 30, 3000),
+                          ("data_samples/small.csv", 100, 3000)])
+def test_read_sql_athena_iterator(session, bucket, database, sample, row_num,
+                                  max_result_size):
     dataframe_sample = pandas.read_csv(sample)
     path = f"s3://{bucket}/test/"
     session.pandas.to_parquet(dataframe=dataframe_sample,
@@ -196,7 +202,9 @@ def test_read_sql_athena_iterator(session, bucket, database, sample, row_num,
     total_count = 0
     for counter in range(10):
         dataframe_iter = session.pandas.read_sql_athena(
-            sql="select * from test", database=database, max_result_size=200)
+            sql="select * from test",
+            database=database,
+            max_result_size=max_result_size)
         total_count = 0
         for dataframe in dataframe_iter:
             total_count += len(dataframe.index)
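The widened parametrization drives max_result_size below, near, and above each sample's size, so both the single-shot and the chunked code paths in pandas.py are exercised. Outside the test suite the call shape is the same; a usage sketch, assuming AWS credentials and an Athena database with a test table (names are placeholders):

import awswrangler

session = awswrangler.Session()  # credentials resolved from the environment
dataframe_iter = session.pandas.read_sql_athena(
    sql="select * from test",       # placeholder table
    database="my_database",         # placeholder database
    max_result_size=500)            # bytes per chunk; small values force iteration

total_count = 0
for dataframe in dataframe_iter:
    total_count += len(dataframe.index)
print(total_count)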
