Fix the utility to grab S3 JSON files
This additionally adds new test coverage and
works around https://github.com/travis-ci/travis-ci/issues/7940 .
Dexterp37 committed Sep 6, 2017
commit 130b08e (parent: eacd8a3)
Showing 4 changed files with 71 additions and 10 deletions.
.travis.yml: 2 additions & 0 deletions
@@ -15,6 +15,8 @@ install:
 - pip install -e . --process-dependency-links
 - pip install -r test_requirements.txt
 script:
+# The line below is to work around https://github.com/travis-ci/travis-ci/issues/7940
+- export BOTO_CONFIG=/dev/null
 - flake8 taar tests
 - py.test --cov taar tests
 after_success:
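For context on the BOTO_CONFIG workaround (an inference from the linked Travis issue, not something the commit states): Travis images ship an /etc/boto.cfg tailored to Python 2, and the legacy boto library that moto pulled in at the time fails while processing it under Python 3. Pointing BOTO_CONFIG at /dev/null hands boto an empty, harmless config instead. A minimal sketch of the idea:

# Hypothetical illustration, not part of the commit: boto reads the file
# named by BOTO_CONFIG when it is first imported, so the override must be
# set before anything (moto, for instance) imports boto.
import os

os.environ["BOTO_CONFIG"] = "/dev/null"  # empty config, nothing to misparse

import boto  # noqa: E402  (without the override this import can fail on Travis)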
taar/recommenders/utils.py: 37 additions & 10 deletions
@@ -1,10 +1,14 @@
-import boto3
 import json
 import logging
 import os
-from tempfile import gettempdir
+
+import boto3
+from botocore.exceptions import ClientError
 import requests
+import shutil
-from botocore.exceptions import ClientError
+from tempfile import gettempdir, NamedTemporaryFile
 
 
 logger = logging.getLogger(__name__)
 
 
@@ -24,21 +28,44 @@ def fetch_json(uri):
     return r.json()
 
 
+def get_s3_cache_filename(s3_bucket, s3_key):
+    return os.path.join(gettempdir(),
+                        '_'.join([s3_bucket, s3_key]).replace('/', '_'))
+
+
 def get_s3_json_content(s3_bucket, s3_key):
     """Download and parse a json file stored on AWS S3.
     The file is downloaded and then cached for future use.
     """
-    local_filename = '_'.join([s3_bucket, s3_key]).replace('/', '_')
-    local_path = os.path.join(gettempdir(), local_filename)
+    local_path = get_s3_cache_filename(s3_bucket, s3_key)
 
     if not os.path.exists(local_path):
-        with open(local_path, 'wb') as data:
+        # Use NamedTemporaryFile, so that the file gets removed on close.
+        with NamedTemporaryFile() as temp_file:
             try:
                 s3 = boto3.client('s3')
-                s3.download_fileobj(s3_bucket, s3_key, data)
+                s3.download_fileobj(s3_bucket, s3_key, temp_file)
+                # Flush any buffered data to disk.
+                temp_file.flush()
             except ClientError:
                 logger.exception("Failed to download from S3", extra={
                     "bucket": s3_bucket,
                     "key": s3_key})
                 return None
 
-    with open(local_path, 'r') as data:
-        return json.loads(data.read())
+        with open(local_path, 'wb') as data:
+            temp_file.seek(0)
+            shutil.copyfileobj(temp_file, data)
+
+    # Cached files can end up corrupted. Account for the
+    # sad reality of life.
+    try:
+        with open(local_path, 'r') as data:
+            return json.loads(data.read())
+    except ValueError:
+        # Remove the corrupted cache file.
+        logger.error("Removing corrupted S3 cache", extra={"cache_path": local_path})
+        os.remove(local_path)
+
+    return None
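As a quick illustration of how the two helpers above fit together, here is a hedged usage sketch; the bucket and key names are placeholders, not values taken from taar:

# A usage sketch, assuming AWS credentials are available in the environment.
from taar.recommenders.utils import get_s3_cache_filename, get_s3_json_content

bucket, key = 'telemetry-example-bucket', 'taar/models/example.json'

# Slashes in the key are flattened into underscores, so the cache lands at
# something like /tmp/telemetry-example-bucket_taar_models_example.json.
print(get_s3_cache_filename(bucket, key))

# The first call downloads from S3 and populates the cache; later calls read
# the cached copy. None means the download failed or the cache was corrupted.
content = get_s3_json_content(bucket, key)
if content is None:
    print('no usable JSON for', bucket, key)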
test_requirements.txt: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 pytest
 pytest-cov
 flake8
+moto
 responses
 coveralls
tests/test_utils.py: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+import boto3
+import os
+import taar.recommenders.utils as utils
+from moto import mock_s3
+
+
+@mock_s3
+def test_get_non_existing():
+    bucket = 'test-bucket'
+    key = 'non-existing.json'
+
+    conn = boto3.resource('s3', region_name='us-west-2')
+    conn.create_bucket(Bucket=bucket)
+
+    assert utils.get_s3_json_content(bucket, key) is None
+    assert os.path.exists(utils.get_s3_cache_filename(bucket, key)) is False
+
+
+@mock_s3
+def test_get_corrupted():
+    bucket = 'test-bucket'
+    key = 'corrupted.json'
+
+    conn = boto3.resource('s3', region_name='us-west-2')
+    conn.create_bucket(Bucket=bucket)
+
+    # Write a corrupted file to the mocked S3.
+    conn.Object(bucket, key).put(Body='This is invalid JSON.')
+
+    assert utils.get_s3_json_content(bucket, key) is None
+    assert os.path.exists(utils.get_s3_cache_filename(bucket, key)) is False
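The two tests above cover the failure paths; a happy-path companion in the same style might look like the sketch below (an assumption, not part of the commit; it reuses the same moto setup and a made-up valid.json key):

import boto3
import json
import os
import taar.recommenders.utils as utils
from moto import mock_s3


@mock_s3
def test_get_valid_json():
    bucket = 'test-bucket'
    key = 'valid.json'

    conn = boto3.resource('s3', region_name='us-west-2')
    conn.create_bucket(Bucket=bucket)

    # Write a well-formed JSON document to the mocked S3.
    conn.Object(bucket, key).put(Body=json.dumps({'answer': 42}))

    # The cache path depends only on bucket and key, so clear any file
    # left over from a previous run before asserting.
    cache_path = utils.get_s3_cache_filename(bucket, key)
    if os.path.exists(cache_path):
        os.remove(cache_path)

    assert utils.get_s3_json_content(bucket, key) == {'answer': 42}
    assert os.path.exists(cache_path)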
