Skip to content

Commit

Permalink
fix up directory init/path and add unit tests with coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
brianthomas committed Mar 24, 2017
1 parent c46d201 commit 3663727
Show file tree
Hide file tree
Showing 9 changed files with 235 additions and 191 deletions.
Empty file added apod/__init__.py
Empty file.
208 changes: 33 additions & 175 deletions apod/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,17 @@
@author=bathomas @[email protected]
'''

from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from datetime import datetime
from flask import request, jsonify, render_template, Flask
from flask.ext.cors import CORS
import json
import requests
from flask_cors import CORS, cross_origin
from utility import parse_apod, get_concepts
import logging

app = Flask(__name__)
CORS(app)

LOG = logging.getLogger(__name__)
logging.basicConfig(level=logging.WARN)
logging.basicConfig(level=logging.DEBUG)
#LOG.setLevel(logging.DEBUG)

# this should reflect both this service and the backing
Expand All @@ -30,8 +28,6 @@
ALLOWED_APOD_FIELDS = ['concept_tags', 'date', 'hd']
ALCHEMY_API_KEY = None

# location of backing APOD service
BASE = 'http://apod.nasa.gov/apod/'

try:
with open('alchemy_api.key', 'r') as f:
Expand All @@ -50,79 +46,54 @@ def _abort(code, msg, usage=True):

return response

def _get_apod_chars(dt):

media_type = 'image'
date_str = dt.strftime('%y%m%d')
apod_url = '%sap%s.html' % (BASE, date_str)
LOG.debug("OPENING URL:"+apod_url)
soup = BeautifulSoup(requests.get(apod_url).text, "html.parser")
LOG.debug("getting the data url")
data = None
hd_data = None
if soup.img:
# it is an image, so get both the low- and high-resolution data
data = BASE + soup.img['src']
hd_data = data

LOG.debug("getting the link for hd_data")
for link in soup.find_all('a', href=True):
if link['href'] and link['href'].startswith("image"):
hd_data = BASE + link['href']
break
else:
# its a video
media_type = 'video'
data = soup.iframe['src']

return _explanation(soup), _title(soup), _copyright(soup), data, hd_data, media_type


def _apod_characteristics(dt, use_default_today_date=False):
"""Accepts a date in '%Y-%m-%d' format. Returns the URL of the APOD image
of that day, noting that """
def _usage(joinstr="', '", prestr="'"):
return "Allowed request fields for "+APOD_METHOD_NAME+" method are "+prestr+joinstr.join(ALLOWED_APOD_FIELDS)

LOG.debug("apod chars called")
def _validate (data):
LOG.debug("_validate(data) called")
for key in data:
if key not in ALLOWED_APOD_FIELDS:
return False
return True

def _validate_date (dt):

try:
return _get_apod_chars(dt)
LOG.debug("_validate_date(dt) called")
today = datetime.today()
begin = datetime (1995, 6, 16) # first APOD image date

except Exception as ex:
# validate input
if (dt > today) or (dt < begin):

# handle edge case where the service local time
# miss-matches with 'todays date' of the underlying APOD
# service (can happen because they are deployed in different
# timezones). Use the fallback of prior day's date
today_str = today.strftime('%b %d, %Y')
begin_str = begin.strftime('%b %d, %Y')

raise ValueError('Date must be between %s and %s.' % (begin_str, today_str))

if use_default_today_date:
# try to get the day before
dt = dt - timedelta(days=1)
return _get_apod_chars(dt)
else:
# pass exception up the call stack
raise Exception(ex)

def _apod_handler(dt, use_concept_tags=False, use_default_today_date=False):
"""Accepts a parameter dictionary. Returns the response object to be
served through the API."""
try:
d = {}
explanation, title, copyright, url, hdurl, media_type = _apod_characteristics(dt, use_default_today_date)
explanation, title, copyrght, url, hdurl, media_type = parse_apod(dt, use_default_today_date)
LOG.debug("managed to get apod characteristics")

d['explanation'] = explanation
d['title'] = title
d['url'] = url
if hdurl:
d['hdurl'] = hdurl
d['media_type'] = media_type
if copyright:
d['copyright'] = copyright

if copyrght:
d['copyright'] = copyrght

if use_concept_tags:
if ALCHEMY_API_KEY == None:
d['concepts'] = "concept_tags functionality turned off in current service"
else:
d['concepts'] = _concepts(explanation, ALCHEMY_API_KEY)
d['concepts'] = get_concepts(request, explanation, ALCHEMY_API_KEY)

return d

except Exception as e:
Expand All @@ -131,121 +102,7 @@ def _apod_handler(dt, use_concept_tags=False, use_default_today_date=False):
# return code 500 here
return _abort(500, "Internal Service Error", usage=False)

def _concepts(text, apikey):
"""Returns the concepts associated with the text, interleaved with integer
keys indicating the index."""
cbase = 'http://access.alchemyapi.com/calls/text/TextGetRankedConcepts'

params = dict(
outputMode='json',
apikey=apikey,
text=text
)

try:

LOG.debug("Getting response")
response = json.loads(request.get(cbase, fields=params))
clist = [concept['text'] for concept in response['concepts']]
return {k: v for k, v in zip(range(len(clist)), clist)}

except Exception as ex:
raise ValueError(ex)


def _title(soup):
"""Accepts a BeautifulSoup object for the APOD HTML page and returns the
APOD image title. Highly idiosyncratic with adaptations for different
HTML structures that appear over time."""
LOG.debug("getting the title")
try:
# Handler for later APOD entries
center_selection = soup.find_all('center')[1]
bold_selection = center_selection.find_all('b')[0]
return bold_selection.text.strip(' ')
except Exception:
# Handler for early APOD entries
text = soup.title.text.split(' - ')[-1]
return text.strip()
else:
raise ValueError('Unsupported schema for given date.')

def _copyright(soup):
"""Accepts a BeautifulSoup object for the APOD HTML page and returns the
APOD image copyright. Highly idiosyncratic with adaptations for different
HTML structures that appear over time."""
LOG.debug("getting the copyright")
try:
# Handler for later APOD entries

# There's no uniform handling of copyright (sigh). Well, we just have to
# try every stinking text block we find...

for element in soup.findAll('b', text=True):
#LOG.debug("TEXT: "+element.text)
# search text for explicit match
if "Copyright" in element.text:
LOG.debug("Found Copyright text:"+str(element.text))
LOG.debug(" element:"+str(element))
# pull the copyright from the link text
link_selection = element.parent.find_all('a')[0]
if "Copyright" in link_selection.text:
# hmm. older style, try to grab from 2nd link
LOG.debug("trying olderstyle copyright grab")
link_selection = element.parent.find_all('a')[1]
# return
return link_selection.text.strip(' ')

except Exception as ex:
LOG.error(str(ex))
raise ValueError('Unsupported schema for given date.')

# NO stated copyright, so we return None
return None

def _explanation(soup):
"""Accepts a BeautifulSoup object for the APOD HTML page and returns the
APOD image explanation. Highly idiosyncratic."""
# Handler for later APOD entries
LOG.debug("getting the explanation")
s = soup.find_all('p')[2].text
s = s.replace('\n', ' ')
s = s.replace(' ', ' ')
s = s.strip(' ').strip('Explanation: ')
s = s.split(' Tomorrow\'s picture')[0]
s = s.split('digg_url')[0]
s = s.strip(' ')
if s == '':
# Handler for earlier APOD entries
texts = [x.strip() for x in soup.text.split('\n')]
begin_idx = texts.index('Explanation:') + 1
idx = texts[begin_idx:].index('')
s = (' ').join(texts[begin_idx:begin_idx + idx])
return s

def _usage(joinstr="', '", prestr="'"):
return "Allowed request fields for "+APOD_METHOD_NAME+" method are "+prestr+joinstr.join(ALLOWED_APOD_FIELDS)

def _validate (data):
for key in data:
if key not in ALLOWED_APOD_FIELDS:
return False
return True

def _validate_date (dt):

today = datetime.today()
begin = datetime (1995, 6, 16) # first APOD image date

# validate input
if (dt > today) or (dt < begin):

today_str = today.strftime('%b %d, %Y')
begin_str = begin.strftime('%b %d, %Y')

raise ValueError('Date must be between %s and %s.' % (begin_str, today_str))


#
# Endpoints
#

Expand All @@ -259,6 +116,7 @@ def home():
@app.route('/'+SERVICE_VERSION+'/'+APOD_METHOD_NAME+'/', methods=['GET'])
def apod():

LOG.info("apod path called")
try:

# application/json GET method
Expand Down
15 changes: 0 additions & 15 deletions apod/tests/apod_test.py

This file was deleted.

Loading

0 comments on commit 3663727

Please sign in to comment.