Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 110 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,111 @@
*.py[co]
local_config.py
python
# Mac specific files
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.DS_Store
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
logs/
# PyInstaller
# These files are usually written by a Python script from a template
# before PyInstaller builds the executable, so that the date and other
# information can be injected into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.lock
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
venv_*
local_config.py

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# emacs
*~
5 changes: 4 additions & 1 deletion referencesrv/parser/crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,10 @@ def reference(self, refstr, words, labels):
:return:
"""
ref_dict = {}
ref_dict['authors'] = self.originator_token.collect_tagged_tokens(words, labels)
try:
ref_dict['authors'] = self.originator_token.collect_tagged_tokens(words, labels)
except Exception as err:
raise Exception('Failed to generate tagged tokens: {0}'.format(err)) from err
if 'DOI' in labels or 'ARXIV' in labels or 'ASCL' in labels:
ref_dict.update(self.numeric_token.collect_id_tagged_tokens(words, labels))
if 'YEAR' in labels:
Expand Down
23 changes: 23 additions & 0 deletions referencesrv/tests/unittests/test_referencesrv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,29 @@ def test_03(self):
"year": "2020", "volume": "9",
"journal": "JHEP", "refstr": "Penington, G, 2020, JHEP, 9"}]})

def test_04(self):
    """Check that /parse lists a reference it fails to parse as rejected.

    The reference string used here is one for which parsing is expected to
    raise. The endpoint should still answer with status 200, returning an
    empty 'parsed' list and echoing the original string under 'rejected'.
    """
    refstr = "They are, I find, contained in a paper of some length in vol. vi of \" Taylor's Scientific Memoirs \", 1853, pp. 114--162."
    request_body = json.dumps({'reference': [refstr]})

    response = self.client.post(path='/parse',
                                data=request_body,
                                headers={'accept': 'application/json'})

    # The failing reference must come back verbatim in the 'rejected' list.
    expected = {"parsed": [], "rejected": [refstr]}
    self.assertEqual(json.loads(response.data), expected)
    self.assertEqual(response.status_code, 200)

def test_05(self):
    """Check that /xml reports an unresolved record when parsing raises.

    The reference text used here is expected to trigger an exception while
    the tagged tokens are collected. The endpoint should still answer with
    status 200 and return a placeholder record whose 'comment' field carries
    the exception message.
    """
    refstr = 'They are, I find, contained in a paper of some length in vol. vi of " Taylor\'s Scientific Memoirs ", 1853, pp. 114--162.'

    # the mock is for solr call
    with mock.patch.object(self.current_app.client, 'get') as get_mock:
        solr_reply = mock.Mock()
        get_mock.return_value = solr_reply
        solr_reply.status_code = 200
        # Empty solr result set: no documents found.
        solr_reply.text = json.dumps({
            u'responseHeader': {u'status': 0, u'QTime': 60, u'params': {}},
            u'response': {u'start': 0, u'numFound': 0, u'docs': []},
        })

        payload = {"parsed_reference": [{"refplaintext": refstr}]}
        response = self.client.post(path='/xml',
                                    data=json.dumps(payload),
                                    headers={'accept': 'application/json'})

        expected_record = {
            'refstring': refstr,
            'score': '0.0',
            'bibcode': '...................',
            'scix_id': '...................',
            'comment': 'Exception: Failed to generate tagged tokens: list index out of range',
        }
        self.assertEqual(json.loads(response.data), {'resolved': [expected_record]})
        self.assertEqual(response.status_code, 200)

# Run the unit tests when this module is executed directly as a script.
if __name__ == "__main__":
unittest.main()
19 changes: 17 additions & 2 deletions referencesrv/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def xml_resolve(parsed_reference, returned_format):
try:
resolved = str(solve_reference(Hypotheses(parsed_reference)))
if resolved.startswith('0.0'):
raise "Not Resolved"
raise ValueError("Not Resolved")
reference_str = parsed_reference.get('refstr', None) or parsed_reference.get('refplaintext', None)
return format_resolved_reference(returned_format,
resolved=resolved,
Expand Down Expand Up @@ -264,6 +264,14 @@ def xml_resolve(parsed_reference, returned_format):
reference=reference_str,
id=parsed_reference.get('id', None),
comment=error_comment)
except Exception as e:
error_comment = 'Exception: {error}'.format(error=str(e))
current_app.logger.error(error_comment)
return format_resolved_reference(returned_format,
resolved=not_resolved,
reference=reference_str,
id=parsed_reference.get('id', None),
comment=error_comment)
else:
error_comment = 'ValueError: reference with no year and volume cannot be resolved.'
current_app.logger.error('Exception: {error}'.format(error=error_comment))
Expand Down Expand Up @@ -443,11 +451,18 @@ def parse_text():

# start_time = time.time()
results = []
rejected = []
for reference in references:
results.append(text_parser(reference))
try:
results.append(text_parser(reference))
except Exception as err:
rejected.append(reference)
current_app.logger.error('Failed to parse reference: {0} (reason: {1})'.format(reference, err))
# current_app.logger.debug("POST request with {num} reference(s) processed in {duration} ms".format(num=len(references), duration=(time.time() - start_time) * 1000))

response = {'parsed': results}
if rejected:
response = {'parsed': results, 'rejected': rejected}
if truncated_message:
response['message'] = truncated_message
return return_response(response, 200, 'application/json; charset=UTF8')
Loading