diff --git a/inspire_dojson/api.py b/inspire_dojson/api.py
index ef642d4..cea95d9 100644
--- a/inspire_dojson/api.py
+++ b/inspire_dojson/api.py
@@ -50,10 +50,12 @@ try:
     unichr(0x100000)
     RE_INVALID_CHARS_FOR_XML = re.compile(
-        u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]+')
+        u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]+'
+    )
 except ValueError:  # pragma: no cover
     RE_INVALID_CHARS_FOR_XML = re.compile(
-        u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD]+')
+        u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD]+'
+    )


 RECORD = E.record
 CONTROLFIELD = E.controlfield
@@ -105,7 +107,9 @@ def record2marcxml_etree(record):
     elif schema_name == 'authors':
         marcjson = hepnames2marc.do(record)
     else:
-        raise NotSupportedError(u'JSON -> MARC rules missing for "{}"'.format(schema_name))
+        raise NotSupportedError(
+            u'JSON -> MARC rules missing for "{}"'.format(schema_name)
+        )

     record = RECORD()

@@ -115,7 +119,9 @@ def record2marcxml_etree(record):
             value = force_single_element(values)
             if not isinstance(value, text_type):
                 value = text_type(value)
-            record.append(CONTROLFIELD(_strip_invalid_chars_for_xml(value), {'tag': tag}))
+            record.append(
+                CONTROLFIELD(_strip_invalid_chars_for_xml(value), {'tag': tag})
+            )
         else:
             for value in force_list(values):
                 datafield = DATAFIELD({'tag': tag, 'ind1': ind1, 'ind2': ind2})
@@ -123,7 +129,9 @@ def record2marcxml_etree(record):
                     for el in force_list(els):
                         if not isinstance(el, text_type):
                             el = text_type(el)
-                        datafield.append(SUBFIELD(_strip_invalid_chars_for_xml(el), {'code': code}))
+                        datafield.append(
+                            SUBFIELD(_strip_invalid_chars_for_xml(el), {'code': code})
+                        )
                 record.append(datafield)

     return record
@@ -153,7 +161,9 @@ def cds_marcxml2record(marcxml):


 def _get_collections(marcjson):
-    collections = chain.from_iterable([force_list(el) for el in force_list(get_value(marcjson, '980__.a'))])
+    collections = chain.from_iterable(
+        [force_list(el) for el in force_list(get_value(marcjson, '980__.a'))]
+    )
     normalized_collections = [el.lower() for el in collections]

     return normalized_collections
diff --git a/inspire_dojson/cds/model.py b/inspire_dojson/cds/model.py
index 1fc4708..97e3da2 100644
--- a/inspire_dojson/cds/model.py
+++ b/inspire_dojson/cds/model.py
@@ -36,18 +36,24 @@ def add_control_number(record, blob):
     if '001' not in blob:
         return record

-    collections = (value.lower() for value in chain(force_list(get_value(blob, '980__.a', default=[])),
-                                                    force_list(get_value(blob, '980__.c', default=[]))))
+    collections = (
+        value.lower()
+        for value in chain(
+            force_list(get_value(blob, '980__.a', default=[])),
+            force_list(get_value(blob, '980__.c', default=[])),
+        )
+    )

     if 'hidden' in collections:
-        record.setdefault('595__', []).append({
-            '9': 'CDS',
-            'a': u'CDS-{}'.format(blob['001'])
-        })
+        record.setdefault('595__', []).append(
+            {'9': 'CDS', 'a': u'CDS-{}'.format(blob['001'])}
+        )
     else:
-        record.setdefault('035__', []).append({
-            '9': 'CDS',
-            'a': blob['001'],
-        })
+        record.setdefault('035__', []).append(
+            {
+                '9': 'CDS',
+                'a': blob['001'],
+            }
+        )

     return record
diff --git a/inspire_dojson/cds/rules.py b/inspire_dojson/cds/rules.py
index aa5a94a..078778a 100644
--- a/inspire_dojson/cds/rules.py
+++ b/inspire_dojson/cds/rules.py
@@ -120,7 +120,7 @@ def escape_url(url):
     else:
         scheme = ''

-    url = quote_url(url[len(scheme):])
+    url = quote_url(url[len(scheme) :])

     return scheme + url

@@ -135,8 +135,19 @@ def persistent_identifiers(self, key, value):
 @cds2hep_marc.over('035__', '^035..')
 @utils.for_each_value
 def external_sytem_identifiers(self, key, value):
-    ignored = {'cercer', 'inspire', 'xx', 'cern annual report', 'cmscms', 'wai01', 'spires'}
-    if any(val.lower() in ignored for val in chain(force_list(value.get('9')), force_list(value.get('a')))):
+    ignored = {
+        'cercer',
+        'inspire',
+        'xx',
+        'cern annual report',
+        'cmscms',
+        'wai01',
+        'spires',
+    }
+    if any(
+        val.lower() in ignored
+        for val in chain(force_list(value.get('9')), force_list(value.get('a')))
+    ):
         return
     if any(val.lower().endswith('cercer') for val in force_list(value.get('a'))):
         return
@@ -151,7 +162,15 @@ def secondary_report_numbers(self, key, value):
     Also populates the ``500``, ``595`` and ``980`` MARC field through side effects.
     """
     preliminary_results_prefixes = ['ATLAS-CONF-', 'CMS-PAS-', 'CMS-DP-', 'LHCB-CONF-']
-    note_prefixes = ['ALICE-INT-', 'ATL-', 'ATLAS-CONF-', 'CMS-DP-', 'CMS-PAS-', 'LHCB-CONF-', 'LHCB-PUB-']
+    note_prefixes = [
+        'ALICE-INT-',
+        'ATL-',
+        'ATLAS-CONF-',
+        'CMS-DP-',
+        'CMS-PAS-',
+        'LHCB-CONF-',
+        'LHCB-PUB-',
+    ]

     result_037 = self.get('037__', [])
     result_500 = self.get('500__', [])
@@ -165,17 +184,21 @@ def secondary_report_numbers(self, key, value):
     if any(report.upper().startswith(prefix) for prefix in note_prefixes):
         result_980.append({'a': 'NOTE'})

-    if any(report.upper().startswith(prefix) for prefix in preliminary_results_prefixes):
+    if any(
+        report.upper().startswith(prefix) for prefix in preliminary_results_prefixes
+    ):
         result_500.append({'9': 'CDS', 'a': 'Preliminary results'})

     is_barcode = hidden_report.startswith('P0') or hidden_report.startswith('CM-P0')
     if not report.startswith('SIS-') and not is_barcode:
-        result_037.append({
-            '9': source,
-            'a': report,
-            'c': value.get('c'),
-            'z': hidden_report if source == 'CDS' else None,
-        })
+        result_037.append(
+            {
+                '9': source,
+                'a': report,
+                'c': value.get('c'),
+                'z': hidden_report if source == 'CDS' else None,
+            }
+        )

     self['500__'] = result_500
     self['595__'] = result_595
@@ -196,7 +219,9 @@ def languages(self, key, value):
             languages.append({'a': pycountry.languages.get(alpha_3=alpha_3).name})
         except KeyError:
             with contextlib.suppress(KeyError):
-                languages.append({'a': pycountry.languages.get(bibliographic=alpha_3).name})
+                languages.append(
+                    {'a': pycountry.languages.get(bibliographic=alpha_3).name}
+                )

     return languages

@@ -262,7 +287,9 @@ def nonfirst_authors(self, key, value):
     field_700 = self.get('700__', [])
     field_701 = self.get('701__', [])

-    is_supervisor = any(el.lower().startswith('dir') for el in force_list(value.get('e', '')))
+    is_supervisor = any(
+        el.lower().startswith('dir') for el in force_list(value.get('e', ''))
+    )
     if is_supervisor:
         field_701.append(_converted_author(value))
     else:
@@ -346,7 +373,7 @@ def categories(self, key, value):
         result = {
             '2': 'INSPIRE',
             # XXX: will fail validation and be logged if invalid category
-            'a': CATEGORIES.get(value.get('a'), value.get('a'))
+            'a': CATEGORIES.get(value.get('a'), value.get('a')),
         }
     else:
         result = vanilla_dict(value)
@@ -405,20 +432,28 @@ def urls(self, key, value):

     Also populate the ``FFT`` field through side effects.
""" + def _is_preprint(value): return value.get('y', '').lower() == 'preprint' def _is_fulltext(value): - return value['u'].endswith('.pdf') and value['u'].startswith('http://cds.cern.ch') + return value['u'].endswith('.pdf') and value['u'].startswith( + 'http://cds.cern.ch' + ) def _is_local_copy(value): return 'local copy' in value.get('y', '') def _is_ignored_domain(value): - ignored_domains = ['http://cdsweb.cern.ch', 'http://cms.cern.ch', - 'http://cmsdoc.cern.ch', 'http://documents.cern.ch', - 'http://preprints.cern.ch', 'http://cds.cern.ch', - 'http://arxiv.org'] + ignored_domains = [ + 'http://cdsweb.cern.ch', + 'http://cms.cern.ch', + 'http://cmsdoc.cern.ch', + 'http://documents.cern.ch', + 'http://preprints.cern.ch', + 'http://cds.cern.ch', + 'http://arxiv.org', + ] return any(value['u'].startswith(domain) for domain in ignored_domains) field_8564 = self.get('8564_', []) @@ -431,26 +466,34 @@ def _is_ignored_domain(value): if _is_fulltext(value) and not _is_preprint(value): if _is_local_copy(value): - description = value.get('y', '').replace('local copy', 'on CERN Document Server') - field_8564.append({ - 'u': url, - 'y': description, - }) + description = value.get('y', '').replace( + 'local copy', 'on CERN Document Server' + ) + field_8564.append( + { + 'u': url, + 'y': description, + } + ) else: _, file_name = os.path.split(urllib.parse.urlparse(value['u']).path) _, extension = os.path.splitext(file_name) - field_FFT.append({ - 't': 'CDS', - 'a': url, - 'd': value.get('y', ''), - 'n': file_name, - 'f': extension, - }) + field_FFT.append( + { + 't': 'CDS', + 'a': url, + 'd': value.get('y', ''), + 'n': file_name, + 'f': extension, + } + ) elif not _is_ignored_domain(value): - field_8564.append({ - 'u': url, - 'y': value.get('y'), - }) + field_8564.append( + { + 'u': url, + 'y': value.get('y'), + } + ) self['FFT__'] = field_FFT return field_8564 diff --git a/inspire_dojson/common/rules.py b/inspire_dojson/common/rules.py index 22d67ac..a37e167 100644 --- a/inspire_dojson/common/rules.py +++ b/inspire_dojson/common/rules.py @@ -42,7 +42,11 @@ from inspire_dojson.hepnames.model import hepnames, hepnames2marc from inspire_dojson.institutions.model import institutions from inspire_dojson.journals.model import journals -from inspire_dojson.utils import force_single_element, get_recid_from_ref, get_record_ref +from inspire_dojson.utils import ( + force_single_element, + get_recid_from_ref, + get_record_ref, +) IS_INTERNAL_UID = re.compile(r'^(inspire:uid:)?\d{5}$') IS_ORCID = re.compile(r'^(orcid:)?\d{4}-\d{4}-\d{4}-\d{3}[0-9X]$') @@ -580,6 +584,7 @@ def control_number(endpoint): Also populates the ``self`` key through side effects. 
""" + def _control_number(self, key, value): self['self'] = get_record_ref(int(value), endpoint) return int(value) @@ -623,6 +628,7 @@ def legacy_version2marc(self, key, value): @hepnames.over('acquisition_source', '^541..') def acquisition_source(self, key, value): """Populate the ``acquisition_source`` key.""" + def _get_datetime(value): d_value = force_single_element(value.get('d', '')) if d_value: @@ -639,7 +645,11 @@ def _get_datetime(value): a_values = force_list(value.get('a')) for a_value in a_values: if IS_INTERNAL_UID.match(a_value): - internal_uid = int(a_value[12:]) if a_value.startswith('inspire:uid:') else int(a_value) + internal_uid = ( + int(a_value[12:]) + if a_value.startswith('inspire:uid:') + else int(a_value) + ) elif IS_ORCID.match(a_value): orcid = a_value[6:] if a_value.startswith('orcid:') else a_value else: @@ -705,7 +715,8 @@ def public_notes_500(self, key, value): { 'source': value.get('9'), 'value': public_note, - } for public_note in force_list(value.get('a')) + } + for public_note in force_list(value.get('a')) ] @@ -732,7 +743,8 @@ def _private_notes_595(self, key, value): { 'source': value.get('9'), 'value': _private_note, - } for _private_note in force_list(value.get('a')) + } + for _private_note in force_list(value.get('a')) ] @@ -758,7 +770,7 @@ def inspire_categories(self, key, value): inspire_categories = self.get('inspire_categories', []) scheme = force_single_element(value.get('2')) - if scheme == 'arXiv': # XXX: we skip arXiv categories here because + if scheme == 'arXiv': # XXX: we skip arXiv categories here because return inspire_categories # we're going to add them later in a filter. source = force_single_element(value.get('9', '')).lower() @@ -774,10 +786,12 @@ def inspire_categories(self, key, value): for _term in terms: term = classify_field(_term) if term: - inspire_categories.append({ - 'term': term, - 'source': source, - }) + inspire_categories.append( + { + 'term': term, + 'source': source, + } + ) return inspire_categories @@ -837,10 +851,12 @@ def _is_internal_url(url): description = WEBLINKS.get(description, description) for url in force_list(value.get('u')): if not _is_internal_url(url): - urls.append({ - 'description': description, - 'value': url, - }) + urls.append( + { + 'description': description, + 'value': url, + } + ) return urls @@ -881,6 +897,7 @@ def external_system_identifiers(endpoint): Also populates the ``new_record`` key through side effects. 
""" + @utils.flatten @utils.for_each_value def _external_system_identifiers(self, key, value): @@ -892,17 +909,28 @@ def _external_system_identifiers(self, key, value): { 'schema': 'SPIRES', 'value': ext_sys_id, - } for ext_sys_id in force_list(value.get('a')) + } + for ext_sys_id in force_list(value.get('a')) ] return _external_system_identifiers -conferences.over('external_system_identifiers', '^970..')(external_system_identifiers('conferences')) -experiments.over('external_system_identifiers', '^970..')(external_system_identifiers('experiments')) -hep.over('external_system_identifiers', '^970..')(external_system_identifiers('literature')) -institutions.over('external_system_identifiers', '^970..')(external_system_identifiers('institutions')) -journals.over('external_system_identifiers', '^970..')(external_system_identifiers('journals')) +conferences.over('external_system_identifiers', '^970..')( + external_system_identifiers('conferences') +) +experiments.over('external_system_identifiers', '^970..')( + external_system_identifiers('experiments') +) +hep.over('external_system_identifiers', '^970..')( + external_system_identifiers('literature') +) +institutions.over('external_system_identifiers', '^970..')( + external_system_identifiers('institutions') +) +journals.over('external_system_identifiers', '^970..')( + external_system_identifiers('journals') +) @hep2marc.over('970', '^new_record$') @@ -918,6 +946,7 @@ def deleted(self, key, value): def deleted_records(endpoint): """Populate the ``deleted_records`` key.""" + @utils.for_each_value def _deleted_records(self, key, value): deleted_recid = maybe_int(value.get('a')) diff --git a/inspire_dojson/conferences/rules.py b/inspire_dojson/conferences/rules.py index 2cd4331..b23dce6 100644 --- a/inspire_dojson/conferences/rules.py +++ b/inspire_dojson/conferences/rules.py @@ -107,10 +107,12 @@ def contact_details(self, key, value): # we might match an email with the wrong name. 
     if len(m_values) == len(p_values):
         for m_value, p_value in zip(m_values, p_values):
-            result.append({
-                'email': m_value,
-                'name': p_value,
-            })
+            result.append(
+                {
+                    'email': m_value,
+                    'name': p_value,
+                }
+            )
     else:
         for m_value in m_values:
             result.append({'email': m_value})
@@ -139,10 +141,12 @@ def _last_is_incomplete(series, key):
     elif number and name is None and _last_is_incomplete(series, 'number'):
         series[-1]['number'] = number
     else:
-        series.append({
-            'name': name,
-            'number': number,
-        })
+        series.append(
+            {
+                'name': name,
+                'number': number,
+            }
+        )

     return series

@@ -208,8 +212,10 @@ def keywords(self, key, values):
         a_values = force_list(value.get('a'))
         for a_value in a_values:
-            keywords.append({
-                'source': force_single_element(sources),
-                'value': a_value,
-            })
+            keywords.append(
+                {
+                    'source': force_single_element(sources),
+                    'value': a_value,
+                }
+            )

     return keywords
diff --git a/inspire_dojson/errors.py b/inspire_dojson/errors.py
index 8266652..01c1008 100644
--- a/inspire_dojson/errors.py
+++ b/inspire_dojson/errors.py
@@ -30,12 +30,15 @@
 @python_2_unicode_compatible
 class DoJsonError(Exception):
     """Error during DoJSON processing."""
+
     def __str__(self):
         message = self.args[0]
         exc = u' '.join(text_type(arg) for arg in self.args[1])
         try:
             subfields = [(k, v) for (k, v) in self.args[2].items() if k != '__order__']
-        except AttributeError:  # when not dealing with MARC, the value doesn't have to be a dict
+        except (
+            AttributeError
+        ):  # when not dealing with MARC, the value doesn't have to be a dict
             subfields = self.args[2]
         return u'{message}\n\n{exc}\n\nSubfields: {subfields}'.format(
             message=message, exc=exc, subfields=subfields
diff --git a/inspire_dojson/experiments/rules.py b/inspire_dojson/experiments/rules.py
index bcaaf56..ab00d6f 100644
--- a/inspire_dojson/experiments/rules.py
+++ b/inspire_dojson/experiments/rules.py
@@ -32,70 +32,71 @@ from inspire_dojson.experiments.model import experiments
 from inspire_dojson.utils import force_single_element, get_record_ref


-EXPERIMENT_CATEGORIES_MAP = \
-    {'1': 'Collider Experiments',
-     '1.1': 'Collider Experiments|Hadrons',
-     '1.1.1': 'Collider Experiments|Hadrons|p anti-p',
-     '1.1.2': 'Collider Experiments|Hadrons|p p',
-     '1.2': 'Collider Experiments|e+ e-',
-     '1.3': 'Collider Experiments|e p',
-     '1.4': 'Collider Experiments|Heavy Flavor Factory',
-     '1.5': 'Collider Experiments|Heavy ion',
-     '1.6': 'Collider Experiments|Detector development',
-     '2': 'Fixed Target Experiments',
-     '2.1': 'Fixed Target Experiments|High-momentum transfer',
-     '2.2': 'Fixed Target Experiments|Hadron Spectroscopy',
-     '2.3': 'Fixed Target Experiments|Deep inelastic scattering',
-     '2.4': 'Fixed Target Experiments|Drell-Yan/Dilepton production',
-     '2.5': 'Fixed Target Experiments|Flavor physics',
-     '2.6': 'Fixed Target Experiments|Lepton precision experiments',
-     '2.7': 'Fixed Target Experiments|Neutron/proton precision experiments',
-     '3': 'Neutrino (flavor) experiments',
-     '3.1': 'Neutrino (flavor) experiments|Accelerator',
-     '3.1.1': 'Neutrino (flavor) experiments|Accelerator|short-baseline',
-     '3.1.2': 'Neutrino (flavor) experiments|Accelerator|long-baseline',
-     '3.2': 'Neutrino (flavor) experiments|Reactor',
-     '3.2.1': 'Neutrino (flavor) experiments|Reactor|ultra-short-baseline',
-     '3.2.2': 'Neutrino (flavor) experiments|Reactor|longer baselines',
-     '3.3': 'Neutrino (flavor) experiments|Non terrestrial',
-     '3.3.1': 'Neutrino (flavor) experiments|Non terrestrial|Atmospheric',
-     '3.3.2': 'Neutrino (flavor) experiments|Non terrestrial|Solar',
-     '3.3.3': 'Neutrino (flavor) experiments|Non terrestrial|Cosmic',
-     '3.4': 'Neutrino (flavor) experiments|Neutrinoless double beta decay',
-     '3.5': 'Neutrino (flavor) experiments|Neutrino mass',
-     '4': 'Dark matter search experiments',
-     '4.1': 'Dark matter search experiments|Non-accelerator',
-     '4.2': 'Dark matter search experiments|Axion search experiments',
-     '4.3': 'Dark matter search experiments|Dark Forces',
-     '5': 'Cosmic ray/Gamma ray experiments',
-     '5.1': 'Cosmic ray/Gamma ray experiments|Ground array',
-     '5.2': 'Cosmic ray/Gamma ray experiments|Cerenkov array',
-     '5.3': 'Cosmic ray/Gamma ray experiments|Satellite',
-     '5.4': 'Cosmic ray/Gamma ray experiments|Balloon',
-     '6': 'Other Rare-process/exotic experiments',
-     '6.1': 'Other Rare-process/exotic experiments|Proton decay',
-     '6.2': 'Other Rare-process/exotic experiments|Modified gravity and space-time',
-     '6.3': 'Other Rare-process/exotic experiments|Magnetic monopoles',
-     '6.4': 'Other Rare-process/exotic experiments|Fractionally charged particles',
-     '7': 'Accelerator Test Facility Experiments',
-     '7.1': 'Accelerator Test Facility Experiments|Electron and positron beams',
-     '7.2': 'Accelerator Test Facility Experiments|Muon beams',
-     '7.3': 'Accelerator Test Facility Experiments|Proton beams',
-     '7.4': 'Accelerator Test Facility Experiments|Neutrino beams',
-     '8': 'Astronomy experiments',
-     '8.1': 'Astronomy experiments|CMB',
-     '8.2': 'Astronomy experiments|Survey',
-     '8.3': 'Astronomy experiments|Supernovae',
-     '8.4': 'Astronomy experiments|Gravitational waves',
-     '8.5': 'Astronomy experiments|Gravitational lensing/Dark matter',
-     '9': 'Non-experimental',
-     '9.1': 'Non-experimental|Data Analysis',
-     '9.2': 'Non-experimental|Simulation tools',
-     '9.2.1': 'Non-experimental|Simulation tools|Detector Simulation',
-     '9.2.2': 'Non-experimental|Simulation tools|Event Simulation',
-     '9.3': 'Non-experimental|Parton Distribution Fits',
-     '9.4': 'Non-experimental|Lattice Gauge Theory',
-     '9.5': 'Non-experimental|Neutrino Physics'}
+EXPERIMENT_CATEGORIES_MAP = {
+    '1': 'Collider Experiments',
+    '1.1': 'Collider Experiments|Hadrons',
+    '1.1.1': 'Collider Experiments|Hadrons|p anti-p',
+    '1.1.2': 'Collider Experiments|Hadrons|p p',
+    '1.2': 'Collider Experiments|e+ e-',
+    '1.3': 'Collider Experiments|e p',
+    '1.4': 'Collider Experiments|Heavy Flavor Factory',
+    '1.5': 'Collider Experiments|Heavy ion',
+    '1.6': 'Collider Experiments|Detector development',
+    '2': 'Fixed Target Experiments',
+    '2.1': 'Fixed Target Experiments|High-momentum transfer',
+    '2.2': 'Fixed Target Experiments|Hadron Spectroscopy',
+    '2.3': 'Fixed Target Experiments|Deep inelastic scattering',
+    '2.4': 'Fixed Target Experiments|Drell-Yan/Dilepton production',
+    '2.5': 'Fixed Target Experiments|Flavor physics',
+    '2.6': 'Fixed Target Experiments|Lepton precision experiments',
+    '2.7': 'Fixed Target Experiments|Neutron/proton precision experiments',
+    '3': 'Neutrino (flavor) experiments',
+    '3.1': 'Neutrino (flavor) experiments|Accelerator',
+    '3.1.1': 'Neutrino (flavor) experiments|Accelerator|short-baseline',
+    '3.1.2': 'Neutrino (flavor) experiments|Accelerator|long-baseline',
+    '3.2': 'Neutrino (flavor) experiments|Reactor',
+    '3.2.1': 'Neutrino (flavor) experiments|Reactor|ultra-short-baseline',
+    '3.2.2': 'Neutrino (flavor) experiments|Reactor|longer baselines',
+    '3.3': 'Neutrino (flavor) experiments|Non terrestrial',
+    '3.3.1': 'Neutrino (flavor) experiments|Non terrestrial|Atmospheric',
+    '3.3.2': 'Neutrino (flavor) experiments|Non terrestrial|Solar',
+    '3.3.3': 'Neutrino (flavor) experiments|Non terrestrial|Cosmic',
+    '3.4': 'Neutrino (flavor) experiments|Neutrinoless double beta decay',
+    '3.5': 'Neutrino (flavor) experiments|Neutrino mass',
+    '4': 'Dark matter search experiments',
+    '4.1': 'Dark matter search experiments|Non-accelerator',
+    '4.2': 'Dark matter search experiments|Axion search experiments',
+    '4.3': 'Dark matter search experiments|Dark Forces',
+    '5': 'Cosmic ray/Gamma ray experiments',
+    '5.1': 'Cosmic ray/Gamma ray experiments|Ground array',
+    '5.2': 'Cosmic ray/Gamma ray experiments|Cerenkov array',
+    '5.3': 'Cosmic ray/Gamma ray experiments|Satellite',
+    '5.4': 'Cosmic ray/Gamma ray experiments|Balloon',
+    '6': 'Other Rare-process/exotic experiments',
+    '6.1': 'Other Rare-process/exotic experiments|Proton decay',
+    '6.2': 'Other Rare-process/exotic experiments|Modified gravity and space-time',
+    '6.3': 'Other Rare-process/exotic experiments|Magnetic monopoles',
+    '6.4': 'Other Rare-process/exotic experiments|Fractionally charged particles',
+    '7': 'Accelerator Test Facility Experiments',
+    '7.1': 'Accelerator Test Facility Experiments|Electron and positron beams',
+    '7.2': 'Accelerator Test Facility Experiments|Muon beams',
+    '7.3': 'Accelerator Test Facility Experiments|Proton beams',
+    '7.4': 'Accelerator Test Facility Experiments|Neutrino beams',
+    '8': 'Astronomy experiments',
+    '8.1': 'Astronomy experiments|CMB',
+    '8.2': 'Astronomy experiments|Survey',
+    '8.3': 'Astronomy experiments|Supernovae',
+    '8.4': 'Astronomy experiments|Gravitational waves',
+    '8.5': 'Astronomy experiments|Gravitational lensing/Dark matter',
+    '9': 'Non-experimental',
+    '9.1': 'Non-experimental|Data Analysis',
+    '9.2': 'Non-experimental|Simulation tools',
+    '9.2.1': 'Non-experimental|Simulation tools|Detector Simulation',
+    '9.2.2': 'Non-experimental|Simulation tools|Event Simulation',
+    '9.3': 'Non-experimental|Parton Distribution Fits',
+    '9.4': 'Non-experimental|Lattice Gauge Theory',
+    '9.5': 'Non-experimental|Neutrino Physics',
+}


 @experiments.over('_dates', '^046..')
@@ -171,6 +172,7 @@ def long_name(self, key, value):
 def inspire_classification(self, key, value):
     def _get_category(value):
         return EXPERIMENT_CATEGORIES_MAP.get(value.get('a'))
+
     return _get_category(value)


@@ -184,10 +186,7 @@ def name_variants(self, key, value):
 @utils.for_each_value
 def related_records(self, key, value):
     def _get_relation(value):
-        RELATIONS_MAP = {
-            'a': 'predecessor',
-            'b': 'successor'
-        }
+        RELATIONS_MAP = {'a': 'predecessor', 'b': 'successor'}

         return RELATIONS_MAP.get(value.get('w'))
diff --git a/inspire_dojson/hep/model.py b/inspire_dojson/hep/model.py
index 411e006..84755d5 100644
--- a/inspire_dojson/hep/model.py
+++ b/inspire_dojson/hep/model.py
@@ -55,7 +55,9 @@ def convert_publication_infos(record, blob):
     if not record.get('publication_info'):
         return record

-    record['publication_info'] = convert_old_publication_info_to_new(record['publication_info'])
+    record['publication_info'] = convert_old_publication_info_to_new(
+        record['publication_info']
+    )

     return record

@@ -68,7 +70,9 @@ def move_incomplete_publication_infos(record, blob):
         if not non_empty_keys:
             continue
         if non_empty_keys.issubset({'journal_record', 'journal_title'}):
-            public_note = {'value': u'Submitted to {}'.format(publication_info['journal_title'])}
+            public_note = {
+                'value': u'Submitted to {}'.format(publication_info['journal_title'])
+            }
             record.setdefault('public_notes', []).append(public_note)
         else:
             publication_infos.append(publication_info)
@@ -111,7 +115,9 @@ def ensure_ordered_figures(record, blob):
         else:
             unordered_figures_list.append(figure)

-    record['figures'] = [value for key, value in sorted(six.iteritems(ordered_figures_dict))]
+    record['figures'] = [
+        value for key, value in sorted(six.iteritems(ordered_figures_dict))
+    ]
     record['figures'].extend(unordered_figures_list)

     return record
@@ -126,7 +132,9 @@ def duplicates(elements):
             else:
                 duplicate_keys_list.append(element['key'])

-    for index, attachment in itertools.chain(duplicates(record.get('documents', [])), duplicates(record.get('figures', []))):
+    for index, attachment in itertools.chain(
+        duplicates(record.get('documents', [])), duplicates(record.get('figures', []))
+    ):
         attachment['key'] = u'{}_{}'.format(index, attachment['key'])

     return record
@@ -139,15 +147,9 @@ def write_ids(record, blob):
     for schema, values in six.iteritems(id_dict):
         z_values = iter(values)
         a_value = next(z_values)
-        result_035.append({
-            '9': schema,
-            'a': a_value
-        })
+        result_035.append({'9': schema, 'a': a_value})
         for z_value in z_values:
-            result_035.append({
-                '9': schema,
-                'z': z_value
-            })
+            result_035.append({'9': schema, 'z': z_value})

     if 'id_dict' in record:
         del record['id_dict']
diff --git a/inspire_dojson/hep/rules/bd0xx.py b/inspire_dojson/hep/rules/bd0xx.py
index 5b6067e..62e73c6 100644
--- a/inspire_dojson/hep/rules/bd0xx.py
+++ b/inspire_dojson/hep/rules/bd0xx.py
@@ -44,10 +44,13 @@
 @utils.for_each_value
 def isbns(self, key, value):
     """Populate the ``isbns`` key."""
+
     def _get_medium(value):
         def _normalize(medium):
             schema = load_schema('hep')
-            valid_media = schema['properties']['isbns']['items']['properties']['medium']['enum']
+            valid_media = schema['properties']['isbns']['items']['properties'][
+                'medium'
+            ]['enum']

             medium = medium.lower().replace('-', '').replace(' ', '')
             if medium in valid_media:
@@ -92,6 +95,7 @@ def dois(self, key, value):

     Also populates the ``persistent_identifiers`` key through side effects.
     """
+
     def _get_first_non_curator_source(sources):
         sources_without_curator = [el for el in sources if el.upper() != 'CURATOR']
         return force_single_element(sources_without_curator)
@@ -125,19 +129,23 @@ def _is_handle(id_, type_):
         source = _get_first_non_curator_source(sources)

         if _is_doi(id_, schema):
-            dois.append({
-                'material': material,
-                'source': source,
-                'value': normalize_doi(id_),
-            })
+            dois.append(
+                {
+                    'material': material,
+                    'source': source,
+                    'value': normalize_doi(id_),
+                }
+            )
         else:
             schema = 'HDL' if _is_handle(id_, schema) else schema
-            persistent_identifiers.append({
-                'material': material,
-                'schema': schema,
-                'source': source,
-                'value': id_,
-            })
+            persistent_identifiers.append(
+                {
+                    'material': material,
+                    'schema': schema,
+                    'source': source,
+                    'value': id_,
+                }
+            )

     self['persistent_identifiers'] = persistent_identifiers
     return dois
@@ -173,6 +181,7 @@ def texkeys(self, key, value):
     Also populates the ``external_system_identifiers`` and ``_desy_bookkeeping`` keys
     through side effects.
""" + def _is_oai(id_, schema): return id_.startswith('oai:') @@ -204,10 +213,13 @@ def _is_texkey(id_, schema): elif _is_desy(id_, schema): _desy_bookkeeping.append({'identifier': id_}) else: - external_system_identifiers.insert(0, { - 'schema': schema, - 'value': id_, - }) + external_system_identifiers.insert( + 0, + { + 'schema': schema, + 'value': id_, + }, + ) for id_ in other_ids: id_ = id_.strip() @@ -221,10 +233,12 @@ def _is_texkey(id_, schema): elif _is_desy(id_, schema): _desy_bookkeeping.append({'identifier': id_}) else: - external_system_identifiers.append({ - 'schema': schema, - 'value': id_, - }) + external_system_identifiers.append( + { + 'schema': schema, + 'value': id_, + } + ) self['external_system_identifiers'] = external_system_identifiers self['_desy_bookkeeping'] = _desy_bookkeeping @@ -239,16 +253,20 @@ def texkeys2marc(self, key, value): values = force_list(value) if values: value = values[0] - result.append({ - '9': 'INSPIRETeX', - 'a': value, - }) + result.append( + { + '9': 'INSPIRETeX', + 'a': value, + } + ) for value in values[1:]: - result.append({ - '9': 'INSPIRETeX', - 'z': value, - }) + result.append( + { + '9': 'INSPIRETeX', + 'z': value, + } + ) return result @@ -261,6 +279,7 @@ def external_system_identifiers2marc(self, key, value): ``id_dict`` dictionary that holds potentially duplicate IDs that are post-processed in a filter. """ + def _is_scheme_cernkey(id_, schema): return schema == 'CERNKEY' @@ -277,14 +296,18 @@ def _is_scheme_spires(id_, schema): schema = value.get('schema') if _is_scheme_spires(id_, schema): - result_970.append({ - 'a': id_, - }) + result_970.append( + { + 'a': id_, + } + ) elif _is_scheme_cernkey(id_, schema): - result_035.append({ - '9': 'CERNKEY', - 'z': id_, - }) + result_035.append( + { + '9': 'CERNKEY', + 'z': id_, + } + ) else: id_dict[schema].append(id_) @@ -299,6 +322,7 @@ def arxiv_eprints(self, key, value): Also populates the ``report_numbers`` key through side effects. 
""" + def _get_clean_arxiv_eprint(id_): return id_.split(':')[-1] @@ -320,26 +344,34 @@ def _get_clean_source(source): for value in values: id_ = force_single_element(value.get('a', '')) other_id = force_single_element(value.get('z', '')) - categories = [normalize_arxiv_category(category) for category - in force_list(value.get('c'))] + categories = [ + normalize_arxiv_category(category) + for category in force_list(value.get('c')) + ] source = force_single_element(value.get('9', '')) if _is_arxiv_eprint(id_, source): - arxiv_eprints.append({ - 'categories': categories, - 'value': _get_clean_arxiv_eprint(id_), - }) + arxiv_eprints.append( + { + 'categories': categories, + 'value': _get_clean_arxiv_eprint(id_), + } + ) elif _is_hidden_report_number(other_id, source): - report_numbers.append({ - 'hidden': True, - 'source': _get_clean_source(source), - 'value': other_id, - }) + report_numbers.append( + { + 'hidden': True, + 'source': _get_clean_source(source), + 'value': other_id, + } + ) else: - report_numbers.append({ - 'source': _get_clean_source(source), - 'value': id_, - }) + report_numbers.append( + { + 'source': _get_clean_source(source), + 'value': id_, + } + ) self['report_numbers'] = report_numbers return arxiv_eprints @@ -358,23 +390,29 @@ def arxiv_eprints2marc(self, key, values): for value in values: arxiv_id = value.get('value') arxiv_id = 'arXiv:' + arxiv_id if is_arxiv_post_2007(arxiv_id) else arxiv_id - result_037.append({ - '9': 'arXiv', - 'a': arxiv_id, - 'c': force_single_element(value.get('categories')), - }) - - result_035.append({ - '9': 'arXiv', - 'a': 'oai:arXiv.org:' + value.get('value'), - }) + result_037.append( + { + '9': 'arXiv', + 'a': arxiv_id, + 'c': force_single_element(value.get('categories')), + } + ) + + result_035.append( + { + '9': 'arXiv', + 'a': 'oai:arXiv.org:' + value.get('value'), + } + ) categories = force_list(value.get('categories')) for category in categories: - result_65017.append({ - '2': 'arXiv', - 'a': category, - }) + result_65017.append( + { + '2': 'arXiv', + 'a': category, + } + ) self['65017'] = result_65017 self['035'] = result_035 @@ -385,6 +423,7 @@ def arxiv_eprints2marc(self, key, values): @utils.for_each_value def report_numbers2marc(self, key, value): """Populate the ``037`` MARC field.""" + def _get_mangled_source(source): if source == 'arXiv': return 'arXiv:reportnumber' diff --git a/inspire_dojson/hep/rules/bd1xx.py b/inspire_dojson/hep/rules/bd1xx.py index 50c7f96..54b3ebe 100644 --- a/inspire_dojson/hep/rules/bd1xx.py +++ b/inspire_dojson/hep/rules/bd1xx.py @@ -47,10 +47,12 @@ def _get_affiliations(value): # we might match a value with the wrong recid. 
         if len(u_values) == len(z_values):
             for u_value, z_value in zip(u_values, z_values):
-                result.append({
-                    'record': get_record_ref(z_value, 'institutions'),
-                    'value': u_value,
-                })
+                result.append(
+                    {
+                        'record': get_record_ref(z_value, 'institutions'),
+                        'value': u_value,
+                    }
+                )
         else:
             for u_value in u_values:
                 result.append({'value': u_value})
@@ -58,15 +60,23 @@ def _get_affiliations(value):
         return dedupe_list(result)

     def _get_affiliations_identifiers(value):
-        t_values = (t_value.split(':', 1) for t_value in dedupe_list(force_list(value.get('t'))))
+        t_values = (
+            t_value.split(':', 1) for t_value in dedupe_list(force_list(value.get('t')))
+        )

-        return [{'schema': schema.upper(), 'value': identifier} for schema, identifier in t_values]
+        return [
+            {'schema': schema.upper(), 'value': identifier}
+            for schema, identifier in t_values
+        ]

     def _get_curated_relation(value):
         return value.get('y') == '1' or None

     def _get_emails(value):
-        return [el[6:] if el.startswith('email:') else el for el in force_list(value.get('m'))]
+        return [
+            el[6:] if el.startswith('email:') else el
+            for el in force_list(value.get('m'))
+        ]

     def _get_full_names(value):
         return [full_name.strip(', ') for full_name in force_list(value.get('a'))]
@@ -88,40 +98,52 @@ def _is_cern(j_value):

         i_values = force_list(value.get('i'))
         for i_value in i_values:
-            result.append({
-                'schema': 'INSPIRE ID',
-                'value': i_value,
-            })
+            result.append(
+                {
+                    'schema': 'INSPIRE ID',
+                    'value': i_value,
+                }
+            )

         j_values = force_list(value.get('j'))
         for j_value in j_values:
             if _is_jacow(j_value):
-                result.append({
-                    'schema': 'JACOW',
-                    'value': 'JACoW-' + j_value[6:],
-                })
+                result.append(
+                    {
+                        'schema': 'JACOW',
+                        'value': 'JACoW-' + j_value[6:],
+                    }
+                )
             elif _is_orcid(j_value):
-                result.append({
-                    'schema': 'ORCID',
-                    'value': j_value[6:].replace('.', ''),
-                })
+                result.append(
+                    {
+                        'schema': 'ORCID',
+                        'value': j_value[6:].replace('.', ''),
+                    }
+                )
             elif _is_naked_orcid(j_value):
-                result.append({
-                    'schema': 'ORCID',
-                    'value': j_value,
-                })
+                result.append(
+                    {
+                        'schema': 'ORCID',
+                        'value': j_value,
+                    }
+                )
             elif _is_cern(j_value):
-                result.append({
-                    'schema': 'CERN',
-                    'value': 'CERN-' + j_value[5:],
-                })
+                result.append(
+                    {
+                        'schema': 'CERN',
+                        'value': 'CERN-' + j_value[5:],
+                    }
+                )

         w_values = force_list(value.get('w'))
         for w_value in w_values:
-            result.append({
-                'schema': 'INSPIRE BAI',
-                'value': w_value,
-            })
+            result.append(
+                {
+                    'schema': 'INSPIRE BAI',
+                    'value': w_value,
+                }
+            )

         return dedupe_list(result)

@@ -141,7 +163,9 @@ def _get_raw_affiliations(value):
         return dedupe_list([{'value': el} for el in force_list(value.get('v'))])

     def _get_record(value):
-        return get_record_ref(maybe_int(force_single_element(value.get('x'))), 'authors')
+        return get_record_ref(
+            maybe_int(force_single_element(value.get('x'))), 'authors'
+        )

     full_names = _get_full_names(value)
     if len(full_names) == 1:
@@ -167,7 +191,8 @@ def _get_record(value):
                 'full_name': full_name,
                 'inspire_roles': _get_inspire_roles(value),
                 'raw_affiliations': _get_raw_affiliations(value),
-            } for full_name in full_names
+            }
+            for full_name in full_names
     ]


@@ -213,13 +238,12 @@ def _get_ids(value):
         return ids

     def _get_affiliations(value):
-        return [
-            aff.get('value') for aff in value.get('affiliations', [])
-        ]
+        return [aff.get('value') for aff in value.get('affiliations', [])]

     def _get_affiliations_identifiers(value):
         return [
-            u'{}:{}'.format(aff.get('schema'), aff.get('value')) for aff in value.get('affiliations_identifiers', [])
+            u'{}:{}'.format(aff.get('schema'), aff.get('value'))
+            for aff in value.get('affiliations_identifiers', [])
         ]

     def _get_inspire_roles(value):
@@ -227,9 +251,7 @@ def _get_inspire_roles(value):
         return ['ed.' for role in values if role == 'editor']

     def _get_raw_affiliations(value):
-        return [
-            aff.get('value') for aff in value.get('raw_affiliations', [])
-        ]
+        return [aff.get('value') for aff in value.get('raw_affiliations', [])]

     def get_value_100_700(value):
         ids = _get_ids(value)
diff --git a/inspire_dojson/hep/rules/bd2xx.py b/inspire_dojson/hep/rules/bd2xx.py
index 003492a..cca61a0 100644
--- a/inspire_dojson/hep/rules/bd2xx.py
+++ b/inspire_dojson/hep/rules/bd2xx.py
@@ -51,11 +51,14 @@ def titles(self, key, value):
             'title': value.get('a'),
         }

-    self.setdefault('titles', []).insert(0, {
-        'source': value.get('9'),
-        'subtitle': value.get('b'),
-        'title': value.get('a'),
-    })
+    self.setdefault('titles', []).insert(
+        0,
+        {
+            'source': value.get('9'),
+            'subtitle': value.get('b'),
+            'title': value.get('a'),
+        },
+    )


 @hep.over('title_translations', '^242..')
@@ -86,18 +89,21 @@ def titles2marc(self, key, values):
     """
     first, rest = values[0], values[1:]

-    self.setdefault('245', []).append({
-        'a': first.get('title'),
-        'b': first.get('subtitle'),
-        '9': first.get('source'),
-    })
+    self.setdefault('245', []).append(
+        {
+            'a': first.get('title'),
+            'b': first.get('subtitle'),
+            '9': first.get('source'),
+        }
+    )

     return [
         {
             'a': value.get('title'),
             'b': value.get('subtitle'),
             '9': value.get('source'),
-        } for value in rest
+        }
+        for value in rest
     ]


diff --git a/inspire_dojson/hep/rules/bd5xx.py b/inspire_dojson/hep/rules/bd5xx.py
index ff51249..292aaf1 100644
--- a/inspire_dojson/hep/rules/bd5xx.py
+++ b/inspire_dojson/hep/rules/bd5xx.py
@@ -42,6 +42,7 @@ def public_notes(self, key, value):

     Also populates the ``curated`` and ``thesis_info`` keys through side effects.
""" + def _means_not_curated(public_note): return public_note in [ '*Brief entry*', @@ -61,19 +62,25 @@ def _means_not_curated(public_note): match = IS_DEFENSE_DATE.match(public_note) if match: try: - thesis_info['defense_date'] = normalize_date(match.group('defense_date')) + thesis_info['defense_date'] = normalize_date( + match.group('defense_date') + ) except ValueError: - public_notes.append({ - 'source': source, - 'value': public_note, - }) + public_notes.append( + { + 'source': source, + 'value': public_note, + } + ) elif _means_not_curated(public_note): self['curated'] = False else: - public_notes.append({ - 'source': source, - 'value': public_note, - }) + public_notes.append( + { + 'source': source, + 'value': public_note, + } + ) self['thesis_info'] = thesis_info return public_notes @@ -82,6 +89,7 @@ def _means_not_curated(public_note): @hep.over('thesis_info', '^502..') def thesis_info(self, key, value): """Populate the ``thesis_info`` key.""" + def _get_degree_type(value): DEGREE_TYPES_MAP = { 'RAPPORT DE STAGE': 'other', @@ -110,11 +118,14 @@ def _get_institutions(value): if len(c_values) != len(z_values): return [{'name': c_value} for c_value in c_values] else: - return [{ - 'curated_relation': True, - 'name': c_value, - 'record': get_record_ref(z_value, 'institutions'), - } for c_value, z_value in zip(c_values, z_values)] + return [ + { + 'curated_relation': True, + 'name': c_value, + 'record': get_record_ref(z_value, 'institutions'), + } + for c_value, z_value in zip(c_values, z_values) + ] thesis_info = self.get('thesis_info', {}) @@ -131,6 +142,7 @@ def thesis_info2marc(self, key, value): Also populates the ``500`` MARC field through side effects. """ + def _get_b_value(value): DEGREE_TYPES_MAP = { 'bachelor': 'Bachelor', @@ -150,9 +162,11 @@ def _get_b_value(value): result_502 = self.get('502', {}) if value.get('defense_date'): - result_500.append({ - 'a': u'Presented on {}'.format(value.get('defense_date')), - }) + result_500.append( + { + 'a': u'Presented on {}'.format(value.get('defense_date')), + } + ) result_502 = { 'b': _get_b_value(value), @@ -174,10 +188,12 @@ def abstracts(self, key, value): source = force_single_element(value.get('9')) for a_value in force_list(value.get('a')): - result.append({ - 'source': source, - 'value': a_value, - }) + result.append( + { + 'source': source, + 'value': a_value, + } + ) return result @@ -218,6 +234,7 @@ def funding_info2marc(self, key, value): @utils.for_each_value def license(self, key, value): """Populate the ``license`` key.""" + def _get_license(value): a_values = force_list(value.get('a')) @@ -299,6 +316,7 @@ def _private_notes(self, key, value): Also populates the ``_export_to`` key through side effects. """ + def _is_for_cds(value): normalized_c_values = [el.upper() for el in force_list(value.get('c'))] return 'CDS' in normalized_c_values @@ -325,10 +343,12 @@ def _is_not_for_hal(value): source = force_single_element(current_value.get('9')) for _private_note in force_list(current_value.get('a')): - _private_notes.append({ - 'source': source, - 'value': _private_note, - }) + _private_notes.append( + { + 'source': source, + 'value': _private_note, + } + ) self['_export_to'] = _export_to return _private_notes @@ -341,6 +361,7 @@ def _private_notes2marc(self, key, value): Also populates the `595_H` MARC key through side effects. 
""" + def _is_from_hal(value): return value.get('source') == 'HAL' @@ -356,6 +377,7 @@ def _is_from_hal(value): @hep2marc.over('595', '^_export_to$') def _export_to2marc(self, key, value): """Populate the ``595`` MARC field.""" + def _is_for_cds(value): return 'CDS' in value @@ -403,10 +425,7 @@ def _desy_bookkeeping2marc(self, key, value): 's': value.get('status'), } - self.setdefault('035', []).append({ - '9': 'DESY', - 'z': value['identifier'] - }) + self.setdefault('035', []).append({'9': 'DESY', 'z': value['identifier']}) @hep.over('_private_notes', '^595.H') @@ -418,5 +437,6 @@ def _private_notes_hal(self, key, value): { 'source': 'HAL', 'value': _private_note, - } for _private_note in force_list(value.get('a')) + } + for _private_note in force_list(value.get('a')) ] diff --git a/inspire_dojson/hep/rules/bd6xx.py b/inspire_dojson/hep/rules/bd6xx.py index 1f17c12..3ef3871 100644 --- a/inspire_dojson/hep/rules/bd6xx.py +++ b/inspire_dojson/hep/rules/bd6xx.py @@ -63,10 +63,12 @@ def accelerator_experiments(self, key, value): # we might match a value with the wrong recid. if len(e_values) == len(zero_values): for e_value, zero_value in zip(e_values, zero_values): - result.append({ - 'legacy_name': e_value, - 'record': get_record_ref(zero_value, 'experiments'), - }) + result.append( + { + 'legacy_name': e_value, + 'record': get_record_ref(zero_value, 'experiments'), + } + ) else: for e_value in e_values: result.append({'legacy_name': e_value}) @@ -90,6 +92,7 @@ def keywords(self, key, values): Also populates the ``energy_ranges`` key through side effects. """ + def _get_source(value): sources = force_list(value.get('9')) if 'conference' in sources: @@ -103,7 +106,8 @@ def _get_source(value): values = force_list(values) automatic_keywords = any( a_value.lower() == '* automatic keywords *' - for value in values for a_value in force_list(value.get('a')) + for value in values + for a_value in force_list(value.get('a')) ) for value in values: @@ -118,11 +122,13 @@ def _get_source(value): for a_value in a_values: if a_value.lower() == '* automatic keywords *': continue - keywords.append({ - 'schema': schema, - 'source': source, - 'value': a_value, - }) + keywords.append( + { + 'schema': schema, + 'source': source, + 'value': a_value, + } + ) if value.get('e'): energy_ranges.append(ENERGY_RANGES_MAP.get(value.get('e'))) @@ -162,40 +168,53 @@ def keywords2marc(self, key, values): keyword = value.get('value') if schema == 'PACS' or schema == 'PDG': - result_084.append({ - '2': schema, - '9': source, - 'a': keyword, - }) + result_084.append( + { + '2': schema, + '9': source, + 'a': keyword, + } + ) elif schema == 'JACOW': - result_6531.append({ - '2': 'JACoW', - '9': source, - 'a': keyword, - }) + result_6531.append( + { + '2': 'JACoW', + '9': source, + 'a': keyword, + } + ) elif schema == 'INSPIRE': - result_695.append({ - '2': 'INSPIRE', - '9': source, - 'a': keyword, - }) + result_695.append( + { + '2': 'INSPIRE', + '9': source, + 'a': keyword, + } + ) elif schema == 'INIS': - result_695.append({ - '2': 'INIS', - '9': source, - 'a': keyword, - }) + result_695.append( + { + '2': 'INIS', + '9': source, + 'a': keyword, + } + ) elif source != 'magpie': - result_6531.append({ - '9': source, - 'a': keyword, - }) + result_6531.append( + { + '9': source, + 'a': keyword, + } + ) if automatic_keywords: - result_695.insert(0, { - '2': 'INSPIRE', - 'a': '* Automatic Keywords *', - }) + result_695.insert( + 0, + { + '2': 'INSPIRE', + 'a': '* Automatic Keywords *', + }, + ) self['6531'] = result_6531 
     self['084'] = result_084
diff --git a/inspire_dojson/hep/rules/bd7xx.py b/inspire_dojson/hep/rules/bd7xx.py
index 81cffb0..7b01b2d 100644
--- a/inspire_dojson/hep/rules/bd7xx.py
+++ b/inspire_dojson/hep/rules/bd7xx.py
@@ -52,10 +52,12 @@ def collaborations(self, key, value):
     for g_value in force_list(value.get('g')):
         collaborations = normalize_collaboration(g_value)
         if len(collaborations) == 1:
-            result.append({
-                'record': get_record_ref(maybe_int(value.get('0')), 'experiments'),
-                'value': collaborations[0],
-            })
+            result.append(
+                {
+                    'record': get_record_ref(maybe_int(value.get('0')), 'experiments'),
+                    'value': collaborations[0],
+                }
+            )
         else:
             result.extend({'value': collaboration} for collaboration in collaborations)

@@ -73,6 +75,7 @@ def collaborations2marc(self, key, value):
 @utils.for_each_value
 def publication_info(self, key, value):
     """Populate the ``publication_info`` key."""
+
     def _get_cnum(value):
         w_value = force_single_element(value.get('w', ''))
         normalized_w_value = w_value.replace('/', '-').upper()
@@ -225,14 +228,20 @@ def related_records2marc(self, key, value):
             'w': get_recid_from_ref(value.get('record')),
         }
     elif value.get('relation') == 'successor':
-        self.setdefault('78502', []).append({
-            'i': 'superseded by',
-            'w': get_recid_from_ref(value.get('record')),
-        })
+        self.setdefault('78502', []).append(
+            {
+                'i': 'superseded by',
+                'w': get_recid_from_ref(value.get('record')),
+            }
+        )
     elif value.get('relation') == 'predecessor':
-        self.setdefault('78002', []).append({
-            'i': 'supersedes',
-            'w': get_recid_from_ref(value.get('record')),
-        })
+        self.setdefault('78002', []).append(
+            {
+                'i': 'supersedes',
+                'w': get_recid_from_ref(value.get('record')),
+            }
+        )
     else:
-        raise NotImplementedError(u"Unhandled relation in related_records: {}".format(value.get('relation')))
+        raise NotImplementedError(
+            u"Unhandled relation in related_records: {}".format(value.get('relation'))
+        )
diff --git a/inspire_dojson/hep/rules/bd9xx.py b/inspire_dojson/hep/rules/bd9xx.py
index 0a0e2f6..1c35881 100644
--- a/inspire_dojson/hep/rules/bd9xx.py
+++ b/inspire_dojson/hep/rules/bd9xx.py
@@ -38,7 +38,11 @@ from inspire_utils.record import get_value

 from inspire_dojson.hep.model import hep, hep2marc
-from inspire_dojson.utils import force_single_element, get_recid_from_ref, get_record_ref
+from inspire_dojson.utils import (
+    force_single_element,
+    get_recid_from_ref,
+    get_record_ref,
+)


 COLLECTIONS_MAP = {
     'babar-analysisdocument': 'BABAR Analysis Documents',
@@ -169,7 +173,9 @@ def document_type(self, key, value):
         elif normalized_a_value == 'deleted':
             self['deleted'] = True
         elif normalized_a_value in COLLECTIONS_MAP:
-            self.setdefault('_collections', []).append(COLLECTIONS_MAP[normalized_a_value])
+            self.setdefault('_collections', []).append(
+                COLLECTIONS_MAP[normalized_a_value]
+            )
         elif normalized_a_value in DOCUMENT_TYPE_MAP:
             document_type.append(DOCUMENT_TYPE_MAP[normalized_a_value])
         elif normalized_a_value in valid_publication_types:
@@ -254,14 +260,15 @@ def publication_type2marc(self, key, value):
 @utils.for_each_value
 def references(self, key, value):
     """Populate the ``references`` key."""
+
     def _has_curator_flag(value):
         normalized_nine_values = [el.upper() for el in force_list(value.get('9'))]
         return 'CURATOR' in normalized_nine_values

     def _is_curated(value):
-        is_explicitly_curated = (
-            force_single_element(value.get('z')) == '1' and _has_curator_flag(value)
-        )
+        is_explicitly_curated = force_single_element(
+            value.get('z')
+        ) == '1' and _has_curator_flag(value)
         has_only_0_and_z = set(value.keys()) == {'0', 'z'}
         return is_explicitly_curated or has_only_0_and_z

@@ -322,8 +329,16 @@ def references2marc(self, key, value):

     external_ids = force_list(reference.get('external_system_identifiers'))
     u_values = force_list(get_value(reference, 'urls.value'))
-    u_values.extend(CDS_RECORD_FORMAT.format(el['value']) for el in external_ids if el.get('schema') == 'CDS')
-    u_values.extend(ADS_RECORD_FORMAT.format(el['value']) for el in external_ids if el.get('schema') == 'ADS')
+    u_values.extend(
+        CDS_RECORD_FORMAT.format(el['value'])
+        for el in external_ids
+        if el.get('schema') == 'CDS'
+    )
+    u_values.extend(
+        ADS_RECORD_FORMAT.format(el['value'])
+        for el in external_ids
+        if el.get('schema') == 'ADS'
+    )

     authors = force_list(reference.get('authors'))
     e_values = [el['full_name'] for el in authors if el.get('inspire_role') == 'editor']
@@ -332,10 +347,16 @@ def references2marc(self, key, value):
     r_values = force_list(reference.get('report_numbers'))
     if reference.get('arxiv_eprint'):
         arxiv_eprint = reference['arxiv_eprint']
-        r_values.append('arXiv:' + arxiv_eprint if is_arxiv_post_2007(arxiv_eprint) else arxiv_eprint)
+        r_values.append(
+            'arXiv:' + arxiv_eprint
+            if is_arxiv_post_2007(arxiv_eprint)
+            else arxiv_eprint
+        )

     if reference.get('publication_info'):
-        reference['publication_info'] = convert_new_publication_info_to_old([reference['publication_info']])[0]
+        reference['publication_info'] = convert_new_publication_info_to_old(
+            [reference['publication_info']]
+        )[0]
     journal_title = get_value(reference, 'publication_info.journal_title')
     journal_volume = get_value(reference, 'publication_info.journal_volume')
     page_start = get_value(reference, 'publication_info.page_start')
diff --git a/inspire_dojson/hep/rules/bdFFT.py b/inspire_dojson/hep/rules/bdFFT.py
index b721ce4..2be81aa 100644
--- a/inspire_dojson/hep/rules/bdFFT.py
+++ b/inspire_dojson/hep/rules/bdFFT.py
@@ -41,8 +41,13 @@ def documents(self, key, value):

     Also populates the ``figures`` key through side effects.
""" + def _is_hidden(value): - return 'HIDDEN' in [val.upper() for val in force_list(value.get('o'))] or _get_source(value) == 'arxiv' or None + return ( + 'HIDDEN' in [val.upper() for val in force_list(value.get('o'))] + or _get_source(value) == 'arxiv' + or None + ) def _is_figure(value): return value.get('f', "").endswith(".png") @@ -80,13 +85,15 @@ def _get_source(value): if _is_figure(value): index, caption = _get_index_and_caption(value.get('d', '')) - figures.append({ - 'key': _get_key(value), - 'caption': caption, - 'url': afs_url(value.get('a')), - 'order': index, - 'source': 'arxiv', # XXX: we don't have any other figures on legacy - }) + figures.append( + { + 'key': _get_key(value), + 'caption': caption, + 'url': afs_url(value.get('a')), + 'order': index, + 'source': 'arxiv', # XXX: we don't have any other figures on legacy + } + ) self['figures'] = figures else: return { @@ -123,7 +130,10 @@ def _get_hidden(value): def _get_filename_and_extension(value): file_name, extension = os.path.splitext(value.get('filename', value['key'])) - if file_name == "document" and value.get("material", "publication") != "publication": + if ( + file_name == "document" + and value.get("material", "publication") != "publication" + ): file_name = value["material"] return file_name, extension @@ -144,12 +154,14 @@ def figures2marc(self, key, values): fft = self.setdefault('FFT', []) for index, value in enumerate(values): file_name, extension = os.path.splitext(value.get('filename', value['key'])) - fft.append({ - 'd': u'{:05d} {}'.format(index, value.get('caption')), - 'a': afs_url_to_path(absolute_url(value.get('url'))), - 't': 'Plot', - 'n': file_name, - 'f': extension, - }) + fft.append( + { + 'd': u'{:05d} {}'.format(index, value.get('caption')), + 'a': afs_url_to_path(absolute_url(value.get('url'))), + 't': 'Plot', + 'n': file_name, + 'f': extension, + } + ) return fft diff --git a/inspire_dojson/hepnames/model.py b/inspire_dojson/hepnames/model.py index b47532d..2a5f1ea 100644 --- a/inspire_dojson/hepnames/model.py +++ b/inspire_dojson/hepnames/model.py @@ -24,7 +24,13 @@ from __future__ import absolute_import, division, print_function -from inspire_dojson.model import FilterOverdo, add_collection, add_schema, clean_marc, clean_record +from inspire_dojson.model import ( + FilterOverdo, + add_collection, + add_schema, + clean_marc, + clean_record, +) hepnames_filters = [ add_schema('authors.json'), diff --git a/inspire_dojson/hepnames/rules.py b/inspire_dojson/hepnames/rules.py index 16eae0c..9056b24 100644 --- a/inspire_dojson/hepnames/rules.py +++ b/inspire_dojson/hepnames/rules.py @@ -50,7 +50,9 @@ INSPIRE_BAI = re.compile(r'(\w+\.)+\d+') LOOKS_LIKE_CERN = re.compile(r'^\d+$|^CER[MN]?-|^CNER-|^CVERN-', re.I) NON_DIGIT = re.compile(r'[^\d]+') -LINKEDIN_URL = re.compile(r'https?://(\w+\.)?linkedin\.com/in/(?P[\w%-]+)', re.UNICODE) +LINKEDIN_URL = re.compile( + r'https?://(\w+\.)?linkedin\.com/in/(?P[\w%-]+)', re.UNICODE +) TWITTER_URL = re.compile(r'https?://(www\.)?twitter\.com/(?P\w+)') WIKIPEDIA_URL = re.compile(r'https?://(?P\w+)\.wikipedia\.org/wiki/(?P.*)') @@ -109,15 +111,20 @@ def _try_to_correct_value(schema, value): z_value = _try_to_correct_value(schema, z_value) if schema and a_value: - ids.insert(0, { - 'schema': schema, - 'value': a_value, - }) + ids.insert( + 0, + { + 'schema': schema, + 'value': a_value, + }, + ) if schema and z_value: - ids.append({ - 'schema': schema, - 'value': z_value, - }) + ids.append( + { + 'schema': schema, + 'value': z_value, + } + ) return ids @@ 

     Also populates the ``8564`` and ``970`` MARC field through side effects.
     """
+
     def _convert_schema(schema):
-        conversion = {
-            'INSPIRE BAI': 'BAI',
-            'INSPIRE ID': 'INSPIRE'
-        }
+        conversion = {'INSPIRE BAI': 'BAI', 'INSPIRE ID': 'INSPIRE'}
         return conversion.get(schema, schema)

     def _is_schema_spires(id_, schema):
@@ -173,10 +178,12 @@ def _is_schema_twitter(id, schema):
             field = 'a'
         else:
             field = 'z'
-        result.append({
-            field: id_,
-            '9': _convert_schema(schema),
-        })
+        result.append(
+            {
+                field: id_,
+                '9': _convert_schema(schema),
+            }
+        )

     return result

@@ -187,6 +194,7 @@ def name(self, key, value):

     Also populates the ``status``, ``birth_date`` and ``death_date`` keys through side effects.
     """
+
     def _get_title(value):
         c_value = force_single_element(value.get('c', ''))
         if c_value != 'title (e.g. Sir)':
@@ -278,14 +286,20 @@ def positions(self, key, value):
         current_email_addresses = force_list(value.get('m'))
         non_current_email_addresses = force_list(value.get('o'))

-        email_addresses.extend({
-            'value': address,
-            'current': True,
-        } for address in current_email_addresses)
-        email_addresses.extend({
-            'value': address,
-            'current': False,
-        } for address in non_current_email_addresses)
+        email_addresses.extend(
+            {
+                'value': address,
+                'current': True,
+            }
+            for address in current_email_addresses
+        )
+        email_addresses.extend(
+            {
+                'value': address,
+                'current': False,
+            }
+            for address in non_current_email_addresses
+        )

         self['email_addresses'] = email_addresses

@@ -343,9 +357,7 @@ def email_addresses2marc(self, key, value):
     Also populates the 371 field as a side effect.
     """
     m_or_o = 'm' if value.get('current') else 'o'
-    element = {
-        m_or_o: value.get('value')
-    }
+    element = {m_or_o: value.get('value')}

     if value.get('hidden'):
         return element
@@ -363,25 +375,30 @@ def email_addresses595(self, key, value):
     emails = self.get('email_addresses', [])

     if value.get('o'):
-        emails.append({
-            'value': value.get('o'),
-            'current': False,
-            'hidden': True,
-        })
+        emails.append(
+            {
+                'value': value.get('o'),
+                'current': False,
+                'hidden': True,
+            }
+        )

     if value.get('m'):
-        emails.append({
-            'value': value.get('m'),
-            'current': True,
-            'hidden': True,
-        })
+        emails.append(
+            {
+                'value': value.get('m'),
+                'current': True,
+                'hidden': True,
+            }
+        )

     notes = self.get('_private_notes', [])
     new_note = (
         {
             'source': value.get('9'),
             'value': _private_note,
-        } for _private_note in force_list(value.get('a'))
+        }
+        for _private_note in force_list(value.get('a'))
     )
     notes.extend(new_note)
     self['_private_notes'] = notes
@@ -407,6 +424,7 @@ def arxiv_categories(self, key, value):

     Also populates the ``inspire_categories`` key through side effects.
""" + def _is_arxiv(category): return category in valid_arxiv_categories() @@ -547,7 +565,7 @@ def awards(self, key, value): def awards2marc(self, key, value): return { 'a': ' '.join([value.get('name', ''), str(value.get('year', ''))]).strip(), - 'u': value.get('url') + 'u': value.get('url'), } @@ -566,9 +584,7 @@ def _get_json_experiments(marc_dict): record = get_record_ref(recid, 'experiments') yield { 'curated_relation': record is not None, - 'current': ( - marc_dict.get('z', '').lower() == 'current' - ), + 'current': (marc_dict.get('z', '').lower() == 'current'), 'end_date': end_year, 'name': name, 'record': record, @@ -637,10 +653,13 @@ def _get_id_schema(id_): recid = force_single_element(value.get('x')) record = get_record_ref(recid, 'authors') - ids = [{ - 'schema': _get_id_schema(id_), - 'value': id_, - } for id_ in force_list(value.get('i'))] + ids = [ + { + 'schema': _get_id_schema(id_), + 'value': id_, + } + for id_ in force_list(value.get('i')) + ] hidden = value.get('h') == 'HIDDEN' or None @@ -650,7 +669,7 @@ def _get_id_schema(id_): 'ids': ids, 'record': record, 'hidden': hidden, - 'curated_relation': value.get('y') == '1' if record else None + 'curated_relation': value.get('y') == '1' if record else None, } @@ -664,7 +683,7 @@ def advisors2marc(self, key, value): 'a': value.get('name'), 'g': value.get('degree_type'), 'i': ids, - 'h': 'HIDDEN' if value.get('hidden') else None + 'h': 'HIDDEN' if value.get('hidden') else None, } @@ -731,10 +750,12 @@ def new_record(self, key, value): for current_value in force_list(value): for id_ in force_list(current_value.get('a')): - ids.append({ - 'schema': 'SPIRES', - 'value': id_, - }) + ids.append( + { + 'schema': 'SPIRES', + 'value': id_, + } + ) new_recid = force_single_element(current_value.get('d', '')) if new_recid: @@ -750,6 +771,7 @@ def deleted(self, key, value): Also populates the ``stub`` key through side effects. """ + def _is_deleted(value): return force_single_element(value.get('c', '')).upper() == 'DELETED' diff --git a/inspire_dojson/institutions/rules.py b/inspire_dojson/institutions/rules.py index 03f5821..b13820b 100644 --- a/inspire_dojson/institutions/rules.py +++ b/inspire_dojson/institutions/rules.py @@ -84,17 +84,21 @@ def _split_acronym(value): for b_value in force_list(current_value.get('b')): department_name, department_acronym = _split_acronym(b_value) - institution_hierarchy.append({ - 'acronym': department_acronym, - 'name': department_name, - }) + institution_hierarchy.append( + { + 'acronym': department_acronym, + 'name': department_name, + } + ) for a_value in force_list(current_value.get('a')): institution_name, institution_acronym = _split_acronym(a_value) - institution_hierarchy.append({ - 'acronym': institution_acronym, - 'name': institution_name, - }) + institution_hierarchy.append( + { + 'acronym': institution_acronym, + 'name': institution_name, + } + ) x_values = force_list(current_value.get('x')) z_values = force_list(current_value.get('z')) @@ -103,11 +107,13 @@ def _split_acronym(value): # we might match a relation with the wrong recid. 
if len(x_values) == len(z_values): for _, recid in zip(x_values, z_values): - related_records.append({ - 'curated_relation': True, - 'record': get_record_ref(recid, 'institutions'), - 'relation_freetext': 'obsolete', - }) + related_records.append( + { + 'curated_relation': True, + 'record': get_record_ref(recid, 'institutions'), + 'relation_freetext': 'obsolete', + } + ) self['related_records'] = related_records self['institution_hierarchy'] = institution_hierarchy @@ -157,10 +163,7 @@ def institution_type(self, key, value): @institutions.over('name_variants', '^410..') def name_variants(self, key, value): - valid_sources = [ - 'ADS', - 'INSPIRE' - ] + valid_sources = ['ADS', 'INSPIRE'] if value.get('9') and value.get('9') not in valid_sources: return self.get('name_variants', []) @@ -173,10 +176,12 @@ def name_variants(self, key, value): source = force_single_element(value.get('9')) for name_variant in force_list(value.get('a')): - name_variants.append({ - 'source': source, - 'value': name_variant, - }) + name_variants.append( + { + 'source': source, + 'value': name_variant, + } + ) return name_variants diff --git a/inspire_dojson/journals/rules.py b/inspire_dojson/journals/rules.py index bd6743f..7f8ad78 100644 --- a/inspire_dojson/journals/rules.py +++ b/inspire_dojson/journals/rules.py @@ -151,7 +151,8 @@ def _private_notes(self, key, value): { 'source': value.get('9'), 'value': _private_note, - } for _private_note in force_list(value.get('x')) + } + for _private_note in force_list(value.get('x')) ] diff --git a/inspire_dojson/model.py b/inspire_dojson/model.py index 318e57a..98c73c9 100644 --- a/inspire_dojson/model.py +++ b/inspire_dojson/model.py @@ -54,7 +54,9 @@ def do(self, blob, **kwargs): def over(self, name, *source_tags): def decorator(creator): - return super(FilterOverdo, self).over(name, *source_tags)(self._wrap_exception(creator, name)) + return super(FilterOverdo, self).over(name, *source_tags)( + self._wrap_exception(creator, name) + ) return decorator @@ -67,9 +69,16 @@ def func(self, key, value): except Exception as exc: if type(exc) is IgnoreKey: raise exc - raise_from(DoJsonError( - u'Error in rule "{name}" for field "{key}"'.format(name=name, key=key), exc.args, value - ), exc) + raise_from( + DoJsonError( + u'Error in rule "{name}" for field "{key}"'.format( + name=name, key=key + ), + exc.args, + value, + ), + exc, + ) return func @@ -97,4 +106,5 @@ def clean_marc(record, blob): def clean_record(exclude_keys=()): def _clean_record(record, blob): return dedupe_all_lists(strip_empty_values(record), exclude_keys=exclude_keys) + return _clean_record diff --git a/inspire_dojson/utils/__init__.py b/inspire_dojson/utils/__init__.py index 9127b4a..b805180 100644 --- a/inspire_dojson/utils/__init__.py +++ b/inspire_dojson/utils/__init__.py @@ -134,9 +134,13 @@ def afs_url(file_path): if process_path: if afs_service: - return os.path.join(afs_service, urllib.request.pathname2url(file_path.encode('utf-8'))) + return os.path.join( + afs_service, urllib.request.pathname2url(file_path.encode('utf-8')) + ) file_path = os.path.join(afs_path, file_path) - return urllib.parse.urljoin('file://', urllib.request.pathname2url(file_path.encode('utf-8'))) + return urllib.parse.urljoin( + 'file://', urllib.request.pathname2url(file_path.encode('utf-8')) + ) return file_path @@ -159,7 +163,7 @@ def afs_url_to_path(url): if not afs_service or not url.startswith(afs_service): return url - path = url[len(afs_service):].lstrip('/') + path = url[len(afs_service) :].lstrip('/') return 
urllib.parse.urljoin('file://', os.path.join(afs_path, path)) @@ -225,6 +229,7 @@ def dedupe_all_lists(obj, exclude_keys=()): def normalize_date_aggressively(date): """Normalize date, stripping date parts until a valid date is obtained.""" + def _strip_last_part(date): parts = date.split('-') return '-'.join(parts[:-1]) diff --git a/inspire_dojson/utils/geo.py b/inspire_dojson/utils/geo.py index 7f095db..4e179d7 100644 --- a/inspire_dojson/utils/geo.py +++ b/inspire_dojson/utils/geo.py @@ -293,7 +293,7 @@ 'FR': ['FX'], 'GB': ['UK'], 'TL': ['TP'], - 'CD': ['ZR'] + 'CD': ['ZR'], } countries_alternative_spellings = { @@ -309,13 +309,20 @@ 'CN': ['PR CHINA'], 'CS': ['CZECHSOLVAKIA'], 'CZ': ['PRAGUE'], - 'DE': ['DEUTSCHLAND', 'WEST GERMANY', 'EAST GERMANY', 'BAVARIA', - 'GERMANY (DESY)'], + 'DE': ['DEUTSCHLAND', 'WEST GERMANY', 'EAST GERMANY', 'BAVARIA', 'GERMANY (DESY)'], 'ES': ['CANARY ISLANDS', 'MADRID'], 'FR': ['CORSICA'], 'GR': ['CRETE'], - 'GB': ['UK', 'ENGLAND', 'ENG', 'SCOTLAND', 'WALES', 'SCOTLAND/UK', - 'NORTHERN IRELAND', 'LONDON'], + 'GB': [ + 'UK', + 'ENGLAND', + 'ENG', + 'SCOTLAND', + 'WALES', + 'SCOTLAND/UK', + 'NORTHERN IRELAND', + 'LONDON', + ], 'ID': ['BALI'], 'IL': ['JERUSALEM'], 'IR': ['IRAN'], @@ -331,9 +338,13 @@ 'VE': ['VENEZUELA'], 'VN': ['VIETNAM'], 'US': ['UNITED STATES OF AMERICA', 'UNITED STATES', 'US', 'USA'], - 'ZA': ['SAFRICA'] + 'ZA': ['SAFRICA'], +} +countries_from_alternative_spellings = { + spelling: code + for (code, spellings) in countries_alternative_spellings.items() + for spelling in spellings } -countries_from_alternative_spellings = {spelling: code for (code, spellings) in countries_alternative_spellings.items() for spelling in spellings} us_state_to_iso_code = { @@ -387,7 +398,7 @@ 'WASHINGTON': 'WA', 'WEST VIRGINIA': 'WV', 'WISCONSIN': 'WI', - 'WYOMING': 'WY' + 'WYOMING': 'WY', } us_states_alternative_spellings = { @@ -443,12 +454,29 @@ 'WI': ['WI', 'WIS', 'WISC'], 'WY': ['WY'], } -us_states_from_alternative_spellings = {spelling: state for (state, spellings) in us_states_alternative_spellings.items() for spelling in spellings} +us_states_from_alternative_spellings = { + spelling: state + for (state, spellings) in us_states_alternative_spellings.items() + for spelling in spellings +} -south_korean_cities = ['SEOUL', 'DAEJON', 'DAEJEON', 'MT SORAK', 'POHANG', - 'JEJU ISLAND', 'CHEJU ISLAND', 'GYEONGJU', 'BUSAN', - 'DAEGU', 'GYEONGIU', 'PUSAN', 'YONGPYONG', - 'PHOENIX PARK', 'CHEJU ISLAND'] +south_korean_cities = [ + 'SEOUL', + 'DAEJON', + 'DAEJEON', + 'MT SORAK', + 'POHANG', + 'JEJU ISLAND', + 'CHEJU ISLAND', + 'GYEONGJU', + 'BUSAN', + 'DAEGU', + 'GYEONGIU', + 'PUSAN', + 'YONGPYONG', + 'PHOENIX PARK', + 'CHEJU ISLAND', +] def match_country_code(original_code): @@ -539,8 +567,9 @@ def parse_conference_address(address_string): } -def parse_institution_address(address, city, state_province, - country, postal_code, country_code): +def parse_institution_address( + address, city, state_province, country, postal_code, country_code +): """Parse an institution address.""" address_list = force_list(address) state_province = match_us_state(state_province) or state_province @@ -558,7 +587,11 @@ def parse_institution_address(address, city, state_province, if not country_code and country: country_code = match_country_name_to_its_code(country) - if not country_code and state_province and state_province in us_state_to_iso_code.values(): + if ( + not country_code + and state_province + and state_province in us_state_to_iso_code.values() + ): country_code = 'US' return 
{ diff --git a/tests/test_api.py b/tests/test_api.py index 0a16b93..5286f4d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -202,9 +202,7 @@ def test_record2marcxml_generates_controlfields(): } expected = ( - b'<record>\n' - b'  <controlfield tag="001">4328</controlfield>\n' - b'</record>\n' + b'<record>\n' b'  <controlfield tag="001">4328</controlfield>\n' b'</record>\n' ) result = record2marcxml(record) @@ -275,9 +273,7 @@ def test_record2marcxml_supports_relative_urls(): } expected = ( - b'<record>\n' - b'  <controlfield tag="001">4328</controlfield>\n' - b'</record>\n' + b'<record>\n' b'  <controlfield tag="001">4328</controlfield>\n' b'</record>\n' ) result = record2marcxml(record) diff --git a/tests/test_cds.py b/tests/test_cds.py index eacd875..1a5573e 100644 --- a/tests/test_cds.py +++ b/tests/test_cds.py @@ -35,8 +35,8 @@ def test_external_system_identifiers_from_001(): subschema = schema['properties']['external_system_identifiers'] snippet = ( - '<controlfield tag="001">2270264</controlfield>' - ) # cds.cern.ch/record/2270264 + '<controlfield tag="001">2270264</controlfield>' # cds.cern.ch/record/2270264 + ) expected = [ { @@ -686,8 +686,8 @@ def test_authors_from_100__a_u_and_multiple_700__a_u_e(): 'a': 'Kersevan, Roberto', 'e': 'dir.', 'u': 'Linz U.', - } - ] + }, + ], } result = cds2hep_marc.do(create_record(snippet)) @@ -952,7 +952,7 @@ def test_thesis_info_from_502__a_b_c_and_500__a(): 'b': 'PhD', 'c': 'Linz U.', 'd': '2017', - } + }, } result = cds2hep_marc.do(create_record(snippet)) @@ -986,7 +986,7 @@ def test_abstracts_from_520__a(): expected = [ { '9': 'CDS', - 'a': u'The underlying thesis on mathematical simulation methods in application and theory is structured into three parts. The first part sets up a mathematical model capable of predicting the performance and operation of an accelerator’s vacuum system based on analytical methods. A coupled species-balance equation system describes the distribution of the gas dynamics in an ultra-high vacuum system considering impacts of conductance limitations, beam induced effects (ion-, electron-, and photon-induced de- sorption), thermal outgassing and sticking probabilities of the chamber materials. A new solving algorithm based on sparse matrix representations, is introduced and presents a closed form solution of the equation system. The model is implemented in a Python environment, named PyVasco, and is supported by a graphical user interface to make it easy available for everyone. A sensitivity analysis, a cross-check with the Test-Particle Monte Carlo simulation program Molflow+ and a comparison of the simulation results to readings of the Large Hadron Colliders (LHC) pressure gauges validate the code. The computation of density profiles considering several effects (as men- tioned above) is performed within a short computation time for indefinitely long vacuum systems. This is in particular interesting for the design of a stable vacuum system for new large accelerat- ors like the Future Circular Colliders (FCC) with 100 km in circumference. A simulation of the FCC is shown at the end of this part. Additionally, PyVasco was presented twice at international conferences in Rome and Berlin and has been submitted in July with the title “Analytical vacuum simulations in high energy accelerators for future machines based on the LHC performance” to the Journal “Physical Review Accelerator and Beams”. The second and third part of the thesis study properties of quasi-Monte Carlo (QMC) methods in the scope of the special research project “Quasi-Monte Carlo methods: Theory and Applications”. Instead of solving a complex integral analytically, its value is approximated by function evaluation at specific points. The choice of a good point set is critical for a good result.
It turned out that continuous curves provide a good tool to define these point sets. So called “bounded remainder sets” (BRS) define a measure for the quality of the uniform distribution of a curve in the unit- square. The trajectory of a billiard path with an irrational slope is especially well distributed. Certain criteria to the BRS are defined and analysed in regard to the distribution error. The idea of the proofs is based on Diophantine approximations of irrational numbers and on the unfolding technique of the billiard path to a straight line in the plane. New results of the BRS for the billiard path are reported to the “Journal of Uniform Distribution”. The third part analyses the distribution of the energy levels of quantum systems. It was stated that the eigenvalues of the energy spectra for almost all integrable quantum systems are uncor- related and Poisson distributed. The harmonic oscillator presents already one counter example to this assertion. The particle in a box on the other hand obtains these properties. This thesis formulates a general statement that describes under which conditions the eigenvalues do not follow the poissonian property. The concept of the proofs is based on the analysis of the pair correlations of sequences. The former particle physicist Ian Sloan also exposed this topic and he became spe- cialized as a skilled mathematician in this field. To honour his achievements a Festschrift for his 80th birthday is written and the results of the work of this thesis are published there. The book will appear in 2018.' + 'a': u'The underlying thesis on mathematical simulation methods in application and theory is structured into three parts. The first part sets up a mathematical model capable of predicting the performance and operation of an accelerator’s vacuum system based on analytical methods. A coupled species-balance equation system describes the distribution of the gas dynamics in an ultra-high vacuum system considering impacts of conductance limitations, beam induced effects (ion-, electron-, and photon-induced de- sorption), thermal outgassing and sticking probabilities of the chamber materials. A new solving algorithm based on sparse matrix representations, is introduced and presents a closed form solution of the equation system. The model is implemented in a Python environment, named PyVasco, and is supported by a graphical user interface to make it easy available for everyone. A sensitivity analysis, a cross-check with the Test-Particle Monte Carlo simulation program Molflow+ and a comparison of the simulation results to readings of the Large Hadron Colliders (LHC) pressure gauges validate the code. The computation of density profiles considering several effects (as men- tioned above) is performed within a short computation time for indefinitely long vacuum systems. This is in particular interesting for the design of a stable vacuum system for new large accelerat- ors like the Future Circular Colliders (FCC) with 100 km in circumference. A simulation of the FCC is shown at the end of this part. Additionally, PyVasco was presented twice at international conferences in Rome and Berlin and has been submitted in July with the title “Analytical vacuum simulations in high energy accelerators for future machines based on the LHC performance” to the Journal “Physical Review Accelerator and Beams”. 
The second and third part of the thesis study properties of quasi-Monte Carlo (QMC) methods in the scope of the special research project “Quasi-Monte Carlo methods: Theory and Applications”. Instead of solving a complex integral analytically, its value is approximated by function evaluation at specific points. The choice of a good point set is critical for a good result. It turned out that continuous curves provide a good tool to define these point sets. So called “bounded remainder sets” (BRS) define a measure for the quality of the uniform distribution of a curve in the unit- square. The trajectory of a billiard path with an irrational slope is especially well distributed. Certain criteria to the BRS are defined and analysed in regard to the distribution error. The idea of the proofs is based on Diophantine approximations of irrational numbers and on the unfolding technique of the billiard path to a straight line in the plane. New results of the BRS for the billiard path are reported to the “Journal of Uniform Distribution”. The third part analyses the distribution of the energy levels of quantum systems. It was stated that the eigenvalues of the energy spectra for almost all integrable quantum systems are uncor- related and Poisson distributed. The harmonic oscillator presents already one counter example to this assertion. The particle in a box on the other hand obtains these properties. This thesis formulates a general statement that describes under which conditions the eigenvalues do not follow the poissonian property. The concept of the proofs is based on the analysis of the pair correlations of sequences. The former particle physicist Ian Sloan also exposed this topic and he became spe- cialized as a skilled mathematician in this field. To honour his achievements a Festschrift for his 80th birthday is written and the results of the work of this thesis are published there. The book will appear in 2018.', }, ] result = cds2hep_marc.do(create_record(snippet)) @@ -996,7 +996,7 @@ def test_abstracts_from_520__a(): expected = [ { 'source': 'CDS', - 'value': u'The underlying thesis on mathematical simulation methods in application and theory is structured into three parts. The first part sets up a mathematical model capable of predicting the performance and operation of an accelerator’s vacuum system based on analytical methods. A coupled species-balance equation system describes the distribution of the gas dynamics in an ultra-high vacuum system considering impacts of conductance limitations, beam induced effects (ion-, electron-, and photon-induced de- sorption), thermal outgassing and sticking probabilities of the chamber materials. A new solving algorithm based on sparse matrix representations, is introduced and presents a closed form solution of the equation system. The model is implemented in a Python environment, named PyVasco, and is supported by a graphical user interface to make it easy available for everyone. A sensitivity analysis, a cross-check with the Test-Particle Monte Carlo simulation program Molflow+ and a comparison of the simulation results to readings of the Large Hadron Colliders (LHC) pressure gauges validate the code. The computation of density profiles considering several effects (as men- tioned above) is performed within a short computation time for indefinitely long vacuum systems. This is in particular interesting for the design of a stable vacuum system for new large accelerat- ors like the Future Circular Colliders (FCC) with 100 km in circumference. 
A simulation of the FCC is shown at the end of this part. Additionally, PyVasco was presented twice at international conferences in Rome and Berlin and has been submitted in July with the title “Analytical vacuum simulations in high energy accelerators for future machines based on the LHC performance” to the Journal “Physical Review Accelerator and Beams”. The second and third part of the thesis study properties of quasi-Monte Carlo (QMC) methods in the scope of the special research project “Quasi-Monte Carlo methods: Theory and Applications”. Instead of solving a complex integral analytically, its value is approximated by function evaluation at specific points. The choice of a good point set is critical for a good result. It turned out that continuous curves provide a good tool to define these point sets. So called “bounded remainder sets” (BRS) define a measure for the quality of the uniform distribution of a curve in the unit- square. The trajectory of a billiard path with an irrational slope is especially well distributed. Certain criteria to the BRS are defined and analysed in regard to the distribution error. The idea of the proofs is based on Diophantine approximations of irrational numbers and on the unfolding technique of the billiard path to a straight line in the plane. New results of the BRS for the billiard path are reported to the “Journal of Uniform Distribution”. The third part analyses the distribution of the energy levels of quantum systems. It was stated that the eigenvalues of the energy spectra for almost all integrable quantum systems are uncor- related and Poisson distributed. The harmonic oscillator presents already one counter example to this assertion. The particle in a box on the other hand obtains these properties. This thesis formulates a general statement that describes under which conditions the eigenvalues do not follow the poissonian property. The concept of the proofs is based on the analysis of the pair correlations of sequences. The former particle physicist Ian Sloan also exposed this topic and he became spe- cialized as a skilled mathematician in this field. To honour his achievements a Festschrift for his 80th birthday is written and the results of the work of this thesis are published there. The book will appear in 2018.' + 'value': u'The underlying thesis on mathematical simulation methods in application and theory is structured into three parts. The first part sets up a mathematical model capable of predicting the performance and operation of an accelerator’s vacuum system based on analytical methods. A coupled species-balance equation system describes the distribution of the gas dynamics in an ultra-high vacuum system considering impacts of conductance limitations, beam induced effects (ion-, electron-, and photon-induced de- sorption), thermal outgassing and sticking probabilities of the chamber materials. A new solving algorithm based on sparse matrix representations, is introduced and presents a closed form solution of the equation system. The model is implemented in a Python environment, named PyVasco, and is supported by a graphical user interface to make it easy available for everyone. A sensitivity analysis, a cross-check with the Test-Particle Monte Carlo simulation program Molflow+ and a comparison of the simulation results to readings of the Large Hadron Colliders (LHC) pressure gauges validate the code. 
The computation of density profiles considering several effects (as men- tioned above) is performed within a short computation time for indefinitely long vacuum systems. This is in particular interesting for the design of a stable vacuum system for new large accelerat- ors like the Future Circular Colliders (FCC) with 100 km in circumference. A simulation of the FCC is shown at the end of this part. Additionally, PyVasco was presented twice at international conferences in Rome and Berlin and has been submitted in July with the title “Analytical vacuum simulations in high energy accelerators for future machines based on the LHC performance” to the Journal “Physical Review Accelerator and Beams”. The second and third part of the thesis study properties of quasi-Monte Carlo (QMC) methods in the scope of the special research project “Quasi-Monte Carlo methods: Theory and Applications”. Instead of solving a complex integral analytically, its value is approximated by function evaluation at specific points. The choice of a good point set is critical for a good result. It turned out that continuous curves provide a good tool to define these point sets. So called “bounded remainder sets” (BRS) define a measure for the quality of the uniform distribution of a curve in the unit- square. The trajectory of a billiard path with an irrational slope is especially well distributed. Certain criteria to the BRS are defined and analysed in regard to the distribution error. The idea of the proofs is based on Diophantine approximations of irrational numbers and on the unfolding technique of the billiard path to a straight line in the plane. New results of the BRS for the billiard path are reported to the “Journal of Uniform Distribution”. The third part analyses the distribution of the energy levels of quantum systems. It was stated that the eigenvalues of the energy spectra for almost all integrable quantum systems are uncor- related and Poisson distributed. The harmonic oscillator presents already one counter example to this assertion. The particle in a box on the other hand obtains these properties. This thesis formulates a general statement that describes under which conditions the eigenvalues do not follow the poissonian property. The concept of the proofs is based on the analysis of the pair correlations of sequences. The former particle physicist Ian Sloan also exposed this topic and he became spe- cialized as a skilled mathematician in this field. To honour his achievements a Festschrift for his 80th birthday is written and the results of the work of this thesis are published there. 
The book will appear in 2018.', }, ] result = hep.do(create_record_from_dict(result)) @@ -1248,16 +1248,12 @@ def test_collaboration_from_710__g(): '' ) # cds.cern.ch/2295739 - expected = [ - {'g': 'ATLAS Collaboration'} - ] + expected = [{'g': 'ATLAS Collaboration'}] result = cds2hep_marc.do(create_record(snippet)) assert expected == result['710__'] - expected = [ - {'value': 'ATLAS'} - ] + expected = [{'value': 'ATLAS'}] result = hep.do(create_record_from_dict(result)) assert validate(result['collaborations'], subschema) is None diff --git a/tests/test_common.py b/tests/test_common.py index fe31d82..584efaf 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -175,9 +175,7 @@ def test_self_from_001(): schema = load_schema('hep') subschema = schema['properties']['self'] - snippet = ( - '<controlfield tag="001">1508668</controlfield>' - ) # record/1508668 + snippet = '<controlfield tag="001">1508668</controlfield>' # record/1508668 expected = {'$ref': 'http://localhost:5000/api/literature/1508668'} result = hep.do(create_record(snippet)) @@ -190,9 +188,7 @@ def test_control_number_from_001(): schema = load_schema('hep') subschema = schema['properties']['control_number'] - snippet = ( - '<controlfield tag="001">1508668</controlfield>' - ) # record/1508668 + snippet = '<controlfield tag="001">1508668</controlfield>' # record/1508668 expected = 1508668 result = hep.do(create_record(snippet)) @@ -549,11 +545,7 @@ def test_inspire_categories_from_65017a_2_9_converts_submitter(): assert expected == result['inspire_categories'] expected = [ - { - '2': 'INSPIRE', - '9': 'user', - 'a': 'Math and Math Physics' - }, + {'2': 'INSPIRE', '9': 'user', 'a': 'Math and Math Physics'}, ] result = hep2marc.do(result) @@ -790,8 +782,8 @@ def test_legacy_version_from_005(): subschema = schema['properties']['legacy_version'] snippet = ( - '<controlfield tag="005">20180919130452.0</controlfield>' - ) # record/1694560 + '<controlfield tag="005">20180919130452.0</controlfield>' # record/1694560 + ) expected = '20180919130452.0' result = hep.do(create_record(snippet)) diff --git a/tests/test_conferences.py b/tests/test_conferences.py index 0c38247..7af14af 100644 --- a/tests/test_conferences.py +++ b/tests/test_conferences.py @@ -44,12 +44,14 @@ def test_addresses_from_034__d_f_and_111__c(): '' ) # record/1707423 - expected = [{ - 'cities': ['Bologna'], - 'country_code': 'IT', - 'latitude': 44.494887, - 'longitude': 11.3426162 - }] + expected = [ + { + 'cities': ['Bologna'], + 'country_code': 'IT', + 'latitude': 44.494887, + 'longitude': 11.3426162, + } + ] result = conferences.do(create_record(snippet)) assert validate(result['addresses'], subschema) is None @@ -791,7 +793,9 @@ def test_public_notes_from_double_500__a(): ) # record/1445071 expected = [ - {'value': 'Marion White, PhD (Argonne) Conference Chair Vladimir Shiltsev, PhD (FNAL) Scientific Program Chair Maria Power (Argonne) Conference Editor/Scientific Secretariat'}, + { + 'value': 'Marion White, PhD (Argonne) Conference Chair Vladimir Shiltsev, PhD (FNAL) Scientific Program Chair Maria Power (Argonne) Conference Editor/Scientific Secretariat' + }, {'value': 'Will be published in: JACoW'}, ] result = conferences.do(create_record(snippet)) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index f819f95..4260f0a 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -416,7 +416,9 @@ def test_long_name_and_name_variants_from_245__a_and_419__a(): '' ) # record/1275752 - expected_long_name = r'Proposal to measure the very rare kaon decay $K^+ \to \pi^+ \nu \bar{\nu}$' + expected_long_name = ( + r'Proposal to measure the very rare kaon decay $K^+ \to \pi^+ \nu \bar{\nu}$' + ) expected_name_variants = [ 'P-326', ] @@ -439,9 +441,7 @@ def 
test_description_from_520__a(): '' ) # record/1108188 - expected = ( - 'The Muon Accelerator Program (MAP) was created in 2010 to unify the DOE supported R&D in the U.S. aimed at developing the concepts and technologies required for Muon Colliders and Neutrino Factories. These muon based facilities have the potential to discover and explore new exciting fundamental physics, but will require the development of demanding technologies and innovative concepts. The MAP aspires to prove the feasibility of a Muon Collider within a few years, and to make significant contributions to the international effort devoted to developing Neutrino Factories. MAP was formally approved on March 18, 2011.' - ) + expected = 'The Muon Accelerator Program (MAP) was created in 2010 to unify the DOE supported R&D in the U.S. aimed at developing the concepts and technologies required for Muon Colliders and Neutrino Factories. These muon based facilities have the potential to discover and explore new exciting fundamental physics, but will require the development of demanding technologies and innovative concepts. The MAP aspires to prove the feasibility of a Muon Collider within a few years, and to make significant contributions to the international effort devoted to developing Neutrino Factories. MAP was formally approved on March 18, 2011.' result = experiments.do(create_record(snippet)) diff --git a/tests/test_hep_bd0xx.py b/tests/test_hep_bd0xx.py index d639c14..fbe2540 100644 --- a/tests/test_hep_bd0xx.py +++ b/tests/test_hep_bd0xx.py @@ -575,7 +575,7 @@ def test_texkeys_from_035__z_9_and_035__a_9(): ) # record/1498308 expected = [ - 'Akiba:2016ofq', # XXX: the first one is the one coming + 'Akiba:2016ofq', # XXX: the first one is the one coming 'N.Cartiglia:2015cn', # from the "a" field. 
] result = hep.do(create_record(snippet)) @@ -591,7 +591,7 @@ def test_texkeys_from_035__z_9_and_035__a_9(): { '9': 'INSPIRETeX', 'z': 'N.Cartiglia:2015cn', - } + }, ] result = hep2marc.do(result) @@ -842,9 +842,7 @@ def test_035_from_arxiv_eprints_and_texkeys(): subschema_arxiv_eprints = schema['properties']['arxiv_eprints'] subschema_texkeys = schema['properties']['texkeys'] snippet = { - 'arxiv_eprints': [ - {'value': '2212.04977', 'categories': ['hep-ex']} - ], + 'arxiv_eprints': [{'value': '2212.04977', 'categories': ['hep-ex']}], 'texkeys': ['LHCb:2022diq'], } # literature/2612668 @@ -957,7 +955,7 @@ def test_arxiv_eprints_from_037__a_c_9_obsolete_category(): 'categories': [ 'math.FA', ], - 'value': 'funct-an/9710003' + 'value': 'funct-an/9710003', }, ] result = hep.do(create_record(snippet)) @@ -1061,9 +1059,7 @@ def test_report_numbers_hidden_from_037__z(): assert validate(result['report_numbers'], subschema) is None assert expected == result['report_numbers'] - expected = [ - {'z': 'FERMILAB-PUB-17-011-CMS'} - ] + expected = [{'z': 'FERMILAB-PUB-17-011-CMS'}] result = hep2marc.do(result) assert expected == result['037'] @@ -1164,7 +1160,7 @@ def test_arxiv_eprints_from_037__a_c_9_and_multiple_65017_a_2(): 'math-ph', 'gr-qc', ], - 'value': '1702.00702' + 'value': '1702.00702', } ] result = hep.do(create_record(snippet)) @@ -1213,11 +1209,7 @@ def test_arxiv_eprints_037__a_9_lowercase_arxiv(): "" ) - expected = [ - { - 'value': '1703.09086' - } - ] + expected = [{'value': '1703.09086'}] result = hep.do(create_record(snippet)) assert validate(result['arxiv_eprints'], subschema) is None diff --git a/tests/test_hep_bd1xx.py b/tests/test_hep_bd1xx.py index 59908ff..1704f82 100644 --- a/tests/test_hep_bd1xx.py +++ b/tests/test_hep_bd1xx.py @@ -243,7 +243,7 @@ def test_authors_from_100__a_e_w_y_and_700_a_e_w_y(): 'inspire_roles': [ 'editor', ], - } + }, ] result = hep.do(create_record(snippet)) @@ -375,9 +375,7 @@ def test_authors_from_100__a_i_u_x_y_z_and_double_700__a_u_w_x_y_z(): expected_100 = { 'a': 'Sjostrand, Torbjorn', - 'i': [ - 'INSPIRE-00126851' - ], + 'i': ['INSPIRE-00126851'], 'u': [ 'Lund U., Dept. Theor. 
Phys.', ], @@ -431,9 +429,9 @@ def test_authors_from_100__a_v_m_w_y(): 'raw_affiliations': [ { 'value': 'Chern Institute of Mathematics and LPMC, Nankai University,' - ' Tianjin, 300071, China', + ' Tianjin, 300071, China', } - ] + ], }, ] result = hep.do(create_record(snippet)) @@ -566,9 +564,9 @@ def test_authors_from_100__a_m_u_v_w_y_z_and_700__a_j_v_m_w_y(): 'raw_affiliations': [ { 'value': 'Chern Institute of Mathematics and LPMC, Nankai University,' - ' Tianjin, 300071, China', + ' Tianjin, 300071, China', } - ] + ], }, { 'emails': [ @@ -588,9 +586,9 @@ def test_authors_from_100__a_m_u_v_w_y_z_and_700__a_j_v_m_w_y(): 'raw_affiliations': [ { 'value': 'School of Mathematics, South China University of Technology,' - ' Guangdong, Guangzhou, 510640, China', + ' Guangdong, Guangzhou, 510640, China', } - ] + ], }, ] result = hep.do(create_record(snippet)) @@ -609,7 +607,7 @@ def test_authors_from_100__a_m_u_v_w_y_z_and_700__a_j_v_m_w_y(): 'v': [ 'Chern Institute of Mathematics and LPMC, Nankai University,' ' Tianjin, 300071, China', - ] + ], } expected_700 = [ { @@ -725,11 +723,7 @@ def test_authors_from_100__a_triple_u_w_x_y_triple_z_and_700__double_a_u_w_x_y_z expected_100 = { 'a': 'Abe, K.', - 'u': [ - 'Tokyo U., ICRR', - 'Tokyo U.', - 'Tokyo U., IPMU' - ], + 'u': ['Tokyo U., ICRR', 'Tokyo U.', 'Tokyo U., IPMU'], } expected_700 = [ { @@ -923,9 +917,7 @@ def test_authors_from_100__a_j_m_u_v_w_y(): 'value': 'D.Macnair.2', }, ], - 'raw_affiliations': [ - {'value': 'SLAC, Menlo Park, California, USA'} - ], + 'raw_affiliations': [{'value': 'SLAC, Menlo Park, California, USA'}], }, ] result = hep.do(create_record(snippet)) @@ -1267,9 +1259,7 @@ def test_authors_supervisors_from_100_a_u_w_y_z_and_701__double_a_u_z(): 'affiliations': [ { 'value': 'Minnesota U.', - 'record': { - '$ref': 'http://localhost:5000/api/institutions/903010' - } + 'record': {'$ref': 'http://localhost:5000/api/institutions/903010'}, } ], 'full_name': 'Poling, Ron', @@ -1290,7 +1280,7 @@ def test_authors_supervisors_from_100_a_u_w_y_z_and_701__double_a_u_z(): 'inspire_roles': [ 'supervisor', ], - } + }, ] result = hep.do(create_record(snippet)) @@ -1392,9 +1382,7 @@ def test_authors_supervisors_from_100_a_j_u_w_y_z_and_701__a_i_j_u_x_y_z(): 'value': 'CERN-456299', }, ], - 'inspire_roles': [ - 'supervisor' - ], + 'inspire_roles': ['supervisor'], 'record': { '$ref': 'http://localhost:5000/api/authors/1060887', }, @@ -1528,7 +1516,7 @@ def test_authors_from_100_a_double_u_w_z_y_double_z_and_700__a_double_u_w_y_doub 'u': [ 'INFN, Turin', 'Turin U.', - ] + ], } expected_700 = [ { @@ -1584,7 +1572,9 @@ def test_author_from_100__a_i_m_u_v_x_y_z_strips_email_prefix(): }, ], 'raw_affiliations': [ - {'value': 'CERN, European Organization for Nuclear Research, Geneve, Switzerland'}, + { + 'value': 'CERN, European Organization for Nuclear Research, Geneve, Switzerland' + }, ], 'record': { '$ref': 'http://localhost:5000/api/authors/1066844', @@ -1652,7 +1642,9 @@ def test_author_from_700__strips_dot_from_orcid(): }, ], 'raw_affiliations': [ - {'value': u'Laboratoire de Mathématiques et Physique Théorique CNRS - Université de Tours - Parc de Grammont - 37200 Tours - France'}, + { + 'value': u'Laboratoire de Mathématiques et Physique Théorique CNRS - Université de Tours - Parc de Grammont - 37200 Tours - France' + }, ], }, ] @@ -1949,7 +1941,7 @@ def test_authors_from_100__a_with_q_v_w_y_z_duplicated_v(): { 'value': 'Joint Institute for Nuclear Research', } - ] + ], }, ] result = hep.do(create_record(snippet)) @@ -2199,10 +2191,7 @@ def 
test_authors_from_100__a_v_w_y_repeated_t(): expected = [ { 'affiliations_identifiers': [ - { - 'schema': 'GRID', - 'value': 'grid.4489.1' - }, + {'schema': 'GRID', 'value': 'grid.4489.1'}, ], 'full_name': 'Puertas-Centeno, David', 'raw_affiliations': [ @@ -2211,7 +2200,7 @@ def test_authors_from_100__a_v_w_y_repeated_t(): }, { 'value': u'Instituto Carlos I de Física Teórica y Computacional - Universidad de Granada - Granada - 18071 - Spain', - } + }, ], 'ids': [ { @@ -2228,9 +2217,7 @@ def test_authors_from_100__a_v_w_y_repeated_t(): expected = { 'a': 'Puertas-Centeno, David', - 't': [ - 'GRID:grid.4489.1' - ], + 't': ['GRID:grid.4489.1'], 'v': [ u'Departamento de Física Atómica - Molecular y Nuclear - Universidad de Granada - Granada - 18071 - Spain', u'Instituto Carlos I de Física Teórica y Computacional - Universidad de Granada - Granada - 18071 - Spain', @@ -2263,19 +2250,11 @@ def test_authors_from_100__a_t_u_v(): { 'value': 'Catania U.', }, - { - 'value': 'INFN, LNS' - } + {'value': 'INFN, LNS'}, ], 'affiliations_identifiers': [ - { - 'schema': 'GRID', - 'value': 'grid.8158.4' - }, - { - 'schema': 'GRID', - 'value': 'grid.466880.4' - }, + {'schema': 'GRID', 'value': 'grid.8158.4'}, + {'schema': 'GRID', 'value': 'grid.466880.4'}, ], 'full_name': 'Plumari, S.', 'raw_affiliations': [ @@ -2284,8 +2263,8 @@ def test_authors_from_100__a_t_u_v(): }, { 'value': u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy', - } - ] + }, + ], }, ] result = hep.do(create_record(snippet)) @@ -2295,18 +2274,15 @@ def test_authors_from_100__a_t_u_v(): expected = { 'a': 'Plumari, S.', - 't': [ - 'GRID:grid.8158.4', - 'GRID:grid.466880.4' - ], + 't': ['GRID:grid.8158.4', 'GRID:grid.466880.4'], 'u': [ 'Catania U.', 'INFN, LNS', ], 'v': [ u'Department of Physics U. and Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 - Catania - Italy', - u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy' - ] + u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy', + ], } result = hep2marc.do(result) @@ -2335,19 +2311,11 @@ def test_authors_from_100__a_t_u_v_ROR(): { 'value': 'Catania U.', }, - { - 'value': 'INFN, LNS' - } + {'value': 'INFN, LNS'}, ], 'affiliations_identifiers': [ - { - 'schema': 'ROR', - 'value': 'https://ror.org/03a64bh57' - }, - { - 'schema': 'ROR', - 'value': 'https://ror.org/02k1zhm92' - }, + {'schema': 'ROR', 'value': 'https://ror.org/03a64bh57'}, + {'schema': 'ROR', 'value': 'https://ror.org/02k1zhm92'}, ], 'full_name': 'Plumari, S.', 'raw_affiliations': [ @@ -2356,8 +2324,8 @@ def test_authors_from_100__a_t_u_v_ROR(): }, { 'value': u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy', - } - ] + }, + ], }, ] result = hep.do(create_record(snippet)) @@ -2367,18 +2335,15 @@ def test_authors_from_100__a_t_u_v_ROR(): expected = { 'a': 'Plumari, S.', - 't': [ - 'ROR:https://ror.org/03a64bh57', - 'ROR:https://ror.org/02k1zhm92' - ], + 't': ['ROR:https://ror.org/03a64bh57', 'ROR:https://ror.org/02k1zhm92'], 'u': [ 'Catania U.', 'INFN, LNS', ], 'v': [ u'Department of Physics U. and Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 - Catania - Italy', - u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy' - ] + u'Laboratori Nazionali del Sud - INFN-LNS - Via S. 
Sofia 62 - 95123 - Catania - Italy', + ], } result = hep2marc.do(result) @@ -2406,12 +2371,7 @@ def test_authors_from_100__a_t_v_and_700_a_t_v(): expected = [ { - 'affiliations_identifiers': [ - { - 'schema': 'GRID', - 'value': 'grid.440804.c' - } - ], + 'affiliations_identifiers': [{'schema': 'GRID', 'value': 'grid.440804.c'}], 'full_name': 'Hosseini, M.', 'raw_affiliations': [ { @@ -2420,12 +2380,7 @@ def test_authors_from_100__a_t_v_and_700_a_t_v(): ], }, { - 'affiliations_identifiers': [ - { - 'schema': 'GRID', - 'value': 'grid.440804.c' - } - ], + 'affiliations_identifiers': [{'schema': 'GRID', 'value': 'grid.440804.c'}], 'full_name': 'Hassanabadi, H.', 'raw_affiliations': [ { diff --git a/tests/test_hep_bd2xx.py b/tests/test_hep_bd2xx.py index 63c26a6..ae807d6 100644 --- a/tests/test_hep_bd2xx.py +++ b/tests/test_hep_bd2xx.py @@ -88,7 +88,7 @@ def test_titles_from_245__a_9(): expected = [ { 'title': 'Exact Form of Boundary Operators Dual to Interacting ' - 'Bulk Scalar Fields in the AdS/CFT Correspondence', + 'Bulk Scalar Fields in the AdS/CFT Correspondence', 'source': 'arXiv', }, ] @@ -100,7 +100,7 @@ def test_titles_from_245__a_9(): expected = [ { 'a': 'Exact Form of Boundary Operators Dual to Interacting ' - 'Bulk Scalar Fields in the AdS/CFT Correspondence', + 'Bulk Scalar Fields in the AdS/CFT Correspondence', '9': 'arXiv', }, ] @@ -125,7 +125,7 @@ def test_titles_from_246__a_9(): { 'source': 'arXiv', 'title': 'Superintegrable relativistic systems in ' - 'spacetime-dependent background fields', + 'spacetime-dependent background fields', }, ] result = hep.do(create_record(snippet)) @@ -176,6 +176,7 @@ def test_titles_from_245__a_b(): assert expected == result['245'] + @pytest.mark.usefixtures(name='_stable_langdetect') def test_title_translations_from_242__a(): schema = load_schema('hep') @@ -207,6 +208,7 @@ def test_title_translations_from_242__a(): assert expected == result['242'] + @pytest.mark.usefixtures(name='_stable_langdetect') def test_title_translations_from_242__a_b(): schema = load_schema('hep') @@ -314,9 +316,7 @@ def test_imprints_from_260__c_wrong_date(): '' ) # record/1314991 - expected = [ - {'date': '2014'} - ] + expected = [{'date': '2014'}] result = hep.do(create_record(snippet)) assert validate(result['imprints'], subschema) is None diff --git a/tests/test_hep_bd5xx.py b/tests/test_hep_bd5xx.py index 39701ef..1b8e3a5 100644 --- a/tests/test_hep_bd5xx.py +++ b/tests/test_hep_bd5xx.py @@ -523,7 +523,9 @@ def test_abstracts_from_520__double_a(): expected = [ {'value': '$D$ $K$ scattering and the $D_s$ spectrum from lattice QCD 520__'}, - {'value': 'We present results from Lattice QCD calculations of the low-lying charmed-strange meson spectrum using two types of Clover-Wilson lattices. In addition to quark-antiquark interpolating fields we also consider meson-meson interpolators corresponding to D-meson kaon scattering states. To calculate the all-to-all propagation necessary for the backtracking loops we use the (stochastic) distillation technique. For the charm quark we use the Fermilab method. Results for the $J^P=0^+$ $D_{s0}^*(2317)$ charmed-strange meson are presented.'}, + { + 'value': 'We present results from Lattice QCD calculations of the low-lying charmed-strange meson spectrum using two types of Clover-Wilson lattices. In addition to quark-antiquark interpolating fields we also consider meson-meson interpolators corresponding to D-meson kaon scattering states. 
To calculate the all-to-all propagation necessary for the backtracking loops we use the (stochastic) distillation technique. For the charm quark we use the Fermilab method. Results for the $J^P=0^+$ $D_{s0}^*(2317)$ charmed-strange meson are presented.' + }, ] result = hep.do(create_record(snippet)) @@ -532,7 +534,9 @@ def test_abstracts_from_520__double_a(): expected = [ {'a': '$D$ $K$ scattering and the $D_s$ spectrum from lattice QCD 520__'}, - {'a': 'We present results from Lattice QCD calculations of the low-lying charmed-strange meson spectrum using two types of Clover-Wilson lattices. In addition to quark-antiquark interpolating fields we also consider meson-meson interpolators corresponding to D-meson kaon scattering states. To calculate the all-to-all propagation necessary for the backtracking loops we use the (stochastic) distillation technique. For the charm quark we use the Fermilab method. Results for the $J^P=0^+$ $D_{s0}^*(2317)$ charmed-strange meson are presented.'}, + { + 'a': 'We present results from Lattice QCD calculations of the low-lying charmed-strange meson spectrum using two types of Clover-Wilson lattices. In addition to quark-antiquark interpolating fields we also consider meson-meson interpolators corresponding to D-meson kaon scattering states. To calculate the all-to-all propagation necessary for the backtracking loops we use the (stochastic) distillation technique. For the charm quark we use the Fermilab method. Results for the $J^P=0^+$ $D_{s0}^*(2317)$ charmed-strange meson are presented.' + }, ] result = hep2marc.do(result) @@ -1063,16 +1067,8 @@ def test_desy_bookkeeping_from_multiple_595_Da_d_s(): ) # record/1513161 expected = [ - { - 'expert': '8', - 'date': '2017-02-17', - 'status': 'abs' - }, - { - 'expert': '8', - 'date': '2017-02-19', - 'status': 'printed' - } + {'expert': '8', 'date': '2017-02-17', 'status': 'abs'}, + {'expert': '8', 'date': '2017-02-19', 'status': 'printed'}, ] result = hep.do(create_record(snippet)) @@ -1080,16 +1076,8 @@ def test_desy_bookkeeping_from_multiple_595_Da_d_s(): assert expected == result['_desy_bookkeeping'] expected = [ - { - 'a': '8', - 'd': '2017-02-17', - 's': 'abs' - }, - { - 'a': '8', - 'd': '2017-02-19', - 's': 'printed' - } + {'a': '8', 'd': '2017-02-17', 's': 'abs'}, + {'a': '8', 'd': '2017-02-19', 's': 'printed'}, ] result = hep2marc.do(result) @@ -1149,9 +1137,7 @@ def test_export_to_from_595__c_cds(): assert validate(result['_export_to'], subschema) is None assert expected == result['_export_to'] - expected = [ - {'c': 'CDS'} - ] + expected = [{'c': 'CDS'}] result = hep2marc.do(result) assert expected == result['595'] @@ -1197,9 +1183,7 @@ def test_export_to_from_595__c_not_hal(): assert validate(result['_export_to'], subschema) is None assert expected == result['_export_to'] - expected = [ - {'c': 'not HAL'} - ] + expected = [{'c': 'not HAL'}] result = hep2marc.do(result) assert expected == result['595'] diff --git a/tests/test_hep_bd7xx.py b/tests/test_hep_bd7xx.py index ad86169..45694fe 100644 --- a/tests/test_hep_bd7xx.py +++ b/tests/test_hep_bd7xx.py @@ -619,9 +619,7 @@ def test_publication_info_from_773__p_and_773__c_p_v_y_1_also_populates_public_n '' ) # record/769448 - expected_public_notes = [ - {'value': 'Submitted to Eur.Phys.J.A'} - ] + expected_public_notes = [{'value': 'Submitted to Eur.Phys.J.A'}] expected_publication_info = [ { 'artid': '615', diff --git a/tests/test_hep_bd9xx.py b/tests/test_hep_bd9xx.py index 746d539..f47e110 100644 --- a/tests/test_hep_bd9xx.py +++ 
b/tests/test_hep_bd9xx.py @@ -412,9 +412,7 @@ def test_document_type_defaults_to_article(): schema = load_schema('hep') subschema = schema['properties']['document_type'] - snippet = ( - '' - ) # synthetic data + snippet = '' # synthetic data expected = [ 'article', @@ -1070,7 +1068,9 @@ def test_references_from_999C50_9_r_u_h_m_o(): 'urn:nbn:de:hebis:77-diss-1000009520', ], 'urls': [ - {'value': 'http://www.diss.fu-berlin.de/diss/receive/FUDISS_thesis_000000094316'}, + { + 'value': 'http://www.diss.fu-berlin.de/diss/receive/FUDISS_thesis_000000094316' + }, ], }, }, @@ -1129,7 +1129,9 @@ def test_reference_from_999C5t_p_y_e_o(): 'imprint': {'publisher': 'New York: McGraw-Hill Book Company, Inc.'}, 'label': '16', 'publication_info': {'year': 1953}, - 'title': {'title': 'Higher Transcendetal Functions Vol. I, Bateman Manuscript Project'}, + 'title': { + 'title': 'Higher Transcendetal Functions Vol. I, Bateman Manuscript Project' + }, }, }, ] @@ -1175,17 +1177,11 @@ def test_reference_from_999C5o_h_c_t_s_r_y_0(): expected = [ { 'curated_relation': False, - 'record': { - '$ref': 'http://localhost:5000/api/literature/1247976' - }, + 'record': {'$ref': 'http://localhost:5000/api/literature/1247976'}, 'reference': { 'arxiv_eprint': '1308.3409', - 'authors': [ - {'full_name': u'Chatrchyan, S.'} - ], - 'collaborations': [ - 'CMS Collaboration' - ], + 'authors': [{'full_name': u'Chatrchyan, S.'}], + 'collaborations': ['CMS Collaboration'], 'label': '36', 'publication_info': { 'artid': '77', @@ -1194,8 +1190,10 @@ def test_reference_from_999C5o_h_c_t_s_r_y_0(): 'page_start': '77', 'year': 2013, }, - 'title': {'title': u'Angular analysis and branching fraction measurement of the decay B0 → K∗0 µ+ µ-'}, - } + 'title': { + 'title': u'Angular analysis and branching fraction measurement of the decay B0 → K∗0 µ+ µ-' + }, + }, } ] result = hep.do(create_record(snippet)) @@ -1260,7 +1258,9 @@ def test_references_from_999C5b_h_m_o_p_t_y_9(): 'cnum': 'C93-06-08', 'year': 1993, }, - 'title': {'title': 'DIM - A Distributed Information Management System for the Delphi experiment at CERN'}, + 'title': { + 'title': 'DIM - A Distributed Information Management System for the Delphi experiment at CERN' + }, }, }, ] @@ -1601,7 +1601,9 @@ def test_references_from_999C5_0_h_m_o_r_t_y(): '66', ], 'publication_info': {'year': 2005}, - 'title': {'title': 'CUORE: A Cryogenic underground Observatory for Rare Events'}, + 'title': { + 'title': 'CUORE: A Cryogenic underground Observatory for Rare Events' + }, }, }, ] @@ -1722,7 +1724,7 @@ def test_references_from_999C5u_as_cds_system_identifiers(): }, ], }, - 'curated_relation': False + 'curated_relation': False, } ] result = hep.do(create_record(snippet)) @@ -1800,7 +1802,7 @@ def test_references_from_999C5u_as_ads_system_identifiers(): ], 'dois': [ '10.1007/s00016-015-0157-9', - ] + ], }, } ] @@ -1872,7 +1874,7 @@ def test_references_from_999C5u_duplicated_u(): ], 'dois': [ '10.1007/s00016-015-0157-9', - ] + ], }, } ] diff --git a/tests/test_hep_bdFFT.py b/tests/test_hep_bdFFT.py index a11239a..d0089a1 100644 --- a/tests/test_hep_bdFFT.py +++ b/tests/test_hep_bdFFT.py @@ -33,12 +33,11 @@ @pytest.fixture() def _legacy_afs_service_config(): - config = { - 'LABS_AFS_HTTP_SERVICE': 'http://legacy-afs-web' - } + config = {'LABS_AFS_HTTP_SERVICE': 'http://legacy-afs-web'} with patch.dict(current_app.config, config): yield + @pytest.mark.usefixtures("_legacy_afs_service_config") def test_documents_from_FFT(): schema = load_schema('hep') @@ -70,6 +69,7 @@ def test_documents_from_FFT(): 
assert expected == result['documents'] assert 'figures' not in result + @pytest.mark.usefixtures("_legacy_afs_service_config") def test_documents_from_FFT_special_cases_arxiv_properly(): schema = load_schema('hep') @@ -103,6 +103,7 @@ def test_documents_from_FFT_special_cases_arxiv_properly(): assert expected == result['documents'] assert 'figures' not in result + @pytest.mark.usefixtures("_legacy_afs_service_config") def test_documents_are_unique_from_FFT(): schema = load_schema('hep') @@ -151,6 +152,7 @@ def test_documents_are_unique_from_FFT(): assert expected == result['documents'] assert 'figures' not in result + @pytest.mark.usefixtures("_legacy_afs_service_config") def test_figures_from_FFT(): schema = load_schema('hep') @@ -184,6 +186,7 @@ def test_figures_from_FFT(): assert expected == result['figures'] assert 'documents' not in result + @pytest.mark.usefixtures("_legacy_afs_service_config") def test_figures_order_from_FFT(): schema = load_schema('hep') @@ -245,7 +248,7 @@ def test_figures_order_from_FFT(): 'caption': 'Co-simulation results, at $50~\\mathrm{ms}$...', 'url': 'http://legacy-afs-web/var/data/files/g151/3037401/content.png%3B1', 'source': 'arxiv', - } + }, ] result = hep.do(create_record(snippet)) assert validate(result['figures'], subschema) is None @@ -290,7 +293,7 @@ def test_documents_from_FFT_does_not_require_s(): { 'key': 'document', 'fulltext': True, - 'url': 'http://www.mdpi.com/2218-1997/3/1/24/pdf' + 'url': 'http://www.mdpi.com/2218-1997/3/1/24/pdf', } ] result = hep.do(create_record(snippet)) @@ -362,6 +365,7 @@ def test_documents_to_FFT(): assert expected == result['FFT'] + @pytest.mark.usefixtures("_legacy_afs_service_config") def test_documents_to_FFT_converts_afs_urls_to_path(): schema = load_schema('hep') @@ -410,7 +414,7 @@ def test_documents_to_FFT_special_cases_arxiv_properly(): 'material': 'preprint', 'original_url': 'http://export.arxiv.org/pdf/1712.04934', 'source': 'arxiv', - 'url': '/api/files/d82dc015-83ea-4d83-820b-adb7ce1e42d0/1712.04934.pdf' + 'url': '/api/files/d82dc015-83ea-4d83-820b-adb7ce1e42d0/1712.04934.pdf', } ], } # holdingpen/820589 @@ -444,7 +448,7 @@ def test_documents_to_FFT_uses_filename(): "key": "136472d8763496230daa8b6b72fb219a", "original_url": "http://legacy-afs-web/var/data/files/g206/4135590/content.pdf%3B1", "source": "SCOAP3", - "url": "https://s3.cern.ch/inspire-prod-files-1/136472d8763496230daa8b6b72fb219a" + "url": "https://s3.cern.ch/inspire-prod-files-1/136472d8763496230daa8b6b72fb219a", } ] } # literature/1789709 @@ -455,7 +459,7 @@ def test_documents_to_FFT_uses_filename(): 'd': 'Article from SCOAP3', 't': 'SCOAP3', 'n': 'scoap3-fulltext', - 'f': '.pdf' + 'f': '.pdf', } ] @@ -496,7 +500,7 @@ def test_documents_to_FFT_uses_material_as_filename_fallback(): "filename": "document", "fulltext": True, "material": "publication", - } + }, ], } # literature/1852846 @@ -519,7 +523,7 @@ def test_documents_to_FFT_uses_material_as_filename_fallback(): "d": "Fulltext", "n": "document", "t": "INSPIRE-PUBLIC", - } + }, ] assert validate(snippet['documents'], subschema) is None @@ -561,6 +565,7 @@ def test_figures_to_FFT(): assert expected == result['FFT'] + @pytest.mark.usefixtures("_legacy_afs_service_config") def test_figures_to_FFT_converts_afs_urls_to_paths(): schema = load_schema('hep') @@ -583,7 +588,7 @@ def test_figures_to_FFT_converts_afs_urls_to_paths(): 'd': '00000 This figure illustrates something', 't': 'Plot', 'n': 'some_figure', - 'f': '.png' + 'f': '.png', } ] @@ -625,6 +630,7 @@ def 
test_figures_to_FFT_uses_filename():
     assert expected == result['FFT']
 
 
+@pytest.mark.usefixtures("_legacy_afs_service_config")
 def test_figures_from_FFT_generates_valid_uri():
     schema = load_schema('hep')
@@ -670,6 +676,7 @@ def test_figures_from_FFT_generates_valid_uri():
     assert expected == result['FFT']
 
 
+@pytest.mark.usefixtures("_legacy_afs_service_config")
 def test_figures_and_documents_from_FFT_without_d_subfield():
     schema = load_schema('hep')
@@ -726,6 +733,7 @@ def test_figures_and_documents_from_FFT_without_d_subfield():
     assert expected_figures == result['figures']
     assert expected_documents == result['documents']
 
 
+@pytest.mark.usefixtures("_legacy_afs_service_config")
 def test_figures_from_FFT_with_composite_file_extension():
     schema = load_schema('hep')
@@ -807,9 +815,8 @@ def test_documents_from_FFT_without_t_subfield():
     expected = [
         {
             'url': 'http://scoap3.iop.org/article/doi/10.1088/1674-1137/43/1/013104?format=pdf',
-            'key': 'fulltext.pdf'
+            'key': 'fulltext.pdf',
         }
-
     ]
     result = hep.do(create_record(snippet))
 
     assert validate(result['documents'], subschema) is None
diff --git a/tests/test_hep_model.py b/tests/test_hep_model.py
index df0e81c..e93515d 100644
--- a/tests/test_hep_model.py
+++ b/tests/test_hep_model.py
@@ -32,9 +32,7 @@ def test_ensure_curated():
     schema = load_schema('hep')
     subschema = schema['properties']['curated']
 
-    snippet = (
-        ''
-    )  # synthetic data
+    snippet = ''  # synthetic data
 
     expected = True
     result = hep.do(create_record(snippet))
diff --git a/tests/test_hepnames.py b/tests/test_hepnames.py
index 7aa076d..64e878d 100644
--- a/tests/test_hepnames.py
+++ b/tests/test_hepnames.py
@@ -40,23 +40,27 @@
            current
        ''',
-        [{
-            'curated_relation': True,
-            'current': True,
-            'end_date': '2020',
-            'name': 'CERN-ALPHA',
-            'record': {
-                '$ref': 'http://localhost:5000/api/experiments/1',
-            },
-            'start_date': '2014',
-        }],
-        [{
-            '0': 1,
-            'd': '2020',
-            'e': 'CERN-ALPHA',
-            's': '2014',
-            'z': 'current',
-        }],
+        [
+            {
+                'curated_relation': True,
+                'current': True,
+                'end_date': '2020',
+                'name': 'CERN-ALPHA',
+                'record': {
+                    '$ref': 'http://localhost:5000/api/experiments/1',
+                },
+                'start_date': '2014',
+            }
+        ],
+        [
+            {
+                '0': 1,
+                'd': '2020',
+                'e': 'CERN-ALPHA',
+                's': '2014',
+                'z': 'current',
+            }
+        ],
     ],
     [
         'current_curated_hidden',
@@ -70,25 +74,29 @@
            HIDDEN
        ''',
-        [{
-            'curated_relation': True,
-            'current': True,
-            'end_date': '2020',
-            'name': 'CERN-ALPHA',
-            'record': {
-                '$ref': 'http://localhost:5000/api/experiments/1',
-            },
-            'start_date': '2014',
-            'hidden': True
-        }],
-        [{
-            '0': 1,
-            'd': '2020',
-            'e': 'CERN-ALPHA',
-            's': '2014',
-            'z': 'current',
-            'h': 'HIDDEN',
-        }],
+        [
+            {
+                'curated_relation': True,
+                'current': True,
+                'end_date': '2020',
+                'name': 'CERN-ALPHA',
+                'record': {
+                    '$ref': 'http://localhost:5000/api/experiments/1',
+                },
+                'start_date': '2014',
+                'hidden': True,
+            }
+        ],
+        [
+            {
+                '0': 1,
+                'd': '2020',
+                'e': 'CERN-ALPHA',
+                's': '2014',
+                'z': 'current',
+                'h': 'HIDDEN',
+            }
+        ],
     ],
     [
         'notcurrent_curated',
@@ -98,18 +106,22 @@
            3
        ''',
-        [{
-            'curated_relation': True,
-            'current': False,
-            'name': 'SDSS',
-            'record': {
-                '$ref': 'http://localhost:5000/api/experiments/3',
-            },
-        }],
-        [{
-            '0': 3,
-            'e': 'SDSS',
-        }],
+        [
+            {
+                'curated_relation': True,
+                'current': False,
+                'name': 'SDSS',
+                'record': {
+                    '$ref': 'http://localhost:5000/api/experiments/3',
+                },
+            }
+        ],
+        [
+            {
+                '0': 3,
+                'e': 'SDSS',
+            }
+        ],
     ],
     [
         'notcurrent_notcurated',
@@ -118,14 +130,18 @@
            NOTCURATED
        ''',
-        [{
-            'name': 'NOTCURATED',
-            'curated_relation': False,
-            'current': False,
-        }],
-        [{
-            'e': 'NOTCURATED',
-        }],
+        [
+            {
+                'name': 'NOTCURATED',
+                'curated_relation': False,
+                'current': False,
+            }
+        ],
+        [
+            {
+                'e': 'NOTCURATED',
+            }
+        ],
     ],
     [
         'repeated_experiment',
@@ -213,7 +229,7 @@
             'record': {
                 '$ref': 'http://localhost:5000/api/experiments/2',
             },
-            'start_date': '2015'
+            'start_date': '2015',
         },
     ],
     [
@@ -288,14 +304,8 @@ def test_ids_from_double_035__a_9():
     assert expected == result['ids']
 
     expected = [
-        {
-            'a': 'H.Vogel.1',
-            '9': 'BAI'
-        },
-        {
-            'a': 'INSPIRE-00134135',
-            '9': 'INSPIRE'
-        },
+        {'a': 'H.Vogel.1', '9': 'BAI'},
+        {'a': 'INSPIRE-00134135', '9': 'INSPIRE'},
     ]
     result = hepnames2marc.do(result)
@@ -360,12 +370,7 @@ def test_ids_from_035__a_9_with_cern():
     assert validate(result['ids'], subschema) is None
     assert expected == result['ids']
 
-    expected = [
-        {
-            '9': 'CERN',
-            'a': 'CERN-622961'
-        }
-    ]
+    expected = [{'9': 'CERN', 'a': 'CERN-622961'}]
     result = hepnames2marc.do(result)
 
     assert expected == result['035']
@@ -1193,16 +1198,11 @@ def test_advisors_from_701__a_g_i():
         {
             'name': 'Rivelles, Victor O.',
             'degree_type': 'phd',
-            'ids': [
-                {
-                    'schema': 'INSPIRE ID',
-                    'value': 'INSPIRE-00120420'
-                }
-            ],
+            'ids': [{'schema': 'INSPIRE ID', 'value': 'INSPIRE-00120420'}],
             'record': {
                 '$ref': 'http://localhost:5000/api/authors/991627',
             },
-            'curated_relation': True
+            'curated_relation': True,
         },
     ]
     result = hepnames.do(create_record(snippet))
@@ -1241,17 +1241,12 @@ def test_advisors_from_701__a_g_i_h():
         {
             'name': 'Rivelles, Victor O.',
             'degree_type': 'phd',
-            'ids': [
-                {
-                    'schema': 'INSPIRE ID',
-                    'value': 'INSPIRE-00120420'
-                }
-            ],
+            'ids': [{'schema': 'INSPIRE ID', 'value': 'INSPIRE-00120420'}],
             'record': {
                 '$ref': 'http://localhost:5000/api/authors/991627',
             },
             'curated_relation': True,
-            'hidden': True
+            'hidden': True,
         },
     ]
     result = hepnames.do(create_record(snippet))
@@ -1288,12 +1283,7 @@ def test_advisors_from_701__a_g_i_orcid():
         {
             'name': 'Riccioni, Fabio',
             'degree_type': 'phd',
-            'ids': [
-                {
-                    'schema': 'ORCID',
-                    'value': '0000-0003-4702-3632'
-                }
-            ],
+            'ids': [{'schema': 'ORCID', 'value': '0000-0003-4702-3632'}],
         },
     ]
     result = hepnames.do(create_record(snippet))
@@ -1325,25 +1315,17 @@ def test_email_addresses_from_371__a_m_z():
         ''
     )  # record/1222902
 
-    expected = [
-        {
-            'current': True,
-            'value': 'test@hep.physik.uni-siegen.de'
-        }
-    ]
+    expected = [{'current': True, 'value': 'test@hep.physik.uni-siegen.de'}]
     result = hepnames.do(create_record(snippet))
 
     assert validate(result['email_addresses'], subschema) is None
     assert expected == result['email_addresses']
 
     expected = [
-        {
-            "a": "Siegen U.",
-            "z": "Current"
-        },
+        {"a": "Siegen U.", "z": "Current"},
         {
             "m": "test@hep.physik.uni-siegen.de",
-        }
+        },
     ]
     result = hepnames2marc.do(result)
@@ -1365,14 +1347,8 @@ def test_email_addresses_from_371__a_repeated_m_z():
     )  # record/1019084
 
     expected = [
-        {
-            'current': True,
-            'value': 'test@usp.br'
-        },
-        {
-            'current': True,
-            'value': 'test@fma.if.usp.br'
-        },
+        {'current': True, 'value': 'test@usp.br'},
+        {'current': True, 'value': 'test@fma.if.usp.br'},
     ]
     result = hepnames.do(create_record(snippet))
@@ -1380,16 +1356,13 @@ def test_email_addresses_from_371__a_repeated_m_z():
     assert expected == result['email_addresses']
 
     expected = [
-        {
-            "a": "Sao Paulo U.",
-            "z": "Current"
-        },
+        {"a": "Sao Paulo U.", "z": "Current"},
         {
             "m": "test@usp.br",
         },
         {
             "m": "test@fma.if.usp.br",
-        }
+        },
     ]
     result = hepnames2marc.do(result)
@@ -1411,27 +1384,17 @@ def test_email_addresses_from_371__a_o_r_s_t():
         ''
     )  # record/1060782
 
-    expected = [
-        {
-            'current': False,
-            'value': 'test@imsc.res.in'
-        }
-    ]
+    expected = [{'current': False, 'value': 'test@imsc.res.in'}]
     result = hepnames.do(create_record(snippet))
 
     assert validate(result['email_addresses'], subschema) is None
     assert expected == result['email_addresses']
 
     expected = [
-        {
-            "a": "IMSc, Chennai",
-            "s": "2012",
-            "r": "PD",
-            "t": "2013"
-        },
+        {"a": "IMSc, Chennai", "s": "2012", "r": "PD", "t": "2013"},
         {
             "o": "test@imsc.res.in",
-        }
+        },
     ]
     result = hepnames2marc.do(result)
@@ -1449,13 +1412,7 @@ def test_email_addresses_from_595__m():
         ''
     )  # record/1021896
 
-    expected = [
-        {
-            'current': True,
-            'hidden': True,
-            'value': 'test@pnnl.gov'
-        }
-    ]
+    expected = [{'current': True, 'hidden': True, 'value': 'test@pnnl.gov'}]
     result = hepnames.do(create_record(snippet))
 
     assert validate(result['email_addresses'], subschema) is None
@@ -1482,13 +1439,7 @@ def test_email_addresses_from_595__o():
         ''
     )  # record/1021896
 
-    expected = [
-        {
-            'current': False,
-            'hidden': True,
-            'value': 'test@pnl.gov'
-        }
-    ]
+    expected = [{'current': False, 'hidden': True, 'value': 'test@pnl.gov'}]
     result = hepnames.do(create_record(snippet))
 
     assert validate(result['email_addresses'], subschema) is None
@@ -1525,9 +1476,7 @@ def test_positions_from_371__a():
     assert validate(result['positions'], subschema) is None
     assert expected == result['positions']
 
-    expected = [
-        {'a': 'Aachen, Tech. Hochsch.'}
-    ]
+    expected = [{'a': 'Aachen, Tech. Hochsch.'}]
     result = hepnames2marc.do(result)
 
     assert expected == result['371']
@@ -1555,12 +1504,7 @@ def test_positions_from_371__a_z():
     assert validate(result['positions'], subschema) is None
     assert expected == result['positions']
 
-    expected = [
-        {
-            'a': 'Argonne',
-            'z': 'Current'
-        }
-    ]
+    expected = [{'a': 'Argonne', 'z': 'Current'}]
 
     result = hepnames2marc.do(result)
@@ -1591,13 +1535,7 @@ def test_positions_from_371__a_r_z():
     assert validate(result['positions'], subschema) is None
     assert expected == result['positions']
 
-    expected = [
-        {
-            'a': 'Antwerp U.',
-            'r': 'SENIOR',
-            'z': 'Current'
-        }
-    ]
+    expected = [{'a': 'Antwerp U.', 'r': 'SENIOR', 'z': 'Current'}]
     result = hepnames2marc.do(result)
 
     assert expected == result['371']
@@ -1621,7 +1559,7 @@ def test_positions_from_371__a_r_z_h():
             'current': True,
             'institution': 'Antwerp U.',
             'rank': 'SENIOR',
-            'hidden': True
+            'hidden': True,
         },
     ]
     result = hepnames.do(create_record(snippet))
@@ -1660,9 +1598,7 @@ def test_positions_from_371__a_r_t_z():
             'end_date': '2007',
             'institution': 'San Luis Potosi U.',
             'rank': 'MASTER',
-            'record': {
-                '$ref': 'http://localhost:5000/api/institutions/903830'
-            },
+            'record': {'$ref': 'http://localhost:5000/api/institutions/903830'},
             'curated_relation': True,
         },
     ]
@@ -1862,9 +1798,7 @@ def test_public_notes_from_667__a():
         ''
     )  # record/1018999
 
-    expected = [
-        {'value': 'Do not confuse with Acharya, Bannanje Sripath'}
-    ]
+    expected = [{'value': 'Do not confuse with Acharya, Bannanje Sripath'}]
     result = hepnames.do(create_record(snippet))
 
     assert validate(result['public_notes'], subschema) is None
@@ -1899,7 +1833,7 @@ def test_previous_names_to_667__a():
     expected = [
         {'a': 'Formerly Tomoko Furukawa'},
-        {'a': 'Formerly Second previous name'}
+        {'a': 'Formerly Second previous name'},
     ]  # record/1281982
@@ -2067,10 +2001,7 @@ def test_ids_from_8564_g_u_y_twitter():
     )  # record/1073331
 
     expected = [
-        {
-            'schema': 'TWITTER',
-            'value': 'neiltyson'
-        },
+        {'schema': 'TWITTER', 'value': 'neiltyson'},
     ]
     result = hepnames.do(create_record(snippet))
@@ -2100,10 +2031,7 @@ def test_ids_from_8564_u_wikipedia():
     )  # record/1018793
 
    expected = [
-        {
-            'schema': 'WIKIPEDIA',
-            'value': u'Torsten_Åkesson'
-        },
+        {'schema': 'WIKIPEDIA', 'value': u'Torsten_Åkesson'},
     ]
     result = hepnames.do(create_record(snippet))
@@ -2133,10 +2061,7 @@ def test_ids_from_8564_u_y_linkedin():
     )  # record/1423251
 
     expected = [
-        {
-            'schema': 'LINKEDIN',
-            'value': u'silvia-adrián-martínez-ab1a548b'
-        },
+        {'schema': 'LINKEDIN', 'value': u'silvia-adrián-martínez-ab1a548b'},
     ]
     result = hepnames.do(create_record(snippet))
diff --git a/tests/test_institutions.py b/tests/test_institutions.py
index 7dac11b..0c41bf4 100644
--- a/tests/test_institutions.py
+++ b/tests/test_institutions.py
@@ -161,7 +161,9 @@ def test_icn_legacy_icn_institution_hierarchy_from_110__a_t_u():
     assert validate(result['legacy_ICN'], legacy_ICN_schema) is None
     assert expected_legacy_ICN == result['legacy_ICN']
 
-    assert validate(result['institution_hierarchy'], institution_hierarchy_schema) is None
+    assert (
+        validate(result['institution_hierarchy'], institution_hierarchy_schema) is None
+    )
     assert expected_institution_hierarchy == result['institution_hierarchy']
@@ -201,7 +203,9 @@ def test_icn_legacy_icn_institution_hierarchy_from_110__a_b_t_u():
     assert validate(result['legacy_ICN'], legacy_ICN_schema) is None
     assert expected_legacy_ICN == result['legacy_ICN']
 
-    assert validate(result['institution_hierarchy'], institution_hierarchy_schema) is None
+    assert (
+        validate(result['institution_hierarchy'], institution_hierarchy_schema) is None
+    )
     assert expected_institution_hierarchy == result['institution_hierarchy']
@@ -746,7 +750,9 @@ def test_private_notes_from_595__a():
     )  # record/902879
 
     expected = [
-        {'value': u'The Division is located inside the Department of Physics and Astronomy of the University of Catania Scientific Campus ("Città Universitaria" or "Cittadella"). Via Santa Sofia 64 95123 CATANIA'},
+        {
+            'value': u'The Division is located inside the Department of Physics and Astronomy of the University of Catania Scientific Campus ("Città Universitaria" or "Cittadella"). Via Santa Sofia 64 95123 CATANIA'
+        },
     ]
     result = institutions.do(create_record(snippet))
@@ -770,8 +776,12 @@ def test_private_notes_from_double_595__a():
     )  # record/907691
 
     expected = [
-        {'value': u'The Roma II Structure was established in 1989 at the University of Rome “Tor Vergata” - cc'},
-        {'value': u'REDACTED thinks we don\'t have to write 110__t: "INFN, Rome 2" because Rome 2 is only in the url but not in the site. She\'ll ask to REDACTED (from INFN) to have her feedback.'},
+        {
+            'value': u'The Roma II Structure was established in 1989 at the University of Rome “Tor Vergata” - cc'
+        },
+        {
+            'value': u'REDACTED thinks we don\'t have to write 110__t: "INFN, Rome 2" because Rome 2 is only in the url but not in the site. She\'ll ask to REDACTED (from INFN) to have her feedback.'
+        },
     ]
     result = institutions.do(create_record(snippet))
@@ -790,7 +800,9 @@ def test_public_notes_from_680__i():
     )  # record/902725
 
     expected = [
-        {'value': u'2nd address: Organisation Européenne pour la Recherche Nucléaire (CERN), F-01631 Prévessin Cedex, France'}
+        {
+            'value': u'2nd address: Organisation Européenne pour la Recherche Nucléaire (CERN), F-01631 Prévessin Cedex, France'
+        }
     ]
     result = institutions.do(create_record(snippet))
diff --git a/tests/test_utils.py b/tests/test_utils.py
index c837281..46d4efe 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -163,7 +163,9 @@ def test_afs_url_converts_afs_path():
 
 def test_afs_url_converts_new_afs_path():
     expected = 'file:///afs/cern.ch/project/inspire/PROD/var/data/files/g220/4413039/content.xml'
-    result = afs_url('/opt/venvs/inspire-legacy/var/data/files/g220/4413039/content.xml')
+    result = afs_url(
+        '/opt/venvs/inspire-legacy/var/data/files/g220/4413039/content.xml'
+    )
 
     assert expected == result
@@ -194,7 +196,9 @@ def test_afs_url_with_custom_afs_path():
 
 def test_afs_url_handles_unicode():
     expected = u'file:///afs/cern.ch/project/inspire/PROD/var/data/files/g70/1407585/%E7%89%A9%E7%90%86%E7%A7%91%E5%AD%A6%E4%B8%8E%E6%8A%80%E6%9C%AF%E5%AD%A6%E9%99%A2-%E6%9D%8E%E5%A8%9C-200650218-%E5%AD%A6%E4%BD%8D%E7%BA%A7....pdf%3B1'
-    result = afs_url(u'/opt/cds-invenio/var/data/files/g70/1407585/物理科学与技术学院-李娜-200650218-学位级....pdf;1')
+    result = afs_url(
+        u'/opt/cds-invenio/var/data/files/g70/1407585/物理科学与技术学院-李娜-200650218-学位级....pdf;1'
+    )
 
     assert expected == result
@@ -260,7 +264,7 @@ def test_afs_url_converts_afs_url_to_path():
 def test_afs_url_handles_custom_afs_path():
     config = {
         'LABS_AFS_HTTP_SERVICE': 'http://jessicajones.com/nested/nested',
-        'LEGACY_AFS_PATH': '/foo/bar'
+        'LEGACY_AFS_PATH': '/foo/bar',
     }
 
     expected = "file:///foo/bar/var/file.txt"
@@ -349,13 +353,13 @@ def test_get_recid_from_ref_returns_none_on_ref_malformed():
 
 def test_dedupe_all_lists():
-    obj = {'l0': list(range(10)) + list(range(10)),
-           'o1': [{'foo': 'bar'}] * 10,
-           'o2': [{'foo': [1, 2]}, {'foo': [1, 1, 2]}] * 10}
+    obj = {
+        'l0': list(range(10)) + list(range(10)),
+        'o1': [{'foo': 'bar'}] * 10,
+        'o2': [{'foo': [1, 2]}, {'foo': [1, 1, 2]}] * 10,
+    }
 
-    expected = {'l0': list(range(10)),
-                'o1': [{'foo': 'bar'}],
-                'o2': [{'foo': [1, 2]}]}
+    expected = {'l0': list(range(10)), 'o1': [{'foo': 'bar'}], 'o2': [{'foo': [1, 2]}]}
 
     assert dedupe_all_lists(obj) == expected
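Note for reviewers: the @pytest.mark.usefixtures("_legacy_afs_service_config") marks added to the FFT figure/document tests above assume a setup-only fixture in the suite's conftest.py. A minimal sketch of one plausible shape follows; the fixture name is the one referenced by the marks, the LABS_AFS_HTTP_SERVICE key is the one exercised in tests/test_utils.py, but the value and the Flask-app-context mechanism are assumptions, not the repository's actual definition.

    import pytest
    from flask import Flask

    @pytest.fixture
    def _legacy_afs_service_config():
        # Hypothetical: build a throwaway app whose config afs_url() can read
        # via current_app inside the test. The value below is a placeholder.
        app = Flask(__name__)
        app.config['LABS_AFS_HTTP_SERVICE'] = (
            'file:///afs/cern.ch/project/inspire/PROD'
        )
        with app.app_context():
            yield

Applying it through usefixtures rather than as a test argument fits here because the tests never touch the fixture's return value; they only need the config to be in place while hep.do() resolves FFT URIs.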