adjust line width

inspirehep · Aug 6, 2024 · 5359d99 · 5359d99
1 parent cb36b4f
commit 5359d99
Show file tree

Hide file tree

Showing 39 changed files with 1,089 additions and 896 deletions.
diff --git a/inspire_dojson/api.py b/inspire_dojson/api.py
@@ -50,10 +50,12 @@
 try:
     unichr(0x100000)
     RE_INVALID_CHARS_FOR_XML = re.compile(
-        u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]+')
+        u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]+'
+    )
 except ValueError:  # pragma: no cover
     RE_INVALID_CHARS_FOR_XML = re.compile(
-        u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD]+')
+        u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD]+'
+    )
 
 RECORD = E.record
 CONTROLFIELD = E.controlfield
@@ -105,7 +107,9 @@ def record2marcxml_etree(record):
     elif schema_name == 'authors':
         marcjson = hepnames2marc.do(record)
     else:
-        raise NotSupportedError(u'JSON -> MARC rules missing for "{}"'.format(schema_name))
+        raise NotSupportedError(
+            u'JSON -> MARC rules missing for "{}"'.format(schema_name)
+        )
 
     record = RECORD()
 
@@ -115,15 +119,19 @@ def record2marcxml_etree(record):
             value = force_single_element(values)
             if not isinstance(value, text_type):
                 value = text_type(value)
-            record.append(CONTROLFIELD(_strip_invalid_chars_for_xml(value), {'tag': tag}))
+            record.append(
+                CONTROLFIELD(_strip_invalid_chars_for_xml(value), {'tag': tag})
+            )
         else:
             for value in force_list(values):
                 datafield = DATAFIELD({'tag': tag, 'ind1': ind1, 'ind2': ind2})
                 for code, els in sorted(iteritems(value)):
                     for el in force_list(els):
                         if not isinstance(el, text_type):
                             el = text_type(el)
-                        datafield.append(SUBFIELD(_strip_invalid_chars_for_xml(el), {'code': code}))
+                        datafield.append(
+                            SUBFIELD(_strip_invalid_chars_for_xml(el), {'code': code})
+                        )
                 record.append(datafield)
 
     return record
@@ -153,7 +161,9 @@ def cds_marcxml2record(marcxml):
 
 
 def _get_collections(marcjson):
-    collections = chain.from_iterable([force_list(el) for el in force_list(get_value(marcjson, '980__.a'))])
+    collections = chain.from_iterable(
+        [force_list(el) for el in force_list(get_value(marcjson, '980__.a'))]
+    )
     normalized_collections = [el.lower() for el in collections]
 
     return normalized_collections

diff --git a/inspire_dojson/cds/model.py b/inspire_dojson/cds/model.py
@@ -36,18 +36,24 @@ def add_control_number(record, blob):
     if '001' not in blob:
         return record
 
-    collections = (value.lower() for value in chain(force_list(get_value(blob, '980__.a', default=[])),
-                                                    force_list(get_value(blob, '980__.c', default=[]))))
+    collections = (
+        value.lower()
+        for value in chain(
+            force_list(get_value(blob, '980__.a', default=[])),
+            force_list(get_value(blob, '980__.c', default=[])),
+        )
+    )
     if 'hidden' in collections:
-        record.setdefault('595__', []).append({
-            '9': 'CDS',
-            'a': u'CDS-{}'.format(blob['001'])
-        })
+        record.setdefault('595__', []).append(
+            {'9': 'CDS', 'a': u'CDS-{}'.format(blob['001'])}
+        )
     else:
-        record.setdefault('035__', []).append({
-            '9': 'CDS',
-            'a': blob['001'],
-        })
+        record.setdefault('035__', []).append(
+            {
+                '9': 'CDS',
+                'a': blob['001'],
+            }
+        )
 
     return record
 

diff --git a/inspire_dojson/cds/rules.py b/inspire_dojson/cds/rules.py
@@ -120,7 +120,7 @@ def escape_url(url):
         else:
             scheme = ''
 
-        url = quote_url(url[len(scheme):])
+        url = quote_url(url[len(scheme) :])
         return scheme + url
 
 
@@ -135,8 +135,19 @@ def persistent_identifiers(self, key, value):
 @cds2hep_marc.over('035__', '^035..')
 @utils.for_each_value
 def external_sytem_identifiers(self, key, value):
-    ignored = {'cercer', 'inspire', 'xx', 'cern annual report', 'cmscms', 'wai01', 'spires'}
-    if any(val.lower() in ignored for val in chain(force_list(value.get('9')), force_list(value.get('a')))):
+    ignored = {
+        'cercer',
+        'inspire',
+        'xx',
+        'cern annual report',
+        'cmscms',
+        'wai01',
+        'spires',
+    }
+    if any(
+        val.lower() in ignored
+        for val in chain(force_list(value.get('9')), force_list(value.get('a')))
+    ):
         return
     if any(val.lower().endswith('cercer') for val in force_list(value.get('a'))):
         return
@@ -151,7 +162,15 @@ def secondary_report_numbers(self, key, value):
     Also populates the ``500``, ``595`` and ``980`` MARC field through side effects.
     """
     preliminary_results_prefixes = ['ATLAS-CONF-', 'CMS-PAS-', 'CMS-DP-', 'LHCB-CONF-']
-    note_prefixes = ['ALICE-INT-', 'ATL-', 'ATLAS-CONF-', 'CMS-DP-', 'CMS-PAS-', 'LHCB-CONF-', 'LHCB-PUB-']
+    note_prefixes = [
+        'ALICE-INT-',
+        'ATL-',
+        'ATLAS-CONF-',
+        'CMS-DP-',
+        'CMS-PAS-',
+        'LHCB-CONF-',
+        'LHCB-PUB-',
+    ]
 
     result_037 = self.get('037__', [])
     result_500 = self.get('500__', [])
@@ -165,17 +184,21 @@ def secondary_report_numbers(self, key, value):
     if any(report.upper().startswith(prefix) for prefix in note_prefixes):
         result_980.append({'a': 'NOTE'})
 
-    if any(report.upper().startswith(prefix) for prefix in preliminary_results_prefixes):
+    if any(
+        report.upper().startswith(prefix) for prefix in preliminary_results_prefixes
+    ):
         result_500.append({'9': 'CDS', 'a': 'Preliminary results'})
 
     is_barcode = hidden_report.startswith('P0') or hidden_report.startswith('CM-P0')
     if not report.startswith('SIS-') and not is_barcode:
-        result_037.append({
-            '9': source,
-            'a': report,
-            'c': value.get('c'),
-            'z': hidden_report if source == 'CDS' else None,
-        })
+        result_037.append(
+            {
+                '9': source,
+                'a': report,
+                'c': value.get('c'),
+                'z': hidden_report if source == 'CDS' else None,
+            }
+        )
 
     self['500__'] = result_500
     self['595__'] = result_595
@@ -196,7 +219,9 @@ def languages(self, key, value):
             languages.append({'a': pycountry.languages.get(alpha_3=alpha_3).name})
         except KeyError:
             with contextlib.suppress(KeyError):
-                languages.append({'a': pycountry.languages.get(bibliographic=alpha_3).name})
+                languages.append(
+                    {'a': pycountry.languages.get(bibliographic=alpha_3).name}
+                )
 
     return languages
 
@@ -262,7 +287,9 @@ def nonfirst_authors(self, key, value):
     field_700 = self.get('700__', [])
     field_701 = self.get('701__', [])
 
-    is_supervisor = any(el.lower().startswith('dir') for el in force_list(value.get('e', '')))
+    is_supervisor = any(
+        el.lower().startswith('dir') for el in force_list(value.get('e', ''))
+    )
     if is_supervisor:
         field_701.append(_converted_author(value))
     else:
@@ -346,7 +373,7 @@ def categories(self, key, value):
         result = {
             '2': 'INSPIRE',
             # XXX: will fail validation and be logged if invalid category
-            'a': CATEGORIES.get(value.get('a'), value.get('a'))
+            'a': CATEGORIES.get(value.get('a'), value.get('a')),
         }
     else:
         result = vanilla_dict(value)
@@ -405,20 +432,28 @@ def urls(self, key, value):
 
     Also populate the ``FFT`` field through side effects.
     """
+
     def _is_preprint(value):
         return value.get('y', '').lower() == 'preprint'
 
     def _is_fulltext(value):
-        return value['u'].endswith('.pdf') and value['u'].startswith('http://cds.cern.ch')
+        return value['u'].endswith('.pdf') and value['u'].startswith(
+            'http://cds.cern.ch'
+        )
 
     def _is_local_copy(value):
         return 'local copy' in value.get('y', '')
 
     def _is_ignored_domain(value):
-        ignored_domains = ['http://cdsweb.cern.ch', 'http://cms.cern.ch',
-                           'http://cmsdoc.cern.ch', 'http://documents.cern.ch',
-                           'http://preprints.cern.ch', 'http://cds.cern.ch',
-                           'http://arxiv.org']
+        ignored_domains = [
+            'http://cdsweb.cern.ch',
+            'http://cms.cern.ch',
+            'http://cmsdoc.cern.ch',
+            'http://documents.cern.ch',
+            'http://preprints.cern.ch',
+            'http://cds.cern.ch',
+            'http://arxiv.org',
+        ]
         return any(value['u'].startswith(domain) for domain in ignored_domains)
 
     field_8564 = self.get('8564_', [])
@@ -431,26 +466,34 @@ def _is_ignored_domain(value):
 
     if _is_fulltext(value) and not _is_preprint(value):
         if _is_local_copy(value):
-            description = value.get('y', '').replace('local copy', 'on CERN Document Server')
-            field_8564.append({
-                'u': url,
-                'y': description,
-            })
+            description = value.get('y', '').replace(
+                'local copy', 'on CERN Document Server'
+            )
+            field_8564.append(
+                {
+                    'u': url,
+                    'y': description,
+                }
+            )
         else:
             _, file_name = os.path.split(urllib.parse.urlparse(value['u']).path)
             _, extension = os.path.splitext(file_name)
-            field_FFT.append({
-                't': 'CDS',
-                'a': url,
-                'd': value.get('y', ''),
-                'n': file_name,
-                'f': extension,
-            })
+            field_FFT.append(
+                {
+                    't': 'CDS',
+                    'a': url,
+                    'd': value.get('y', ''),
+                    'n': file_name,
+                    'f': extension,
+                }
+            )
     elif not _is_ignored_domain(value):
-        field_8564.append({
-            'u': url,
-            'y': value.get('y'),
-        })
+        field_8564.append(
+            {
+                'u': url,
+                'y': value.get('y'),
+            }
+        )
 
     self['FFT__'] = field_FFT
     return field_8564