openlibhums · ajrbyers · Feb 7, 2024 · Feb 22, 2024 · Feb 22, 2024 · Mar 12, 2024
diff --git a/admin.py b/admin.py
@@ -73,11 +73,29 @@ class CSVImportArticleAdmin(utils_admin_utils.ArticleFKModelAdmin):
     date_hierarchy = ('imported')
 
 
+class NotificationAdmin(admin.ModelAdmin):
+    list_display = ('pk', 'email')
+
+
+class CitationFormatAdmin(admin.ModelAdmin):
+    list_display = ('journal', 'format')
+    raw_id_fields = ('journal',)
+
+
+class SectionMapAdmin(admin.ModelAdmin):
+    list_display = ('section', 'article_type')
+    search_fields = ('article_type', 'section__name')
+    raw_id_fields = ('section',)
+
+
 for pair in [
     (models.ExportFile, ExportFileAdmin),
     (models.CSVImport, CSVImportAdmin),
     (models.CSVImportCreateArticle, CSVImportArticleAdmin),
     (models.CSVImportUpdateArticle, CSVImportArticleAdmin),
     (models.OJSFile,),
+    (models.AutomatedImportNotification, NotificationAdmin),
+    (models.CitationFormat, CitationFormatAdmin),
+    (models.SectionMap, SectionMapAdmin),
 ]:
     admin.site.register(*pair)
diff --git a/jats.py b/jats.py
@@ -18,6 +18,7 @@
 from django.db import transaction
 from django.utils import timezone
 
+from plugins.imports import models
 from core import files
 from core import models as core_models
 from core.models import Account
@@ -42,6 +43,7 @@ def import_jats_article(
         jats_contents, journal=None,
         persist=True, filename=None, owner=None,
         images=None, request=None, stage=None,
+        get_section_from_subject=False,
 ):
     """ JATS import entrypoint
     :param jats_contents: (str) the JATS XML to be imported
@@ -63,13 +65,14 @@ def import_jats_article(
     meta["abstract"] = get_jats_abstract(metadata_soup)
     meta["issue"], meta["volume"] = get_jats_issue(jats_soup)
     meta["keywords"] = get_jats_keywords(metadata_soup)
-    meta["section_name"] = get_jats_section_name(jats_soup)
+    meta["section_name"] = get_jats_section_name(jats_soup, get_section_from_subject)
     meta["date_published"] = get_jats_pub_date(jats_soup) or datetime.date.today()
     meta["license_url"], meta["license_text"] = get_jats_license(jats_soup)
     meta["rights"] = get_jats_rights_statement(jats_soup)
     meta["authors"] = []
     meta["date_submitted"] = None
     meta["date_accepted"] = None
+    meta["custom_how_to_cite"] = get_custom_how_to_cite(metadata_soup)
     try:
         meta["first_page"] = int(metadata_soup.find("fpage").text)
     except (ValueError, AttributeError):
@@ -85,7 +88,10 @@ def import_jats_article(
         meta["date_accepted"] = get_jats_acc_date(history_soup)
 
     authors_soup = metadata_soup.find("contrib-group")
-    author_notes = metadata_soup.find("author_notes")
+    author_notes = metadata_soup.find("author-notes")
+    if author_notes:
+        meta["coi"] = get_jats_coi_statement(author_notes)
+
     if authors_soup:
         meta["authors"] = get_jats_authors(
             authors_soup,
@@ -115,7 +121,10 @@ def import_jats_article(
     return article
 
 
-def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=None):
+def import_jats_zipped(
+        zip_file, journal=None, owner=None,
+        persist=True, stage=None, get_section_from_subject=False,
+):
     """ Import a batch of Zipped JATS articles and their associated files
     :param zip_file: The zipped jats to be imported
     :param journal: Journal in which to import the articles
@@ -147,7 +156,6 @@ def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=N
                         else:
                             supplements.append(file_path)
 
-
                     if jats_path:
                         # Check nested dirs relative to xml like ./figures
                         for dir_ in dirs:
@@ -163,14 +171,15 @@ def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=N
                                 jats_file.read(), journal, persist,
                                 jats_filename, owner, supplements,
                                 stage=stage,
+                                get_section_from_subject=get_section_from_subject,
                             )
                             articles.append((jats_filename, article))
                         if pdf_path:
                             import_pdf(article, pdf_path, pdf_filename)
                 except Exception as err:
                     logger.warning(err)
                     logger.warning(traceback.format_exc())
-                    errors.append((filenames, err))
+                    errors.append((jats_path, err))
 
     return articles, errors
 
@@ -210,7 +219,7 @@ def get_jats_title(soup):
 def get_jats_abstract(soup):
     abstract = soup.find("abstract")
     if abstract:
-        return abstract.text
+        return f"<p>{abstract.text}</p>"
     else:
         return ""
 
@@ -278,16 +287,22 @@ def get_jats_acc_date(soup):
 
 def get_jats_keywords(soup):
     jats_keywords_soup = soup.find("kwd-group")
+
+    # This was previously a set but is now a list to preserve keyword order.
     if jats_keywords_soup:
-        return {
+        return [
             keyword.text.strip()
             for keyword in jats_keywords_soup.find_all("kwd")
-        }
+        ]
     else:
-        return set()
+        return list()
 
 
-def get_jats_section_name(soup):
+def get_jats_section_name(soup,get_section_from_subject):
+    if get_section_from_subject:
+        subject = soup.find("subject")
+        if subject:
+            return subject.text
     return soup.find("article").attrs.get("article-type")
 
 
@@ -334,10 +349,44 @@ def get_jats_authors(soup, metadata_soup, author_notes=None):
                 corresp_email = author_notes.find("email")
                 if corresp_email:
                     author_data["email"] = corresp_email.text
+            else:
+                # Check and alternative route for identifying corresp
+                # authors
+                corresp_ref = author.find(
+                    'xref', {'ref-type': 'corresp'}
+                )
+                if corresp_ref:
+                    author_data["correspondence"] = True
+
+                    if author_notes:
+                        xref_rid = corresp_ref.get('rid')
+                        corr_note = author_notes.find(
+                            'corresp', {'id': xref_rid}
+                        )
+                        if corr_note:
+                            corresp_email = corr_note.find(
+                                'email'
+                            )
+                            if corresp_email:
+                                author_data["email"] = corresp_email.text
+
             authors.append(author_data)
     return authors
 
 
+def get_custom_how_to_cite(metadata_soup):
+    custom_meta_tags = metadata_soup.find_all('custom-meta')
+    for custom_meta_tag in custom_meta_tags:
+        meta_name_tag = custom_meta_tag.find('meta-name')
+        if meta_name_tag:
+            meta_name_value = meta_name_tag.string
+            if meta_name_value in ['How to cite', 'How To Cite']:
+                meta_value_tag = custom_meta_tag.find('meta-value')
+                if meta_value_tag:
+                    return meta_value_tag.text
+    return ''
+
+
 def get_orcid(author_soup):
     contrib_ids = author_soup.findAll('contrib-id')
     for ci in contrib_ids:
@@ -372,12 +421,19 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
         journal = get_lost_found_journal()
 
     with transaction.atomic():
-        section, _ = submission_models.Section.objects \
-            .get_or_create(
-                journal=journal,
-                name=metadata["section_name"],
-        )
-        section.save()
+        try:
+            section_map = models.SectionMap.objects.get(
+                article_type=metadata["section_name"],
+                section__journal=journal,
+            )
+            if section_map:
+                section = section_map.section
+        except models.SectionMap.DoesNotExist:
+            section, _ = submission_models.Section.objects \
+                .get_or_create(
+                    journal=journal,
+                    name=metadata["section_name"],
+            )
 
         article = get_article(metadata.get("identifiers", {}), journal)
         if not article:
@@ -386,14 +442,16 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
                 title=metadata["title"],
                 abstract=metadata["abstract"],
                 date_published=metadata["date_published"],
-                date_accepted=metadata["date_submitted"],
+                date_accepted=metadata["date_accepted"],
                 date_submitted=metadata["date_submitted"],
                 rights=metadata["rights"],
                 stage=stage or submission_models.STAGE_PUBLISHED,
                 is_import=True,
                 owner=owner,
                 first_page=metadata["first_page"],
-                last_page=metadata["last_page"]
+                last_page=metadata["last_page"],
+                custom_how_to_cite=metadata['custom_how_to_cite'],
+                article_agreement='This article is a JATS import.',
             )
             article.section = section
             article.save()
@@ -403,11 +461,13 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
             article.abstract = metadata["abstract"]
             article.date_published = metadata["date_published"]
             article.date_published = metadata["date_published"]
-            article.date_accepted = metadata["date_submitted"]
+            article.date_accepted = metadata["date_accepted"]
             article.date_submitted = metadata["date_submitted"]
             article.rights = metadata["rights"]
             article.first_page = metadata["first_page"]
             article.last_page = metadata["last_page"]
+            article.custom_how_to_cite = metadata["custom_how_to_cite"]
+            article.section = section
             article.save()
 
         if metadata["identifiers"]["doi"]:
@@ -444,7 +504,6 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
                         "orcid": author["orcid"],
                     },
                 )
-
             fa = submission_models.FrozenAuthor.objects.create(
                 article=article,
                 author=account,
@@ -454,6 +513,7 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
                 frozen_orcid=author["orcid"],
                 frozen_email=author['email'],
                 order=idx,
+                display_email=True if author['correspondence'] else False,
             )
             if account and author["correspondence"]:
                 article.correspondence_author = account
@@ -491,8 +551,19 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None):
                 journal=journal,
                 defaults={"issue_type": issue_type}
             )
+            journal_models.SectionOrdering.objects.update_or_create(
+                issue=issue,
+                section=section,
+                defaults={
+                    "order": 0,
+                }
+            )
         issue.articles.add(article)
         article.primary_issue = issue
+
+        if metadata.get("coi", None):
+            article.competing_interests = metadata.get("coi")
+
         article.save()
 
         return article
@@ -701,7 +772,8 @@ def import_jats_preprint(
     meta["license_url"], meta["license_text"] = get_jats_license(jats_soup)
     meta["authors"] = []
     authors_soup = metadata_soup.find("contrib-group")
-    author_notes = metadata_soup.find("author_notes")
+    author_notes = jats_soup.find("author-notes")
+
     if authors_soup:
         meta["authors"] = get_jats_authors(
             authors_soup,
@@ -837,7 +909,7 @@ def save_preprint(
         return preprint
 
 
-def import_html_reviews(preprint, review_files, owner):
+def import_html_reviews(preprint, review_files, owner, number=None):
     review_round, _ = review_models.ReviewRound.objects.get_or_create(
         round_number=1,
         article=preprint.article,
@@ -846,6 +918,7 @@ def import_html_reviews(preprint, review_files, owner):
         journal=preprint.article.journal,
     ).first()
     for review_file in review_files:
+        print(f"Importing {review_file}")
         with open(review_file, 'r') as r_file:
             contents = r_file.read()
             try:
@@ -878,7 +951,7 @@ def import_html_reviews(preprint, review_files, owner):
                 assignment=review_assignment,
                 original_element=default_element,
                 defaults={
-                    'answer': contents.strip().replace('\n', ''),
+                    'answer': answer,
                     'author_can_see': True,
                 }
             )
@@ -888,3 +961,15 @@ def import_html_reviews(preprint, review_files, owner):
                 identifier=review_doi,
                 review=review_assignment,
             )
+
+
+def get_jats_coi_statement(author_notes):
+    coi_fn = author_notes.find(
+        "fn",
+        attrs={"fn-type": "coi-statement"},
+    )
+    if not coi_fn:
+        return None
+
+    # Return inner HTML of <fn> (not including the <fn> tag itself)
+    return "".join(str(child) for child in coi_fn.contents).strip()