diff --git a/admin.py b/admin.py index 7f8307a..1f7564f 100644 --- a/admin.py +++ b/admin.py @@ -73,11 +73,29 @@ class CSVImportArticleAdmin(utils_admin_utils.ArticleFKModelAdmin): date_hierarchy = ('imported') +class NotificationAdmin(admin.ModelAdmin): + list_display = ('pk', 'email') + + +class CitationFormatAdmin(admin.ModelAdmin): + list_display = ('journal', 'format') + raw_id_fields = ('journal',) + + +class SectionMapAdmin(admin.ModelAdmin): + list_display = ('section', 'article_type') + search_fields = ('article_type', 'section__name') + raw_id_fields = ('section',) + + for pair in [ (models.ExportFile, ExportFileAdmin), (models.CSVImport, CSVImportAdmin), (models.CSVImportCreateArticle, CSVImportArticleAdmin), (models.CSVImportUpdateArticle, CSVImportArticleAdmin), (models.OJSFile,), + (models.AutomatedImportNotification, NotificationAdmin), + (models.CitationFormat, CitationFormatAdmin), + (models.SectionMap, SectionMapAdmin), ]: admin.site.register(*pair) diff --git a/jats.py b/jats.py index 1b92ddd..82fdcf3 100644 --- a/jats.py +++ b/jats.py @@ -18,6 +18,7 @@ from django.db import transaction from django.utils import timezone +from plugins.imports import models from core import files from core import models as core_models from core.models import Account @@ -42,6 +43,7 @@ def import_jats_article( jats_contents, journal=None, persist=True, filename=None, owner=None, images=None, request=None, stage=None, + get_section_from_subject=False, ): """ JATS import entrypoint :param jats_contents: (str) the JATS XML to be imported @@ -63,13 +65,14 @@ def import_jats_article( meta["abstract"] = get_jats_abstract(metadata_soup) meta["issue"], meta["volume"] = get_jats_issue(jats_soup) meta["keywords"] = get_jats_keywords(metadata_soup) - meta["section_name"] = get_jats_section_name(jats_soup) + meta["section_name"] = get_jats_section_name(jats_soup, get_section_from_subject) meta["date_published"] = get_jats_pub_date(jats_soup) or datetime.date.today() meta["license_url"], meta["license_text"] = get_jats_license(jats_soup) meta["rights"] = get_jats_rights_statement(jats_soup) meta["authors"] = [] meta["date_submitted"] = None meta["date_accepted"] = None + meta["custom_how_to_cite"] = get_custom_how_to_cite(metadata_soup) try: meta["first_page"] = int(metadata_soup.find("fpage").text) except (ValueError, AttributeError): @@ -85,7 +88,10 @@ def import_jats_article( meta["date_accepted"] = get_jats_acc_date(history_soup) authors_soup = metadata_soup.find("contrib-group") - author_notes = metadata_soup.find("author_notes") + author_notes = metadata_soup.find("author-notes") + if author_notes: + meta["coi"] = get_jats_coi_statement(author_notes) + if authors_soup: meta["authors"] = get_jats_authors( authors_soup, @@ -115,7 +121,10 @@ def import_jats_article( return article -def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=None): +def import_jats_zipped( + zip_file, journal=None, owner=None, + persist=True, stage=None, get_section_from_subject=False, +): """ Import a batch of Zipped JATS articles and their associated files :param zip_file: The zipped jats to be imported :param journal: Journal in which to import the articles @@ -147,7 +156,6 @@ def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=N else: supplements.append(file_path) - if jats_path: # Check nested dirs relative to xml like ./figures for dir_ in dirs: @@ -163,6 +171,7 @@ def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=N jats_file.read(), journal, persist, jats_filename, owner, supplements, stage=stage, + get_section_from_subject=get_section_from_subject, ) articles.append((jats_filename, article)) if pdf_path: @@ -170,7 +179,7 @@ def import_jats_zipped(zip_file, journal=None, owner=None, persist=True, stage=N except Exception as err: logger.warning(err) logger.warning(traceback.format_exc()) - errors.append((filenames, err)) + errors.append((jats_path, err)) return articles, errors @@ -210,7 +219,7 @@ def get_jats_title(soup): def get_jats_abstract(soup): abstract = soup.find("abstract") if abstract: - return abstract.text + return f"

{abstract.text}

" else: return "" @@ -278,16 +287,22 @@ def get_jats_acc_date(soup): def get_jats_keywords(soup): jats_keywords_soup = soup.find("kwd-group") + + # This was previously a set but is now a list to preserve keyword order. if jats_keywords_soup: - return { + return [ keyword.text.strip() for keyword in jats_keywords_soup.find_all("kwd") - } + ] else: - return set() + return list() -def get_jats_section_name(soup): +def get_jats_section_name(soup,get_section_from_subject): + if get_section_from_subject: + subject = soup.find("subject") + if subject: + return subject.text return soup.find("article").attrs.get("article-type") @@ -334,10 +349,44 @@ def get_jats_authors(soup, metadata_soup, author_notes=None): corresp_email = author_notes.find("email") if corresp_email: author_data["email"] = corresp_email.text + else: + # Check and alternative route for identifying corresp + # authors + corresp_ref = author.find( + 'xref', {'ref-type': 'corresp'} + ) + if corresp_ref: + author_data["correspondence"] = True + + if author_notes: + xref_rid = corresp_ref.get('rid') + corr_note = author_notes.find( + 'corresp', {'id': xref_rid} + ) + if corr_note: + corresp_email = corr_note.find( + 'email' + ) + if corresp_email: + author_data["email"] = corresp_email.text + authors.append(author_data) return authors +def get_custom_how_to_cite(metadata_soup): + custom_meta_tags = metadata_soup.find_all('custom-meta') + for custom_meta_tag in custom_meta_tags: + meta_name_tag = custom_meta_tag.find('meta-name') + if meta_name_tag: + meta_name_value = meta_name_tag.string + if meta_name_value in ['How to cite', 'How To Cite']: + meta_value_tag = custom_meta_tag.find('meta-value') + if meta_value_tag: + return meta_value_tag.text + return '' + + def get_orcid(author_soup): contrib_ids = author_soup.findAll('contrib-id') for ci in contrib_ids: @@ -372,12 +421,19 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None): journal = get_lost_found_journal() with transaction.atomic(): - section, _ = submission_models.Section.objects \ - .get_or_create( - journal=journal, - name=metadata["section_name"], - ) - section.save() + try: + section_map = models.SectionMap.objects.get( + article_type=metadata["section_name"], + section__journal=journal, + ) + if section_map: + section = section_map.section + except models.SectionMap.DoesNotExist: + section, _ = submission_models.Section.objects \ + .get_or_create( + journal=journal, + name=metadata["section_name"], + ) article = get_article(metadata.get("identifiers", {}), journal) if not article: @@ -386,14 +442,16 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None): title=metadata["title"], abstract=metadata["abstract"], date_published=metadata["date_published"], - date_accepted=metadata["date_submitted"], + date_accepted=metadata["date_accepted"], date_submitted=metadata["date_submitted"], rights=metadata["rights"], stage=stage or submission_models.STAGE_PUBLISHED, is_import=True, owner=owner, first_page=metadata["first_page"], - last_page=metadata["last_page"] + last_page=metadata["last_page"], + custom_how_to_cite=metadata['custom_how_to_cite'], + article_agreement='This article is a JATS import.', ) article.section = section article.save() @@ -403,11 +461,13 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None): article.abstract = metadata["abstract"] article.date_published = metadata["date_published"] article.date_published = metadata["date_published"] - article.date_accepted = metadata["date_submitted"] + article.date_accepted = metadata["date_accepted"] article.date_submitted = metadata["date_submitted"] article.rights = metadata["rights"] article.first_page = metadata["first_page"] article.last_page = metadata["last_page"] + article.custom_how_to_cite = metadata["custom_how_to_cite"] + article.section = section article.save() if metadata["identifiers"]["doi"]: @@ -444,7 +504,6 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None): "orcid": author["orcid"], }, ) - fa = submission_models.FrozenAuthor.objects.create( article=article, author=account, @@ -454,6 +513,7 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None): frozen_orcid=author["orcid"], frozen_email=author['email'], order=idx, + display_email=True if author['correspondence'] else False, ) if account and author["correspondence"]: article.correspondence_author = account @@ -491,8 +551,19 @@ def save_article(metadata, journal=None, issue=None, owner=None, stage=None): journal=journal, defaults={"issue_type": issue_type} ) + journal_models.SectionOrdering.objects.update_or_create( + issue=issue, + section=section, + defaults={ + "order": 0, + } + ) issue.articles.add(article) article.primary_issue = issue + + if metadata.get("coi", None): + article.competing_interests = metadata.get("coi") + article.save() return article @@ -701,7 +772,8 @@ def import_jats_preprint( meta["license_url"], meta["license_text"] = get_jats_license(jats_soup) meta["authors"] = [] authors_soup = metadata_soup.find("contrib-group") - author_notes = metadata_soup.find("author_notes") + author_notes = jats_soup.find("author-notes") + if authors_soup: meta["authors"] = get_jats_authors( authors_soup, @@ -837,7 +909,7 @@ def save_preprint( return preprint -def import_html_reviews(preprint, review_files, owner): +def import_html_reviews(preprint, review_files, owner, number=None): review_round, _ = review_models.ReviewRound.objects.get_or_create( round_number=1, article=preprint.article, @@ -846,6 +918,7 @@ def import_html_reviews(preprint, review_files, owner): journal=preprint.article.journal, ).first() for review_file in review_files: + print(f"Importing {review_file}") with open(review_file, 'r') as r_file: contents = r_file.read() try: @@ -878,7 +951,7 @@ def import_html_reviews(preprint, review_files, owner): assignment=review_assignment, original_element=default_element, defaults={ - 'answer': contents.strip().replace('\n', ''), + 'answer': answer, 'author_can_see': True, } ) @@ -888,3 +961,15 @@ def import_html_reviews(preprint, review_files, owner): identifier=review_doi, review=review_assignment, ) + + +def get_jats_coi_statement(author_notes): + coi_fn = author_notes.find( + "fn", + attrs={"fn-type": "coi-statement"}, + ) + if not coi_fn: + return None + + # Return inner HTML of (not including the tag itself) + return "".join(str(child) for child in coi_fn.contents).strip() \ No newline at end of file diff --git a/management/commands/fetch_crossref_how_to_cite.py b/management/commands/fetch_crossref_how_to_cite.py new file mode 100644 index 0000000..230e4b4 --- /dev/null +++ b/management/commands/fetch_crossref_how_to_cite.py @@ -0,0 +1,63 @@ +from django.core.management.base import BaseCommand + +import requests +import time +from pprint import pprint + +from submission import models + + +class Command(BaseCommand): + """For a give journal query crossref for the given citation format + and update the custom how to cite field.""" + + help = "Gets custom how to cite using Crossref." + + def add_arguments(self, parser): + parser.add_argument('--journal', type=str) + parser.add_argument('--article_id', type=str) + parser.add_argument('--style', type=str) + parser.add_argument('--locale', type=str) + parser.add_argument('--mailto', type=str) + + def handle(self, *args, **options): + errors = [] + articles = models.Article.objects.filter(journal__code=options.get('journal')) + + article_id = options.get('article_id') + if article_id: + articles = articles.filter(pk=article_id) + + for index, article in enumerate(articles): + print(f"Getting how to cite for article #{article.pk}. {index}/{articles.count()}") + if article.get_doi(): + try: + r = requests.get( + headers={ + 'Accept': 'text/bibliography', + 'style': options.get('style'), + 'locale': options.get('locale') + }, + url=f"https://api.crossref.org/v1/works/{article.get_doi()}/transform?mailto={options.get('mailto')}", + ) + r.encoding = 'UTF-8' + how_to_cite = r.text.strip() + if r.status_code == 200: + print(f"Response: {how_to_cite}") + article.custom_how_to_cite = how_to_cite + article.save() + print(f"Article #{article.pk} how to cite updated.") + else: + print(f"Crossref API responded with: {r.status_code}") + except Exception as e: + errors.append( + {'article': article, 'error': e} + ) + else: + print(f"Article #{article.pk} does not have a DOI") + + time.sleep(2) + + print('Errors:') + pprint(errors) + diff --git a/management/commands/import_multiple_jats_zips.py b/management/commands/import_multiple_jats_zips.py new file mode 100644 index 0000000..2c5fe2f --- /dev/null +++ b/management/commands/import_multiple_jats_zips.py @@ -0,0 +1,167 @@ +import os +import mimetypes + +from django.core.management.base import BaseCommand +from django.core.management import call_command +from django.contrib.contenttypes.models import ContentType + +from utils.logger import get_logger +from journal import models as jm, logic +from core import models as cm +from plugins.imports import jats, models +from cron.models import Request +from press import models as pm +from utils import setting_handler, render_template +from utils.transactional_emails import send_author_publication_notification + + +logger = get_logger(__name__) + + +class Command(BaseCommand): + """ Imports zipped articles in JATS XML format file""" + + help = "Imports zipped articles in JATS XML Format" + + def add_arguments(self, parser): + parser.add_argument('folder_path') + parser.add_argument('-j', '--journal_code') + parser.add_argument('-o', '--owner_id', default=1) + parser.add_argument('-d', '--dry-run', action="store_true", default=False) + parser.add_argument('-c', '--crossref-deposit', action="store_true", + default=False) + parser.add_argument('-ch', '--crossref-how-to-cite', action="store_true", + default=False) + parser.add_argument('-n', '--notify-author', action="store_true", + default=False) + parser.add_argument('-m', '--crossref-mailto', type=str) + parser.add_argument('-s', '--section_from_subject', action="store_true") + parser.add_argument('-r', '--remove_zips', action="store_true") + + def handle(self, *args, **options): + successes = [] + all_errors = [] + articles = [] + folder_path = options.get('folder_path') + journal = None + if options["journal_code"]: + journal = jm.Journal.objects.get(code=options["journal_code"]) + owner = cm.Account.objects.get(pk=options["owner_id"]) + persist = False if options.get('dry_run') else True + + if os.path.exists( + folder_path, + ): + zip_files = [] + for root, dirs, filenames in os.walk(folder_path): + for filename in filenames: + mimetype, _ = mimetypes.guess_type(filename) + if mimetype == 'application/zip': + zip_files.append(os.path.join(root, filename)) + + if zip_files: + for zip_file in zip_files: + articles, errors = jats.import_jats_zipped( + zip_file, + journal, + owner=owner, + persist=persist, + get_section_from_subject=options.get( + 'section_from_subject', + False, + ) + ) + + for article in articles: + successes.append( + f'Imported {article}', + ) + if options.get('crossref_deposit'): + call_command( + 'register_crossref_doi', + article[1].pk + ) + if options.get('crossref_how_to_cite'): + citation_format = models.CitationFormat.objects.filter( + journal=article[1].journal, + ).first() + if citation_format: + call_command( + 'fetch_crossref_how_to_cite', + journal=article[1].journal.code, + article_id=article[1].pk, + style=citation_format.format, + locale='en_GB', + mailto='a.byers@bbk.ac.uk', + ) + for error in errors: + all_errors.append( + { + 'zip_file': os.path.basename(zip_file), + 'error': error[1], + } + ) + + if options.get('remove_zips', False): + os.unlink(zip_file) + + if zip_files and persist: + to_notify = models.AutomatedImportNotification.objects.all() + request = Request() + press = pm.Press.objects.first() + request.press = press + request.site_type = press + request.repository = None + request.POST = {} + + for n in to_notify: + n.send_notification( + articles, + all_errors, + request, + ) + + if options.get('notify_author'): + for article_set in articles: + article = article_set[1] + + jm.FixedPubCheckItems.objects.get_or_create( + article=article, + ) + + if article.correspondence_author and not article.fixedpubcheckitems.notify_the_author: + request.user = owner + request.journal = article.journal + request.site_type = article.journal + request.model_content_type = ContentType.objects.get_for_model( + article.journal, + ) + + template = setting_handler.get_setting( + 'email', + 'author_publication', + request.journal, + ).value.replace( + " at {{ article.date_published|date:'H:i' }}", + "", + ) + message = render_template.get_message_content( + request, + {'article': article}, + template, + template_is_setting=True, + ) + request.POST = { + 'notify_author_email': message + } + send_author_publication_notification( + **{ + 'request': request, + 'article': article, + 'user_message': message, + 'section_editors': False, + 'peer_reviewers': False, + } + ) + article.fixedpubcheckitems.notify_the_author = True + article.fixedpubcheckitems.save() diff --git a/migrations/0009_automatedimportnotification.py b/migrations/0009_automatedimportnotification.py new file mode 100644 index 0000000..c97e3f5 --- /dev/null +++ b/migrations/0009_automatedimportnotification.py @@ -0,0 +1,20 @@ +# Generated by Django 3.2.20 on 2024-02-07 14:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('imports', '0008_auto_20231106_1621'), + ] + + operations = [ + migrations.CreateModel( + name='AutomatedImportNotification', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('email', models.EmailField(help_text='Email address of user to receive notification of automatic import logs.', max_length=254)), + ], + ), + ] diff --git a/migrations/0010_citationformat_sectionmap.py b/migrations/0010_citationformat_sectionmap.py new file mode 100644 index 0000000..bb539f4 --- /dev/null +++ b/migrations/0010_citationformat_sectionmap.py @@ -0,0 +1,30 @@ +# Generated by Django 3.2.20 on 2024-03-11 12:04 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('imports', '0009_automatedimportnotification'), + ] + + operations = [ + migrations.CreateModel( + name='SectionMap', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('article_type', models.CharField(blank=True, max_length=100)), + ('section', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='submission.section')), + ], + ), + migrations.CreateModel( + name='CitationFormat', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('format', models.CharField(blank=True, max_length=255)), + ('journal', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='journal.journal')), + ], + ), + ] diff --git a/models.py b/models.py index 8f9f214..9395f00 100644 --- a/models.py +++ b/models.py @@ -1,6 +1,8 @@ from django.db import models from django.utils import timezone +from utils import notify_helpers + class WordPressImport(models.Model): url = models.URLField( @@ -129,3 +131,65 @@ class OJSFile(models.Model): 'core.File', on_delete=models.CASCADE, ) + + +class AutomatedImportNotification(models.Model): + email = models.EmailField( + help_text='Email address of user to receive notification ' + 'of automatic import logs.', + ) + + def send_notification(self, articles, errors, request): + log_dict = { + 'level': 'Info', + 'action_type': 'Contact Production Staff', + 'types': 'Email', + 'target': None + } + articles_urls = '
'.join(article.url for article in articles) + message = f""" +

The following articles were imported:

+

{articles_urls}

+

The following errors were detected during import:

+

{errors}

+

+ Regards
+ Janeway +

+ """ + + notify_helpers.send_email_with_body_from_user( + request, + 'Janeway Article Import Notification', + self.email, + message, + log_dict=log_dict, + ) + + +class CitationFormat(models.Model): + journal = models.OneToOneField( + 'journal.Journal', + on_delete=models.CASCADE, + ) + format = models.CharField( + max_length=255, + blank=True, + ) + + def __str__(self): + return f"{self.journal.name}: {self.format}" + + +class SectionMap(models.Model): + section = models.ForeignKey( + 'submission.Section', + on_delete=models.CASCADE, + ) + article_type = models.CharField( + max_length=100, + blank=True, + ) + + def __str__(self): + return f"{self.article_type} mapped to {self.section.name}" diff --git a/ojs/native.py b/ojs/native.py index 61940ff..ee35d71 100644 --- a/ojs/native.py +++ b/ojs/native.py @@ -52,9 +52,14 @@ def import_users(xml_content, journal): print(f'Account with email {email} updated.') if interests: for interest in interests.split(','): - new_interest, c = core_models.Interest.objects.get_or_create( - name=interest, - ) + try: + new_interest, c = core_models.Interest.objects.get_or_create( + name=interest, + ) + except core_models.Interest.MultipleObjectsReturned: + new_interest = core_models.Interest.objects.filter( + name=interest, + ).first() account.interest.add(new_interest) if user_groups: role_slugs = common.map_ojs_roles_to_janeway_role_slugs( diff --git a/utils.py b/utils.py index 072a1fd..169bbf5 100644 --- a/utils.py +++ b/utils.py @@ -348,7 +348,6 @@ def update_article_metadata(reader, folder_path=None, owner=None, import_id=None actions[article.pk] = f'Article {article.title} ({article.pk}) updated.' except Exception as e: - import pdb;pdb.set_trace() errors.append( { 'article': primary_row.get('Article title'),