diff --git a/adsmp/app.py b/adsmp/app.py index c2cd9fd..4b265c6 100644 --- a/adsmp/app.py +++ b/adsmp/app.py @@ -175,16 +175,23 @@ def update_storage(self, bibcode, type, payload): session.add(ChangeLog(key=bibcode, type=type, oldvalue=oldval)) record.updated = now out = record.toJSON() + attempted_scix_id = None try: session.flush() if not record.scix_id and record.bib_data: - record.scix_id = "scix:" + str(self.generate_scix_id(record.bib_data)) + attempted_scix_id = "scix:" + str(self.generate_scix_id(record.bib_data)) + record.scix_id = attempted_scix_id out = record.toJSON() session.commit() return out - except exc.IntegrityError: - self.logger.exception('error in app.update_storage while updating database for bibcode {}, type {}'.format(bibcode, type)) + except exc.IntegrityError as e: session.rollback() + if attempted_scix_id: + self.log_scix_id_collision(bibcode, attempted_scix_id, e) + else: + self.logger.exception( + 'IntegrityError in update_storage for bibcode %s, type %s ' + '(not a scix_id collision)', bibcode, type) raise def generate_scix_id(self, bib_data): @@ -194,6 +201,39 @@ def generate_scix_id(self, bib_data): user_fields = None return scix_id.generate_scix_id(bib_data, user_fields = user_fields) + def log_scix_id_collision(self, bibcode, attempted_scix_id, original_error=None): + """Query the database for the record that already holds attempted_scix_id + and log both the existing and new (bibcode, scix_id) pairs so curators + can identify which records are clashing.""" + try: + with self.session_scope() as session: + existing = session.query(Records).filter_by(scix_id=attempted_scix_id).first() + if existing: + self.logger.error( + 'SciX ID collision detected: ' + 'existing record in DB: (bibcode=%s, scix_id=%s), ' + 'new record that failed to commit: (bibcode=%s, scix_id=%s). ' + 'Original error: %s', + existing.bibcode, existing.scix_id, + bibcode, attempted_scix_id, + original_error + ) + else: + self.logger.error( + 'IntegrityError for bibcode %s with scix_id %s, ' + 'but no existing record found with that scix_id ' + '(possible race condition or other constraint violation). ' + 'Original error: %s', + bibcode, attempted_scix_id, + original_error + ) + except Exception: + self.logger.exception( + 'Failed to query database for scix_id collision details ' + '(bibcode=%s, attempted_scix_id=%s)', + bibcode, attempted_scix_id + ) + def delete_by_bibcode(self, bibcode): with self.session_scope() as session: r = session.query(Records).filter_by(bibcode=bibcode).first() diff --git a/adsmp/tasks.py b/adsmp/tasks.py index 53a6cef..92ba880 100644 --- a/adsmp/tasks.py +++ b/adsmp/tasks.py @@ -237,13 +237,14 @@ def task_update_scixid(bibcodes, flag): if flag == 'update': if not r.scix_id: if r.bib_data: - r.scix_id = "scix:" + app.generate_scix_id(r.bib_data) + attempted_scix_id = "scix:" + app.generate_scix_id(r.bib_data) + r.scix_id = attempted_scix_id try: session.commit() logger.debug('Bibcode %s has been assigned a new scix id: %s', bibcode, r.scix_id) - except exc.IntegrityError: - logger.exception('error in app.update_storage while updating database for bibcode %s', bibcode) + except exc.IntegrityError as e: session.rollback() + app.log_scix_id_collision(bibcode, attempted_scix_id, e) else: r.scix_id = None session.commit() @@ -254,13 +255,14 @@ def task_update_scixid(bibcodes, flag): if flag == 'force': if r.bib_data: old_id = r.scix_id - r.scix_id = "scix:" + app.generate_scix_id(r.bib_data) + attempted_scix_id = "scix:" + app.generate_scix_id(r.bib_data) + r.scix_id = attempted_scix_id try: session.commit() logger.debug('Bibcode %s scix_id changed from %s to %s', bibcode, old_id, r.scix_id) - except exc.IntegrityError: - logger.exception('error in app.update_storage while updating database for bibcode %s', bibcode) + except exc.IntegrityError as e: session.rollback() + app.log_scix_id_collision(bibcode, attempted_scix_id, e) else: r.scix_id = None session.commit()