Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions adsmp/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ def should_include_in_sitemap(self, record):
3. If processed, processing isn't too stale

Args:
record: Dictionary with record data including bib_data, status, timestamps
record: Dictionary with record data including has_bib_data, status, timestamps

Returns:
bool: True if record should be included in sitemap, False otherwise
Expand All @@ -772,14 +772,14 @@ def should_include_in_sitemap(self, record):

# Extract values from record dictionary
bibcode = record.get('bibcode', None)
bib_data = record.get('bib_data', None)
has_bib_data = record.get('has_bib_data', None)
bib_data_updated = record.get('bib_data_updated')
solr_processed = record.get('solr_processed')
status = record.get('status')

# Must have bibliographic data
if not bib_data or not bibcode or (isinstance(bib_data, str) and not bib_data.strip()):
self.logger.debug('Excluding %s from sitemap: No bibcode or bib_data', bibcode)
if not has_bib_data or not bibcode:
self.logger.debug('Excluding %s from sitemap: No bibcode or has_bib_data is False', bibcode)
return False

# Exclude if SOLR failed or if record is being retried (previously failed)
Expand Down Expand Up @@ -828,6 +828,8 @@ def get_records_bulk(self, bibcodes, session, load_only=None):
record_data = {}
for field in (load_only or ['id', 'bibcode', 'bib_data', 'bib_data_updated', 'solr_processed', 'status']):
record_data[field] = getattr(record, field, None)
# Add has_bib_data boolean for sitemap checks
record_data['has_bib_data'] = bool(record_data.get('bib_data'))
records_dict[record.bibcode] = record_data

return records_dict
Expand Down
27 changes: 16 additions & 11 deletions adsmp/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,15 @@ def task_update_record(msg):
record = app.update_storage(m.bibcode, 'nonbib_data', m.toJSON())
if record:
logger.debug('Saved record from list: %s', record)
_generate_boost_request(m, type)
elif type == 'metrics_records':
for m in msg.metrics_records:
m = Msg(m, None, None)
bibcodes.append(m.bibcode)
record = app.update_storage(m.bibcode, 'metrics', m.toJSON(including_default_value_fields=True))
if record:
logger.debug('Saved record from list: %s', record)
_generate_boost_request(m, type)
elif type == 'augment':
bibcodes.append(msg.bibcode)
record = app.update_storage(msg.bibcode, 'augment',
Expand All @@ -176,22 +178,25 @@ def task_update_record(msg):
record = app.update_storage(msg.bibcode, type, msg.toJSON())
if record:
logger.debug('Saved record: %s', record)
_generate_boost_request(msg, type)
if type == 'metadata':
# with new bib data we request to augment the affiliation
# that pipeline will eventually respond with a msg to task_update_record
logger.debug('requesting affilation augmentation for %s', msg.bibcode)
app.request_aff_augment(msg.bibcode)
if record:
# Send payload to Boost pipeline
if type != 'boost' and not app._config.get('TESTING_MODE', False):
try:
task_boost_request.apply_async(args=(msg.bibcode,))
except Exception as e:
app.logger.exception('Error generating boost request message for bibcode %s: %s', msg.bibcode, e)

else:
logger.error('Received a message with unclear status: %s', msg)

def _generate_boost_request(msg, msg_type):
    """Queue a Boost-pipeline request for the record carried by *msg*.

    Fires a ``task_boost_request`` Celery task for ``msg.bibcode`` unless the
    message type is in the configured ignore list or the app is running in
    testing mode. Failures to enqueue are logged and swallowed so the calling
    update task is never interrupted by Boost-pipeline problems.

    Args:
        msg: Message object exposing a ``bibcode`` attribute.
        msg_type: String type of the payload (e.g. 'metadata', 'nonbib_data',
            'metrics', 'boost') used to decide whether to send the request.
    """
    # Send payload to Boost pipeline; 'boost' payloads are ignored by default
    # to avoid a feedback loop, and tests never enqueue real tasks.
    if msg_type not in app._config.get('IGNORED_BOOST_PAYLOAD_TYPES', ['boost']) and not app._config.get('TESTING_MODE', False):
        try:
            task_boost_request.apply_async(args=(msg.bibcode,))
        except Exception as e:
            # Best-effort: log and continue so the record update still succeeds.
            app.logger.exception('Error generating boost request message for bibcode %s: %s', msg.bibcode, e)
    else:
        # BUG FIX: original called .format() on a %-style string with no {}
        # placeholders, logging the literal '%s' text; use the logger's lazy
        # %-args (consistent with the exception branch above).
        app.logger.debug("Message for bibcode %s has type: %s, Skipping.", msg.bibcode, msg_type)

@app.task(queue='update-scixid')
def task_update_scixid(bibcodes, flag):
"""Receives bibcodes to add scix id to the record.
Expand Down Expand Up @@ -490,7 +495,7 @@ def task_cleanup_invalid_sitemaps():
session.query(
SitemapInfo.id,
SitemapInfo.bibcode,
Records.bib_data,
(Records.bib_data.isnot(None)).label('has_bib_data'),
Records.bib_data_updated,
Records.solr_processed,
Records.status
Expand Down Expand Up @@ -519,7 +524,7 @@ def task_cleanup_invalid_sitemaps():
# Convert to dict for should_include_in_sitemap function
record_dict = {
'bibcode': record_data.bibcode,
'bib_data': record_data.bib_data,
'has_bib_data': record_data.has_bib_data,
'bib_data_updated': record_data.bib_data_updated,
'solr_processed': record_data.solr_processed,
'status': record_data.status
Expand Down Expand Up @@ -688,7 +693,7 @@ def task_manage_sitemap(bibcodes, action):
# Apply SOLR filtering - convert record to dict for should_include_in_sitemap
record_dict = {
'bibcode': record.bibcode,
'bib_data': record.bib_data,
'has_bib_data': bool(record.bib_data),
'bib_data_updated': record.bib_data_updated,
'solr_processed': record.solr_processed,
'status': record.status
Expand Down
Loading