diff --git a/adsmp/app.py b/adsmp/app.py
index b3fbf9a..994ff6d 100644
--- a/adsmp/app.py
+++ b/adsmp/app.py
@@ -894,7 +894,7 @@ def should_include_in_sitemap(self, record):
         3. If processed, processing isn't too stale
         
         Args:
-            record: Dictionary with record data including bib_data, status, timestamps
+            record: Dictionary with record data including has_bib_data, status, timestamps
             
         Returns:
             bool: True if record should be included in sitemap, False otherwise
@@ -903,14 +903,14 @@ def should_include_in_sitemap(self, record):
         
         # Extract values from record dictionary
         bibcode = record.get('bibcode', None)
-        bib_data = record.get('bib_data', None)
+        has_bib_data = record.get('has_bib_data', None)
         bib_data_updated = record.get('bib_data_updated')
         solr_processed = record.get('solr_processed') 
         status = record.get('status')
         
         # Must have bibliographic data
-        if not bib_data or not bibcode or (isinstance(bib_data, str) and not bib_data.strip()):
-            self.logger.debug('Excluding %s from sitemap: No bibcode or bib_data', bibcode)
+        if not has_bib_data or not bibcode:
+            self.logger.debug('Excluding %s from sitemap: No bibcode or has_bib_data is False', bibcode)
             return False
         
          # Exclude if SOLR failed or if record is being retried (previously failed)
@@ -959,6 +959,8 @@ def get_records_bulk(self, bibcodes, session, load_only=None):
             record_data = {}
             for field in (load_only or ['id', 'bibcode', 'bib_data', 'bib_data_updated', 'solr_processed', 'status']):
                 record_data[field] = getattr(record, field, None)
+            # Add has_bib_data boolean for sitemap checks
+            record_data['has_bib_data'] = bool(record_data.get('bib_data'))
             records_dict[record.bibcode] = record_data
         
         return records_dict
diff --git a/adsmp/tasks.py b/adsmp/tasks.py
index 7620cc9..18cb1cf 100644
--- a/adsmp/tasks.py
+++ b/adsmp/tasks.py
@@ -428,7 +428,7 @@ def task_cleanup_invalid_sitemaps():
                 session.query(
                     SitemapInfo.id,  
                     SitemapInfo.bibcode,
-                    Records.bib_data,
+                    (Records.bib_data.isnot(None)).label('has_bib_data'),
                     Records.bib_data_updated,
                     Records.solr_processed,
                     Records.status
@@ -457,7 +457,7 @@ def task_cleanup_invalid_sitemaps():
                 # Convert to dict for should_include_in_sitemap function
                 record_dict = {
                     'bibcode': record_data.bibcode,
-                    'bib_data': record_data.bib_data,
+                    'has_bib_data': record_data.has_bib_data,
                     'bib_data_updated': record_data.bib_data_updated,
                     'solr_processed': record_data.solr_processed,
                     'status': record_data.status
@@ -626,7 +626,7 @@ def task_manage_sitemap(bibcodes, action):
                         # Apply SOLR filtering - convert record to dict for should_include_in_sitemap
                         record_dict = {
                             'bibcode': record.bibcode,
-                            'bib_data': record.bib_data,
+                            'has_bib_data': bool(record.bib_data),
                             'bib_data_updated': record.bib_data_updated,
                             'solr_processed': record.solr_processed,
                             'status': record.status
diff --git a/adsmp/tests/test_app.py b/adsmp/tests/test_app.py
index 9c4bb21..fabf6e3 100644
--- a/adsmp/tests/test_app.py
+++ b/adsmp/tests/test_app.py
@@ -471,7 +471,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 1: Record with no bib_data (should be excluded)
         record_no_data = {
             'bibcode': '2023NoData..1..1A',
-            'bib_data': None,
+            'has_bib_data': False,
             'status': 'success'
         }
         self.assertFalse(self.app.should_include_in_sitemap(record_no_data), 
@@ -480,7 +480,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 2: Record with empty bib_data string (should be excluded)
         record_empty_data = {
             'bibcode': '2023Empty..1..1A',
-            'bib_data': '',
+            'has_bib_data': False,
             'status': 'success'
         }
         self.assertFalse(self.app.should_include_in_sitemap(record_empty_data), 
@@ -489,7 +489,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 3: Record with solr-failed status (should be excluded)
         record_solr_failed = {
             'bibcode': '2023Failed..1..1A',
-            'bib_data': '{"title": "Test"}',
+            'has_bib_data': True,
             'status': 'solr-failed'
         }
         self.assertFalse(self.app.should_include_in_sitemap(record_solr_failed), 
@@ -498,7 +498,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 4: Record with retrying status (should be excluded)
         record_retrying = {
             'bibcode': '2023Retrying..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'retrying'
         }
         self.assertFalse(self.app.should_include_in_sitemap(record_retrying), 
@@ -507,7 +507,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 5: Record with None status (should be included)
         record_none_status = {
             'bibcode': '2023NoneStatus..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': None
         }
         self.assertTrue(self.app.should_include_in_sitemap(record_none_status), 
@@ -516,7 +516,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 6: Record with success status (should be included)
         record_success = {
             'bibcode': '2023Success..1..1A',
-            'bib_data': '{"title": "Test"}',
+            'has_bib_data': True,
             'status': 'success',
             'bib_data_updated': base_time - timedelta(days=1)
         }
@@ -526,7 +526,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 7: Record with metrics-failed status (should be included - not SOLR-related)
         record_metrics_failed = {
             'bibcode': '2023MetricsFailed..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'metrics-failed'
         }
         self.assertTrue(self.app.should_include_in_sitemap(record_metrics_failed), 
@@ -535,7 +535,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 8: Record with links-failed status (should be included - not SOLR-related)
         record_links_failed = {
             'bibcode': '2023LinksFailed..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'links-failed'
         }
         self.assertTrue(self.app.should_include_in_sitemap(record_links_failed), 
@@ -544,7 +544,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 9: Record with None status and no solr_processed (should be included - not yet processed)
         record_not_processed = {
             'bibcode': '2023NotProcessed..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': None,
             'solr_processed': None
         }
@@ -554,7 +554,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 10: Record with recent solr_processed (should be included)
         record_recent_solr = {
             'bibcode': '2023Recent..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'success',
             'bib_data_updated': base_time - timedelta(days=1),
             'solr_processed': base_time  # More recent than bib_data_updated
@@ -565,7 +565,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 11: Record with stale solr_processed (should be included with warning)
         record_stale_solr = {
             'bibcode': '2023Stale..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'success',
             'bib_data_updated': base_time,
             'solr_processed': base_time - timedelta(days=6)  # 6 days stale (> 5 day threshold)
@@ -576,7 +576,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 12: Record with exactly 5+ days staleness (boundary condition)
         record_boundary = {
             'bibcode': '2023Boundary..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'success',
             'bib_data_updated': base_time,
             'solr_processed': base_time - timedelta(days=5, seconds=1)  # Just over 5 days
@@ -587,7 +587,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 13: Record with no timestamps (should be included)
         record_no_timestamps = {
             'bibcode': '2023NoTimestamps..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'success',
             'bib_data_updated': None,
             'solr_processed': None
@@ -598,7 +598,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 14: Record with bib_data_updated but no solr_processed (should be included)
         record_no_solr_time = {
             'bibcode': '2023NoSolrTime..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'success',
             'bib_data_updated': base_time,
             'solr_processed': None
@@ -609,7 +609,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 15: Record with solr_processed but no bib_data_updated (should be included)
         record_no_bib_time = {
             'bibcode': '2023NoBibTime..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'success',
             'bib_data_updated': None,
             'solr_processed': base_time
@@ -620,7 +620,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 16: Record with very fresh processing (should be included)
         record_fresh = {
             'bibcode': '2023Fresh..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'success',
             'bib_data_updated': base_time - timedelta(minutes=30),
             'solr_processed': base_time
@@ -631,7 +631,7 @@ def test_should_include_in_sitemap_comprehensive(self):
         # Test 17: Record with moderate lag (2 days, should be included without warning)
         record_moderate_lag = {
             'bibcode': '2023Moderate..1..1A',
-            'bib_data': {'title': 'Test'},
+            'has_bib_data': True,
             'status': 'success',
             'bib_data_updated': base_time - timedelta(days=2),
             'solr_processed': base_time
diff --git a/adsmp/tests/test_tasks.py b/adsmp/tests/test_tasks.py
index bf1072f..0f1f0c7 100644
--- a/adsmp/tests/test_tasks.py
+++ b/adsmp/tests/test_tasks.py
@@ -1227,8 +1227,6 @@ def test_task_cleanup_invalid_sitemaps_comprehensive_invalid_cases(self):
         test_cases = [
             # (bibcode, bib_data, status, description)
             ('2023NoData..1..1A', None, 'success', 'No bib_data'),
-            ('2023EmptyData..1..1B', '', 'success', 'Empty bib_data'),
-            ('2023EmptyData2..1..1C', '   ', 'success', 'Whitespace-only bib_data'),
             ('2023SolrFailed..1..1D', '{"title": "Test"}', 'solr-failed', 'SOLR failed status'),
             ('2023Retrying..1..1E', '{"title": "Test"}', 'retrying', 'Retrying status'),
         ]
@@ -1297,15 +1295,15 @@ def test_task_cleanup_invalid_sitemaps_comprehensive_invalid_cases(self):
             
             # Verify setup
             total_records = session.query(SitemapInfo).count()
-            self.assertEqual(total_records, 7, "Should have 7 records (5 invalid + 2 valid)")
+            self.assertEqual(total_records, 5, "Should have 5 records (3 invalid + 2 valid)")
         
         # Execute cleanup
         with patch.object(self.app, 'delete_sitemap_files') as mock_delete_files:
             result = tasks.task_cleanup_invalid_sitemaps()
         
-        # Verify all invalid records were removed
-        self.assertEqual(result['invalid_removed'], 5, "Should remove all 5 invalid records")
-        self.assertEqual(result['total_processed'], 7, "Should process all 7 records")
+        # Verify invalid records were removed
+        self.assertEqual(result['invalid_removed'], 3, "Should remove 3 invalid records (None, solr-failed, retrying)")
+        self.assertEqual(result['total_processed'], 5, "Should process all 5 records")
         self.assertTrue(result['files_regenerated'], "Should indicate files need regeneration")
         
         # Verify files were flagged for regeneration (1 file with mixed valid/invalid records)
@@ -1313,16 +1311,18 @@ def test_task_cleanup_invalid_sitemaps_comprehensive_invalid_cases(self):
         
         # Verify database state
         with self.app.session_scope() as session:
-            # Two valid records should remain
+            # Only 2 valid records should remain
             remaining_records = session.query(SitemapInfo).all()
             self.assertEqual(len(remaining_records), 2, "Should have 2 remaining valid records")
             remaining_bibcodes = {r.bibcode for r in remaining_records}
-            self.assertEqual(remaining_bibcodes, {valid_bibcode, valid_bibcode_in_mixed_file}, "Both valid records should remain")
+            expected_bibcodes = {valid_bibcode, valid_bibcode_in_mixed_file}
+            self.assertEqual(remaining_bibcodes, expected_bibcodes, "Both valid records should remain")
             
-            # All invalid records should be gone
-            for bibcode, _, _, description in test_cases:
+            # All invalid records (None bib_data, solr-failed, retrying) should be removed
+            removed_bibcodes = {'2023NoData..1..1A', '2023SolrFailed..1..1D', '2023Retrying..1..1E'}
+            for bibcode in removed_bibcodes:
                 invalid_count = session.query(SitemapInfo).filter_by(bibcode=bibcode).count()
-                self.assertEqual(invalid_count, 0, f"Invalid record should be removed: {description}")
+                self.assertEqual(invalid_count, 0, f"Invalid record should be removed: {bibcode}")
 
     def test_delete_by_bibcode_marks_sitemap_files_for_regeneration(self):
         """Test that delete_by_bibcode properly marks affected sitemap files for regeneration"""