Skip to content

Commit bfe2822

Browse files
lior-airis and claude committed
fix: add DOI to prefix_mapping in _get_not_found_ids
The `_get_not_found_ids` method was missing DOI from its `prefix_mapping` dict. When a paper was looked up using a DOI-prefixed ID (e.g., `DOI:10.1145/792550.792552`), the method would add only the bare DOI value to `found_ids` (without the `DOI:` prefix), causing the input ID to never match. This resulted in a false "IDs not found" warning for every DOI-prefixed lookup, even when the paper was successfully returned.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 93bc2ca commit bfe2822

2 files changed

Lines changed: 26 additions & 1 deletion

File tree

semanticscholar/AsyncSemanticScholar.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -228,6 +228,7 @@ def _get_not_found_ids(self, paper_ids, papers):
228228

229229
prefix_mapping = {
230230
'ARXIV': 'ArXiv',
231+
'DOI': 'DOI',
231232
'MAG': 'MAG',
232233
'ACL': 'ACL',
233234
'PMID': 'PubMed',

tests/test_semanticscholar.py

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -284,6 +284,18 @@ def test_get_papers_return_not_found(self):
284284
self.assertEqual(len(not_found), 1)
285285
self.assertEqual(not_found[0], 'CorpusId:211530585')
286286

287+
def test_get_papers_doi_prefix_not_false_positive(self):
288+
paper = Paper({
289+
'paperId': 'abc123',
290+
'externalIds': {
291+
'DOI': '10.1145/792550.792552',
292+
'CorpusId': 12345
293+
}
294+
})
295+
not_found = self.sch._AsyncSemanticScholar._get_not_found_ids(
296+
['DOI:10.1145/792550.792552'], [paper])
297+
self.assertEqual(not_found, [])
298+
287299
@test_vcr.use_cassette
288300
def test_get_paper_authors(self):
289301
data = self.sch.get_paper_authors('10.2139/ssrn.2250500')
@@ -829,7 +841,19 @@ async def test_get_papers_return_not_found_async(self):
829841
not_found = data[1]
830842
self.assertEqual(len(not_found), 1)
831843
self.assertEqual(not_found[0], 'CorpusId:211530585')
832-
844+
845+
def test_get_papers_doi_prefix_not_false_positive_async(self):
846+
paper = Paper({
847+
'paperId': 'abc123',
848+
'externalIds': {
849+
'DOI': '10.1145/792550.792552',
850+
'CorpusId': 12345
851+
}
852+
})
853+
not_found = self.sch._get_not_found_ids(
854+
['DOI:10.1145/792550.792552'], [paper])
855+
self.assertEqual(not_found, [])
856+
833857
@test_vcr.use_cassette
834858
async def test_get_paper_authors_async(self):
835859
data = await self.sch.get_paper_authors('10.2139/ssrn.2250500')

0 commit comments

Comments
 (0)