Skip to content

Commit ef052b3

Browse files
lior-airis and claude committed
fix: add DOI to prefix_mapping in _get_not_found_ids
The `_get_not_found_ids` method was missing DOI from its `prefix_mapping` dict. When a paper was looked up using a DOI-prefixed ID (e.g., `DOI:10.1145/792550.792552`), the method would add only the bare DOI value to `found_ids` (without the `DOI:` prefix), causing the input ID to never match. This resulted in a false "IDs not found" warning for every DOI-prefixed lookup, even when the paper was successfully returned.

Also changed the matching logic to always add bare external ID values alongside prefixed forms, so both `DOI:10.1145/...` and `10.1145/...` inputs match correctly. This is consistent with how the Semantic Scholar API accepts both forms.

Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent 93bc2ca commit ef052b3

2 files changed

Lines changed: 27 additions & 3 deletions

File tree

semanticscholar/AsyncSemanticScholar.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ def _get_not_found_ids(self, paper_ids, papers):
228228

229229
prefix_mapping = {
230230
'ARXIV': 'ArXiv',
231+
'DOI': 'DOI',
231232
'MAG': 'MAG',
232233
'ACL': 'ACL',
233234
'PMID': 'PubMed',
@@ -241,11 +242,10 @@ def _get_not_found_ids(self, paper_ids, papers):
241242
found_ids.add(paper.paperId)
242243
if paper.externalIds:
243244
for prefix, value in paper.externalIds.items():
245+
found_ids.add(f'{value}')
244246
if prefix.lower() in prefix_mapping:
245247
found_ids.add(
246248
f'{prefix_mapping[prefix.lower()]}:{value}')
247-
else:
248-
found_ids.add(f'{value}')
249249
found_ids = {id.lower() for id in found_ids}
250250

251251
not_found_ids = [id for id in paper_ids if id.lower() not in found_ids]

tests/test_semanticscholar.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,18 @@ def test_get_papers_return_not_found(self):
284284
self.assertEqual(len(not_found), 1)
285285
self.assertEqual(not_found[0], 'CorpusId:211530585')
286286

287+
def test_get_papers_doi_prefix_not_false_positive(self):
288+
paper = Paper({
289+
'paperId': 'abc123',
290+
'externalIds': {
291+
'DOI': '10.1145/792550.792552',
292+
'CorpusId': 12345
293+
}
294+
})
295+
not_found = self.sch._AsyncSemanticScholar._get_not_found_ids(
296+
['DOI:10.1145/792550.792552'], [paper])
297+
self.assertEqual(not_found, [])
298+
287299
@test_vcr.use_cassette
288300
def test_get_paper_authors(self):
289301
data = self.sch.get_paper_authors('10.2139/ssrn.2250500')
@@ -829,7 +841,19 @@ async def test_get_papers_return_not_found_async(self):
829841
not_found = data[1]
830842
self.assertEqual(len(not_found), 1)
831843
self.assertEqual(not_found[0], 'CorpusId:211530585')
832-
844+
845+
def test_get_papers_doi_prefix_not_false_positive_async(self):
846+
paper = Paper({
847+
'paperId': 'abc123',
848+
'externalIds': {
849+
'DOI': '10.1145/792550.792552',
850+
'CorpusId': 12345
851+
}
852+
})
853+
not_found = self.sch._get_not_found_ids(
854+
['DOI:10.1145/792550.792552'], [paper])
855+
self.assertEqual(not_found, [])
856+
833857
@test_vcr.use_cassette
834858
async def test_get_paper_authors_async(self):
835859
data = await self.sch.get_paper_authors('10.2139/ssrn.2250500')

0 commit comments

Comments
 (0)