diff --git a/iiify/search.py b/iiify/search.py index a44e219..cfc5e3c 100644 --- a/iiify/search.py +++ b/iiify/search.py @@ -1,29 +1,24 @@ import requests from .resolver import ARCHIVE, URI_PRIFIX from bs4 import BeautifulSoup -import json -def buildSearchURL(identifier, query): +def build_search_url(identifier, query): response = requests.get(f"{ARCHIVE}/metadata/{identifier}") - response.raise_for_status() - + response.raise_for_status() metadata = response.json() - - return f"https://{metadata['server']}/fulltext/inside.php?item_id={identifier}&doc={identifier}&path={metadata['dir']}&q={query}" + doc = [file['name'].split('_djvu.xml')[0] for file in metadata["files"] if '_djvu.xml' in file['name']][0] + return f"https://{metadata['server']}/fulltext/inside.php?item_id={identifier}&doc={doc}&path={metadata['dir']}&q={query}" def iiif_search(identifier, query): - url = buildSearchURL(identifier, query) - - # print (f"Search URL:\n{url}") + url = build_search_url(identifier, query) response = requests.get(url) response.raise_for_status() ia_response = response.json() - # print (json.dumps(ia_response, indent=4)) searchResponse = { "@context":"http://iiif.io/api/presentation/2/context.json", "@id": f"{URI_PRIFIX}/search/{identifier}?q={query}", - "@type":"sc:AnnotationList", + "@type": "sc:AnnotationList", "resources": [ ] @@ -51,8 +46,7 @@ def iiif_search(identifier, query): for box in paragraph['boxes']: x = int(box['l']) - y = int (box['t']) - right = 0 + y = int(box['t']) # If r is missing then use the paragraph if 'r' in box: right = int(box['r']) @@ -61,7 +55,7 @@ def iiif_search(identifier, query): width = right - x height = int(box['b']) - y - page = int(paragraph['page']) - 1 + page = int(paragraph['page']) - 1 if "leaf0_missing" in ia_response and ia_response['leaf0_missing'] == False: page = int(paragraph['page']) @@ -82,4 +76,4 @@ def iiif_search(identifier, query): matchNo += 1 count += 1 - return searchResponse \ No newline at end of file + return searchResponse diff --git a/tests/test_search.py b/tests/test_search.py index 5d40e64..610e7a1 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -74,7 +74,7 @@ def mock_response(url, *args, **kwargs): metadataPatch.side_effect = mock_response - self.assertEqual(search.buildSearchURL("journalofexpedit00ford", "query"), "https://ia601302.us.archive.org/fulltext/inside.php?item_id=journalofexpedit00ford&doc=journalofexpedit00ford&path=/31/items/journalofexpedit00ford&q=query", "Unexpected search query") + self.assertEqual(search.build_search_url("journalofexpedit00ford", "query"), "https://ia601302.us.archive.org/fulltext/inside.php?item_id=journalofexpedit00ford&doc=journalofexpedit00ford&path=/31/items/journalofexpedit00ford&q=query", "Unexpected search query") @patch("requests.get") def multi_box(self, searchPatch):