Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions metadata_fetcher/fetchers/Fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def fetch_page(self):
f"[{self.collection_id}]: fetching page {self.write_page} "
f"at {page.get('url')}"
)
if 'url' not in page:
raise InvalidHarvestEndpoint(
f"[{self.collection_id}]: invalid harvest endpoint")
try:
response = requests.get(**page)
response.raise_for_status()
Expand Down Expand Up @@ -91,7 +94,7 @@ def build_fetch_request(self):
{'headers': {}, 'params': {}} or any other options accepted by
https://docs.python-requests.org/en/latest/api/#requests.get
"""
pass
raise NotImplementedError

def get_records(self, http_resp):
"""parses http_resp from institutional API into a list of records
Expand All @@ -100,7 +103,7 @@ def get_records(self, http_resp):
by json.dumps into json line format; takes as an argument:
https://docs.python-requests.org/en/latest/api/#requests.Response
"""
pass
raise NotImplementedError

def increment(self, http_resp):
"""increment internal state for fetching the next page
Expand Down
11 changes: 8 additions & 3 deletions metadata_fetcher/fetchers/oai_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class OaiFetcher(Fetcher):
def __init__(self, params):
super(OaiFetcher, self).__init__(params)

self.oai = params.get('harvest_data')
self.oai = params.get('harvest_data', {})

if self.oai.get('harvest_extra_data'):
# see if we have a query string,
Expand Down Expand Up @@ -68,8 +68,13 @@ def build_fetch_request(self):

def check_page(self, http_resp: requests.Response) -> int:
xml_resp = ElementTree.fromstring(http_resp.content)
xml_hits = xml_resp.find(
'oai2:ListRecords', NAMESPACE).findall('oai2:record', NAMESPACE)

xml_list = xml_resp.find('oai2:ListRecords', NAMESPACE)
if xml_list is None:
raise ValueError(
f"No records found in response from {http_resp.request.url}")

xml_hits = xml_list.findall('oai2:record', NAMESPACE)

if len(xml_hits) > 0:
logging.debug(
Expand Down