From 6e40402aabaef6f1ffb36e76562fd0629d668ffa Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Mon, 27 Jan 2025 15:40:30 +0000 Subject: [PATCH] only keeps update files that are newer than one year --- aim/hathifiles/poll.py | 24 ++++++++++++++++++++-- tests/hathifiles/test_poll.py | 38 ++++++++++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/aim/hathifiles/poll.py b/aim/hathifiles/poll.py index 131f253..fa55da1 100644 --- a/aim/hathifiles/poll.py +++ b/aim/hathifiles/poll.py @@ -1,6 +1,7 @@ import requests import json import os +from datetime import datetime, timedelta from typing import Type from aim.services import S @@ -25,7 +26,7 @@ def get_latest_update_files(): for just a list of update files. Returns: - _type_: flat list of update file names + list: flat list of update file names """ return filter_for_update_files(get_hathi_file_list()) @@ -97,6 +98,23 @@ def notify_webhook(self): else: response.raise_for_status() + @property + def slim_store(self): + """ + Removes files from the store that are over one year old + + Returns: + list: list of update files that are newer than one year + """ + last_year = datetime.today() - timedelta(days=365) + slimmed_store = [] + for file_name in self.store: + end = file_name.split("_")[2] + date = datetime.strptime(end.split(".")[0], "%Y%m%d") + if date > last_year: + slimmed_store.append(file_name) + return slimmed_store + def replace_store(self, store_path: str = S.hathifiles_store_path): """ Replaces the store file with a list of hathifile update files @@ -105,7 +123,9 @@ def replace_store(self, store_path: str = S.hathifiles_store_path): store_path (str, optional): path to hathifiles store file. Defaults to S.hathifiles_store_path. """ with open(store_path, "w") as f: - json.dump((self.store + self.new_files), f, ensure_ascii=False, indent=4) + json.dump( + (self.slim_store + self.new_files), f, ensure_ascii=False, indent=4 + ) S.logger.info("Update store SUCCESS") diff --git a/tests/hathifiles/test_poll.py b/tests/hathifiles/test_poll.py index 4346f78..a339d97 100644 --- a/tests/hathifiles/test_poll.py +++ b/tests/hathifiles/test_poll.py @@ -2,6 +2,7 @@ import json import responses from responses import matchers +from datetime import datetime, timedelta import os from requests.exceptions import HTTPError from structlog.testing import capture_logs @@ -29,6 +30,24 @@ def cleanup_temp_files(temp_dir): os.remove(file) +@pytest.fixture +def today_file_name(): + today = datetime.today().strftime("%Y%m%d") + return f"hathi_upd_{today}.txt.gz" + + +@pytest.fixture +def yesterday_file_name(): + yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y%m%d") + return f"hathi_upd_{yesterday}.txt.gz" + + +@pytest.fixture +def last_year_file_name(): + last_year = (datetime.today() - timedelta(days=366)).strftime("%Y%m%d") + return f"hathi_upd_{last_year}.txt.gz" + + @pytest.fixture def file_list_data(): with open("tests/fixtures/hathifiles/poll/hathi_file_list.json") as f: @@ -153,16 +172,29 @@ def test_new_file_handler_notify_webhook_fail(): assert webhook_stub.call_count == 1 -def test_new_file_handler_replace_store(temp_dir): +def test_new_file_handler_replace_store( + temp_dir, today_file_name, yesterday_file_name, last_year_file_name +): store_path = temp_dir / "test_store_file.json" with open(store_path, "w") as f: f.write("This_is_a_line") - handler = NewFileHandler(new_files=["new_file"], store=["old_file"]) + handler = NewFileHandler( + new_files=[today_file_name], store=[yesterday_file_name, last_year_file_name] + ) handler.replace_store(store_path) with open(store_path, "r") as f: file_contents = json.load(f) - assert file_contents == ["old_file", "new_file"] + assert file_contents == [yesterday_file_name, today_file_name] + + +def test_new_file_handler_slim_store(yesterday_file_name, last_year_file_name): + handler = NewFileHandler( + new_files=["new_file"], store=[yesterday_file_name, last_year_file_name] + ) + + slimmed_store = handler.slim_store + assert slimmed_store == [yesterday_file_name]