Skip to content

Commit

Permalink
only keeps update files that are newer than one year
Browse files Browse the repository at this point in the history
  • Loading branch information
niquerio committed Jan 27, 2025
1 parent c823fc3 commit 6e40402
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 5 deletions.
24 changes: 22 additions & 2 deletions aim/hathifiles/poll.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import requests
import json
import os
from datetime import datetime, timedelta
from typing import Type
from aim.services import S

Expand All @@ -25,7 +26,7 @@ def get_latest_update_files():
for just a list of update files.
Returns:
_type_: flat list of update file names
list: flat list of update file names
"""
return filter_for_update_files(get_hathi_file_list())

Expand Down Expand Up @@ -97,6 +98,23 @@ def notify_webhook(self):
else:
response.raise_for_status()

@property
def slim_store(self):
    """
    Filters the store down to update files dated within the last 365 days.

    The date is taken from the third underscore-separated piece of each
    file name (the text before its first "."), parsed as YYYYMMDD.
    self.store is not modified; a new list is built.

    Returns:
        list: file names from the store whose date is later than 365 days
            before now, in their original order
    """
    cutoff = datetime.today() - timedelta(days=365)

    def dated(name):
        # e.g. "hathi_upd_20250126.txt.gz" -> "20250126.txt.gz" -> "20250126"
        stamp = name.split("_")[2].split(".")[0]
        return datetime.strptime(stamp, "%Y%m%d")

    return [name for name in self.store if dated(name) > cutoff]

def replace_store(self, store_path: str = S.hathifiles_store_path):
"""
Replaces the store file with a list of hathifile update files
Expand All @@ -105,7 +123,9 @@ def replace_store(self, store_path: str = S.hathifiles_store_path):
store_path (str, optional): path to hathifiles store file. Defaults to S.hathifiles_store_path.
"""
with open(store_path, "w") as f:
json.dump((self.store + self.new_files), f, ensure_ascii=False, indent=4)
json.dump(
(self.slim_store + self.new_files), f, ensure_ascii=False, indent=4
)

S.logger.info("Update store SUCCESS")

Expand Down
38 changes: 35 additions & 3 deletions tests/hathifiles/test_poll.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import responses
from responses import matchers
from datetime import datetime, timedelta
import os
from requests.exceptions import HTTPError
from structlog.testing import capture_logs
Expand Down Expand Up @@ -29,6 +30,24 @@ def cleanup_temp_files(temp_dir):
os.remove(file)


@pytest.fixture
def today_file_name():
    """Name of a hathifiles update file stamped with today's date (YYYYMMDD)."""
    return f"hathi_upd_{datetime.today():%Y%m%d}.txt.gz"


@pytest.fixture
def yesterday_file_name():
    """Name of a hathifiles update file stamped with the date one day ago."""
    one_day_ago = datetime.today() - timedelta(days=1)
    return f"hathi_upd_{one_day_ago:%Y%m%d}.txt.gz"


@pytest.fixture
def last_year_file_name():
    """Name of a hathifiles update file stamped with the date 366 days ago."""
    over_a_year_ago = datetime.today() - timedelta(days=366)
    return f"hathi_upd_{over_a_year_ago:%Y%m%d}.txt.gz"


@pytest.fixture
def file_list_data():
with open("tests/fixtures/hathifiles/poll/hathi_file_list.json") as f:
Expand Down Expand Up @@ -153,16 +172,29 @@ def test_new_file_handler_notify_webhook_fail():
assert webhook_stub.call_count == 1


def test_new_file_handler_replace_store(temp_dir):
def test_new_file_handler_replace_store(
temp_dir, today_file_name, yesterday_file_name, last_year_file_name
):
store_path = temp_dir / "test_store_file.json"
with open(store_path, "w") as f:
f.write("This_is_a_line")

handler = NewFileHandler(new_files=["new_file"], store=["old_file"])
handler = NewFileHandler(
new_files=[today_file_name], store=[yesterday_file_name, last_year_file_name]
)

handler.replace_store(store_path)

with open(store_path, "r") as f:
file_contents = json.load(f)

assert file_contents == ["old_file", "new_file"]
assert file_contents == [yesterday_file_name, today_file_name]


def test_new_file_handler_slim_store(yesterday_file_name, last_year_file_name):
    """slim_store keeps the day-old file and leaves out the 366-day-old one."""
    stored = [yesterday_file_name, last_year_file_name]
    handler = NewFileHandler(new_files=["new_file"], store=stored)

    assert handler.slim_store == [yesterday_file_name]

0 comments on commit 6e40402

Please sign in to comment.