diff --git a/python_scripts/one_time_scripts/update_locationbuildingfloor.py b/python_scripts/one_time_scripts/update_locationbuildingfloor.py
new file mode 100644
index 0000000..ac176e7
--- /dev/null
+++ b/python_scripts/one_time_scripts/update_locationbuildingfloor.py
@@ -0,0 +1,211 @@
+#!/usr/bin/python3
+# This script retrieves location IDs with a given building name passed in the oB (originalBuilding) argument using an SQL
+# query to the ASpace database. Then it takes a list of those IDs and retrieves their JSON data from the API and
+# updates the building field with the argument passed in the uB (updatedBuilding) argument. Then it posts the updated
+# location JSON data to ArchivesSpace, saving the original JSON data in a given .jsonl file and logging the results of
+# the update to a given log file.
+# If -mR=True is passed, it also moves each matching location's Room value into its Floor field.
+# NOTE: put "" around the building name if it contains spaces, like so: -uB="NMAH-FSD, Building 92"
+import argparse
+import os
+import sys
+
+from copy import deepcopy
+from dotenv import load_dotenv, find_dotenv
+from http.client import HTTPException
+from loguru import logger
+from pathlib import Path
+
+sys.path.append(os.path.dirname('python_scripts'))  # Needed to import functions from utilities.py
+from python_scripts.utilities import ASpaceAPI, ASpaceDatabase, record_error, write_to_file
+
+# Find and load environment-specific .env file
+env_file = find_dotenv(f'.env.{os.getenv("ENV", "dev")}')
+load_dotenv(env_file)
+
+
+def _str_to_bool(value):
+    """
+    Converts the text of a True/False command-line value to a bool.
+
+    argparse's type=bool treats every non-empty string, including "False", as True, so the value is parsed here.
+
+    Args:
+        value (str): the text passed on the command line, ex. "True", "false", "1", "no"
+
+    Returns:
+        parsed_value (bool): the boolean the text stands for; an empty string counts as False
+
+    Raises:
+        argparse.ArgumentTypeError: if the text is not a recognizable True/False value
+    """
+    normalized = value.strip().lower()
+    if normalized in ('true', 't', 'yes', 'y', '1'):
+        return True
+    if normalized in ('false', 'f', 'no', 'n', '0', ''):
+        return False
+    raise argparse.ArgumentTypeError(f'expected True or False, got "{value}"')
+
+
+def parseArguments():
+    """Parses the arguments fed to the script from the terminal or within a run configuration"""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("-oB", "--originalBuilding", help="the original building name to search for",
+                        type=str, required=True)
+    parser.add_argument("-uB", "--updatedBuilding", help="the updated building name", type=str)
+    # type=bool would turn "-mR=False" into True, so parse the text explicitly
+    parser.add_argument("-mR", "--moveRoomFloor", help="move Room value to Floor if True",
+                        type=_str_to_bool, default=False)
+    parser.add_argument("jsonPath", help="path to the JSONL file for storing original location data",
+                        type=str)
+    parser.add_argument("logFolder", help="path to the log folder for storing log files", type=str)
+    parser.add_argument("-dR", "--dry-run", help="dry run?", action='store_true')
+    parser.add_argument("--version", action="version", version='%(prog)s - Version 1.0')
+
+    return parser.parse_args()
+
+
+def location_ids(original_building_name, aspace_db_connection):
+    """
+    Performs an SQL search for a building name and returns the location IDs of all that match.
+
+    Args:
+        original_building_name (str): the text of the building name to search for in the location table
+        aspace_db_connection (ASpaceDatabase instance): connection instance to the ASpace database
+
+    Returns:
+        matching_ids (list): all the matching location IDs
+    """
+    # query_database() takes a finished SQL string, so escape the name before embedding it; otherwise a building
+    # name containing " ends the string literal early (syntax error at best, SQL injection at worst).
+    # NOTE(review): assumes the server's sql_mode keeps backslash escapes enabled (MySQL default) - confirm
+    escaped_name = original_building_name.replace('\\', '\\\\').replace('"', '\\"')
+    find_building = ('SELECT location.id FROM location '
+                     'WHERE '
+                     f'location.building = "{escaped_name}"')
+    sql_results = aspace_db_connection.query_database(find_building)
+    return [result[0] for result in sql_results]
+
+
+def update_building_name(location_ids, update_name, aspace_connection):
+    """
+    Posts one batch update to ArchivesSpace that sets the building value on every provided location.
+
+    Args:
+        location_ids (list): the URIs (ex. '/locations/48008') for all the locations to update
+        update_name (str): the building name to update on the given locations
+        aspace_connection (ASpaceAPI Instance): an instance of the ASpaceAPI class from utilities.py
+
+    Returns:
+        post_message (list): the JSON response from the POST request; None if the request failed or returned an error
+    """
+    try:
+        post_message = aspace_connection.aspace_client.post('locations/batch_update',
+                                                            json={"record_uris": location_ids,
+                                                                  "building": update_name}).json()
+    except HTTPException as post_error:
+        record_error(f'update_building_name() - Unable to make post request with record_uris: {location_ids}; '
+                     f'building: {update_name}', post_error)
+        return None
+    if 'error' in post_message:
+        record_error(f'update_building_name() - Unable to update building for locations: {location_ids}',
+                     post_message)
+        return None
+    return post_message
+
+
+def move_room_to_floor(location_json):
+    """
+    Takes a location JSON record and replaces the floor value with the room value and deletes the room value from
+    the room field.
+
+    Args:
+        location_json (dict): the JSON data for the location object
+
+    Returns:
+        updated_location (dict): a copy of the JSON data with the updated floor and room values; None if the record
+            has no room field or the room field is empty
+    """
+    if 'room' not in location_json:
+        record_error(f'move_room_to_floor() - Room field not found in {location_json}', ValueError)
+        return None
+    if not location_json['room']:
+        # An empty room must never overwrite floor, whether or not the record already has a floor field
+        record_error(f'move_room_to_floor() - No value in Room field to move to Floor field for {location_json}',
+                     ValueError)
+        return None
+    updated_location = deepcopy(location_json)  # leave the caller's record untouched
+    updated_location['floor'] = updated_location['room']
+    updated_location['room'] = ''
+    return updated_location
+
+
+def main(original_building, jsonl_path, updated_building=None, move_floor=False, dry_run=False):
+    """
+    Backs up every location in the original building to a .jsonl file, optionally moves each Room value to Floor and
+    optionally renames the building on all of them with a single batch update.
+
+    NOTE: put "" around the building name if it contains spaces, like so: -uB="NMAH-FSD, Building 92"
+
+    Args:
+        original_building (str): the text of the building name to search for in the Locations table
+        jsonl_path (str): filepath of the jsonL file for storing JSON data of objects before updates - backup
+        updated_building (str): the updated text of the building name for matching Locations
+        move_floor (bool): if True, move the value in Room to the Floor field and remove Room value from Room field
+        dry_run (bool): if True, it prints the intended changes but does not post anything to ASpace
+    """
+    local_aspace = ASpaceAPI(os.getenv('as_api'), os.getenv('as_un'), os.getenv('as_pw'))
+    as_database = ASpaceDatabase(os.getenv('db_un'), os.getenv('db_pw'), os.getenv('db_host'), os.getenv('db_name'),
+                                 os.getenv('db_port'))
+    matching_ids = location_ids(original_building, as_database)
+    location_uris = []
+    for location_id in matching_ids:
+        location_json = local_aspace.get_object('locations', location_id)
+        if not location_json:
+            # NOTE(review): presumably get_object() returns nothing when retrieval fails - verify in utilities.py.
+            # Without a backup copy the location is left alone.
+            continue
+        write_to_file(jsonl_path, location_json)
+        location_uris.append(f'/locations/{location_id}')
+        if not move_floor:
+            continue
+        updated_location = move_room_to_floor(location_json)
+        if dry_run:
+            print(f'This is the updated location: {updated_location}')
+        elif updated_location:
+            update_result = local_aspace.update_object(updated_location['uri'], updated_location)
+            print(update_result)
+            logger.info(update_result)
+    if updated_building and location_uris:
+        if dry_run:
+            # The batch update is a POST, so a dry run must stop before calling update_building_name()
+            print(f'Dry run - building would be updated to "{updated_building}" for: {location_uris}')
+        else:
+            # Send every matching URI; the response is the batch result, not a location record to re-post
+            batch_result = update_building_name(location_uris, updated_building, local_aspace)
+            if batch_result:
+                print(batch_result)
+                logger.info(batch_result)
+
+
+# Call with: python update_locationbuildingfloor.py -oB="<original building>" -uB="<updated building>"
+# -mR=<True|False> <backup file>.jsonl <log folder>
+if __name__ == '__main__':
+    args = parseArguments()
+
+    # Set up log file
+    logger.remove()
+    log_path = Path(args.logFolder, 'update_locationbuilding_{time:YYYY-MM-DD}.log')
+    logger.add(str(log_path), format="{time}-{level}: {message}")
+
+    # Print arguments
+    logger.info(f'Running {sys.argv[0]} script with following arguments: ')
+    print(f'Running {sys.argv[0]} script with following arguments: ')
+    for arg_name, arg_value in vars(args).items():
+        logger.info(f'{arg_name}: {arg_value}')
+        print(f'{arg_name}: {arg_value}')
+
+    # Run function
+    main(original_building=args.originalBuilding, jsonl_path=args.jsonPath, updated_building=args.updatedBuilding,
+         move_floor=args.moveRoomFloor, dry_run=args.dry_run)
diff --git a/python_scripts/repeatable/update_locations.py b/python_scripts/repeatable/update_locationrepo.py
similarity index 100%
rename from python_scripts/repeatable/update_locations.py
rename to python_scripts/repeatable/update_locationrepo.py
diff --git a/python_scripts/utilities.py b/python_scripts/utilities.py
index 4da9bde..1252325 100644
--- a/python_scripts/utilities.py
+++ b/python_scripts/utilities.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python
-from http.client import HTTPException
-
 import mysql.connector as mysql
 import csv
 import jsonlines
@@ -8,6 +6,7 @@
 
 from asnake.client import ASnakeClient
 from asnake.client.web_client import ASnakeAuthError
+from http.client import HTTPException
 from jsonlines import InvalidLineError
 from loguru import logger
 from mysql.connector import errorcode
@@ -224,7 +223,7 @@ def query_database(self, statement):
         """
         try:
             self.cursor.execute(statement)
-        except mysql.Error as error:
+        except mysql.ProgrammingError as error:
             record_error('query_database() - SQL query was invalid', error)
             raise error
         else:
diff --git a/test_data/utilities_testdata.py b/test_data/utilities_testdata.py
index f42aa77..52284e0 100644
--- a/test_data/utilities_testdata.py
+++ b/test_data/utilities_testdata.py
@@ -1,6 +1,6 @@
 test_record_type = 'digital_objects'
-test_object_id = 20
-test_object_repo_uri = '/repositories/12'
+test_object_id = 1250203
+test_object_repo_uri = '/repositories/11'
 test_object_user_identifier = 'NMAI.AC.066.ref21.1'
 
 test_digital_object_dates = {'lock_version': 1,
diff --git a/tests/updatelocationbuilding_tests.py b/tests/updatelocationbuilding_tests.py
new file mode 100644
index 0000000..097e26d
--- /dev/null
+++ b/tests/updatelocationbuilding_tests.py
@@ -0,0 +1,72 @@
+# This script consists of unittests for update_locationbuildingfloor.py
+import contextlib
+import io
+import unittest
+
+from python_scripts.utilities import *
+from python_scripts.one_time_scripts.update_locationbuildingfloor import *
+from test_data.location_testdata import *
+
+# Hardcode to dev env
+env_file = find_dotenv('.env.dev')
+load_dotenv(env_file)
+local_aspace = ASpaceAPI(os.getenv('as_api'), os.getenv('as_un'), os.getenv('as_pw'))
+test_dbconnection = ASpaceDatabase(os.getenv('db_un'), os.getenv('db_pw'), os.getenv('db_host'), os.getenv('db_name'),
+                                   int(os.getenv('db_port')))
+
+
+class TestUpdateLocationBuilding(unittest.TestCase):
+
+    def test_location_ids(self):
+        """Tests that an existing building location returns a list of matching location IDs"""
+        test_building = "NMAH-SHF, Building 19"
+        test_results = location_ids(test_building, test_dbconnection)
+        self.assertIsInstance(test_results, list)
+        # Check for results before indexing so an empty list fails the assertion instead of raising IndexError
+        self.assertTrue(len(test_results) > 0)
+        self.assertIsInstance(test_results[0], int)
+
+
+class TestUpdateBuildingName(unittest.TestCase):
+
+    def test_valid_name(self):
+        """Tests that a batch of location URIs updated with a new building name returns one result per URI"""
+        test_name = "Gutiokipanja"
+        test_location_uris = ["/locations/48008", "/locations/48009"]  # TODO: change these?
+        updated_test_location = update_building_name(test_location_uris, test_name, local_aspace)
+        self.assertIsInstance(updated_test_location, list)
+        self.assertEqual(len(updated_test_location), 2)
+
+
+class TestMoveRoomToFloor(unittest.TestCase):
+
+    def test_good_room(self):
+        """Tests that good data found in the Room field overwrites the data in the Floor field and deleted from
+        the Room field"""
+        room_to_floor_updated = move_room_to_floor(test_location)
+        self.assertEqual(room_to_floor_updated['floor'], test_location['room'])
+        self.assertEqual(room_to_floor_updated['room'], '')
+
+    def test_null_room(self):
+        """Tests that if there is no value in the room field, nothing is returned"""
+        # Work on a copy: test_location is shared by the other tests in this module
+        null_room_location = deepcopy(test_location)
+        null_room_location['room'] = None
+        no_room = move_room_to_floor(null_room_location)
+        self.assertIsNone(no_room)
+
+    def test_missing_keys(self):
+        """Tests that if either the Floor or Room keys are absent in the given location, nothing is returned"""
+        copied_location = deepcopy(test_location)
+        copied_location.pop('floor')
+        copied_location.pop('room')
+        f = io.StringIO()
+        with contextlib.redirect_stdout(f):
+            no_room_floor = move_room_to_floor(copied_location)
+        self.assertTrue(
+            r"""move_room_to_floor() - Room field not found in""" in f.getvalue())
+        self.assertIsNone(no_room_floor)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/tests/updatelocations_tests.py b/tests/updatelocationrepo_tests.py
similarity index 96%
rename from tests/updatelocations_tests.py
rename to tests/updatelocationrepo_tests.py
index 0aaa129..d5b1014 100644
--- a/tests/updatelocations_tests.py
+++ b/tests/updatelocationrepo_tests.py
@@ -3,7 +3,7 @@
 import unittest
 
 from dotenv import load_dotenv, find_dotenv
-from python_scripts.repeatable.update_locations import *
+from python_scripts.repeatable.update_locationrepo import *
 from python_scripts.utilities import *
 
 # Hardcode to dev env
diff --git a/tests/utilities_tests.py b/tests/utilities_tests.py
index b3fb864..4bf49e0 100644
--- a/tests/utilities_tests.py
+++ b/tests/utilities_tests.py
@@ -2,6 +2,7 @@
 import contextlib
 import io
 import json
+import mysql.connector as mysql
 import os
 import unittest
 
@@ -60,7 +61,7 @@ def test_get_digobjs_page(self):
     def test_get_digobjs_set(self):
         """Tests getting an ID Set of digital objects returns the list of digital objects from the API"""
         self.good_aspace_connection.get_repo_info()
-        id_set_values = [20,1204715,314276]
+        id_set_values = [183037,1204715,314276]
         test_digitalobjects = self.good_aspace_connection.get_objects(self.good_aspace_connection.repo_info[1]['uri'],
                                                                       test_record_type,
                                                                       ('id_set', id_set_values))
@@ -86,7 +87,7 @@ def test_bad_digobj(self):
                                                                 test_object_repo_uri)
 
         self.assertTrue(
-            r"""get_object() - Unable to retrieve object with provided URI: /repositories/12/digital_objects/10000000000000000: {'error': 'DigitalObject not found'}""" in f.getvalue())
+            r"""get_object() - Unable to retrieve object with provided URI: /repositories/11/digital_objects/10000000000000000: {'error': 'DigitalObject not found'}""" in f.getvalue())
 
     def test_aspace_post_response(self):
         """Tests that a post with an existing URI returns Status: Updated and no warnings"""
@@ -210,6 +211,17 @@ def test_bad_query(self):
         with self.assertRaises(mysql.Error):
             test_dbconnection.query_database(test_bad_query)
 
+    def test_invalid_query(self):
+        """Tests that an invalid building location name (") returns an escaped error"""
+        invalid_query_syntax = '"'
+        test_query = ('SELECT location.id FROM location '
+                      'WHERE '
+                      f'location.building = "{invalid_query_syntax}"')
+        with self.assertRaises(mysql.ProgrammingError) as raised_exception:
+            test_dbconnection.query_database(test_query)
+        mysql_error = raised_exception.exception
+        self.assertEqual(mysql_error.msg, r'''You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '"""' at line 1''')
+
 
 class TestClientLogin(unittest.TestCase):
 