Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions python_scripts/one_time_scripts/update_locationbuilding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/usr/bin/python3
# This script retrieves location IDs with a given building name passed in the oB (originalBuilding) argument using an SQL
# query to the ASpace database. Then it takes a list of those IDs and retrieves their JSON data from the API and
# updates the building field with the argument passed in the uB (updatedBuilding) argument. Then it posts the updated
# location JSON data to ArchivesSpace, saving the original JSON data in a given .jsonl file and logging the results of
# the update to a given log file.
# NOTE: put "" around the building name if it contains spaces, like so: -uB="NMAH-FSD, Building 92"
import argparse
import os
import sys

from copy import deepcopy
from dotenv import load_dotenv, find_dotenv
from loguru import logger
from pathlib import Path

# os.path.dirname('python_scripts') evaluates to '' (the name has no directory component), so the line below appends
# an empty string to sys.path. NOTE(review): an empty sys.path entry presumably resolves to the current working
# directory, which would mean this script has to be launched from the repository root - confirm
sys.path.append(os.path.dirname('python_scripts')) # Needed to import functions from utilities.py
from python_scripts.utilities import ASpaceAPI, ASpaceDatabase, write_to_file

# Find and load environment-specific .env file: the file name is .env.<ENV>, where ENV is read from the environment
# and falls back to "dev" when it is not set
env_file = find_dotenv(f'.env.{os.getenv("ENV", "dev")}')
load_dotenv(env_file)

def parseArguments(argv=None):
    """
    Parses the arguments fed to the script from the terminal or within a run configuration.

    Args:
        argv (list): optional list of argument strings to parse; when None (the default) argparse reads them from
            sys.argv[1:], so existing calls without arguments behave as before

    Returns:
        argparse.Namespace: parsed values exposed as originalBuilding, updatedBuilding, jsonPath, logFolder and
            dry_run
    """
    parser = argparse.ArgumentParser()

    # Both building names are required: if either were left out it would arrive as None and be interpolated into
    # the SQL search / the posted record as the literal text "None"
    parser.add_argument("-oB", "--originalBuilding", help="the original building name to search for",
                        type=str, required=True)
    parser.add_argument("-uB", "--updatedBuilding", help="the updated building name", type=str, required=True)
    parser.add_argument("jsonPath", help="path to the JSONL file for storing original location data",
                        type=str)
    parser.add_argument("logFolder", help="path to the log folder for storing log files", type=str)
    parser.add_argument("-dR", "--dry-run", help="dry run?", action='store_true')
    parser.add_argument("--version", action="version", version='%(prog)s - Version 1.0')

    return parser.parse_args(argv)

def location_ids(original_building_name, aspace_db_connection):
    """
    Performs an SQL search for a building name and returns the location IDs of all that match.

    The building name is escaped before it is placed inside the double-quoted SQL string literal, so a name
    containing a double quote or a backslash is searched for literally instead of breaking (or altering) the
    statement.

    Args:
        original_building_name (str): the text of the building name to search for in the location table
        aspace_db_connection (ASpaceDatabase instance): connection instance to the ASpace database

    Returns:
        matching_ids (list): all the matching location IDs (the first column of every returned row)
    """
    # query_database() only accepts a finished statement, so the value cannot be bound as a parameter here.
    # Backslashes are doubled and embedded double quotes are written as "" so neither can end the literal early.
    # NOTE(review): backslash doubling assumes the server runs without NO_BACKSLASH_ESCAPES - confirm
    escaped_name = str(original_building_name).replace('\\', '\\\\').replace('"', '""')
    find_building = ('SELECT location.id FROM location '
                     'WHERE '
                     f'location.building = "{escaped_name}"')
    sql_results = aspace_db_connection.query_database(find_building)
    return [result[0] for result in sql_results]

def update_building_name(location_json, updated_name):
    """
    Builds a copy of a location JSON record whose building value is replaced with updated_name.

    The record passed in is left untouched; the change is applied to a deep copy.

    Args:
        location_json (dict): the JSON data for the location object
        updated_name (str): the new building name

    Returns:
        updated_location (dict): the copied JSON data carrying the new building value, or None (after the error is
            logged and printed) when the record has no "building" key
    """
    location_copy = deepcopy(location_json)
    if 'building' not in location_copy:
        # Report the problem record once to the log and once to the terminal, then signal failure with None
        missing_key_message = f'update_building_name() - Error finding key "building" in location JSON for {location_json}'
        logger.error(missing_key_message)
        print(missing_key_message)
        return None
    location_copy['building'] = updated_name
    return location_copy


def main(original_building, updated_building, jsonl_path, dry_run=False):
    """
    Renames the building on every ArchivesSpace location whose building equals original_building.

    The matching location IDs come from an SQL query against the ASpace database. Each location's JSON is then
    fetched through the API, handed to write_to_file() together with jsonl_path as a backup of the record before
    the change, and copied with the building field set to updated_building. Unless dry_run is set, the copy is
    posted back to ArchivesSpace and the result of the update is printed and logged.

    NOTE: put "" around the building name if it contains spaces, like so: -uB="NMAH-FSD, Building 92"

    Args:
        original_building (str): the text of the building name to search for in the Locations table
        updated_building (str): the updated text of the building name for matching Locations
        jsonl_path (str): filepath of the jsonL file for storing JSON data of objects before updates - backup
        dry_run (bool): if True, it prints the changed object_json but does not post the changes to ASpace (the
            backup entry is still written)
    """
    aspace_client = ASpaceAPI(os.getenv('as_api'), os.getenv('as_un'), os.getenv('as_pw'))
    database = ASpaceDatabase(os.getenv('db_un'), os.getenv('db_pw'), os.getenv('db_host'), os.getenv('db_name'),
                              os.getenv('db_port'))
    for matched_id in location_ids(original_building, database):
        fetched_record = aspace_client.get_object('locations', matched_id)
        # Back up the record exactly as fetched before anything is changed
        write_to_file(jsonl_path, fetched_record)
        renamed_record = update_building_name(fetched_record, updated_building)
        if dry_run:
            print(f'This is the updated location: {renamed_record}')
            continue
        if not renamed_record:
            # update_building_name() already reported the record that could not be changed
            continue
        outcome = aspace_client.update_object(renamed_record['uri'], renamed_record)
        print(outcome)
        logger.info(outcome)


# Call with `python update_locationbuilding.py -oB=<original_building_name> -uB=<updated_building_name> <jsonl_filepath>.jsonl <log_folder_path>`
if __name__ == '__main__':
    cli_args = parseArguments()

    # Set up log file: drop the handlers loguru already has and write to a dated file in the given log folder
    logger.remove()
    log_file = Path(cli_args.logFolder, 'update_locationbuilding_{time:YYYY-MM-DD}.log')
    logger.add(str(log_file), format="{time}-{level}: {message}")

    # Record how the script was invoked, in the log file and on the terminal
    banner = f'Running {sys.argv[0]} script with following arguments: '
    logger.info(banner)
    print(banner)
    for arg_name, arg_value in vars(cli_args).items():
        arg_line = f'{arg_name}: {arg_value}'
        logger.info(arg_line)
        print(arg_line)

    # Run function
    main(original_building=cli_args.originalBuilding, updated_building=cli_args.updatedBuilding,
         jsonl_path=cli_args.jsonPath, dry_run=cli_args.dry_run)
2 changes: 1 addition & 1 deletion python_scripts/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def query_database(self, statement):
"""
try:
self.cursor.execute(statement)
except mysql.Error as error:
except mysql.ProgrammingError as error:
record_error('query_database() - SQL query was invalid', error)
raise error
else:
Expand Down
4 changes: 2 additions & 2 deletions test_data/utilities_testdata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
test_record_type = 'digital_objects'
test_object_id = 20
test_object_repo_uri = '/repositories/12'
test_object_id = 1250203
test_object_repo_uri = '/repositories/11'
test_object_user_identifier = 'NMAI.AC.066.ref21.1'

test_digital_object_dates = {'lock_version': 1,
Expand Down
44 changes: 44 additions & 0 deletions tests/updatelocationbuilding_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# This script consists of unittests for update_locationbuilding.py
import unittest

from python_scripts.utilities import *
from python_scripts.one_time_scripts.update_locationbuilding import *
from test_data.location_testdata import *

# Hardcode to dev env: the tests always load .env.dev, regardless of any ENV variable
# NOTE: everything below runs at import time, so importing this module already calls client_login() and constructs
# an ASpaceDatabase from the values found in that file
env_file = find_dotenv('.env.dev')
load_dotenv(env_file)
# NOTE(review): local_aspace is not referenced by the tests in this module - confirm whether it is still needed
local_aspace = client_login(os.getenv('as_api'), os.getenv('as_un'), os.getenv('as_pw'))
# Shared database connection used by test_location_ids; the port is converted from the env string to an int
test_dbconnection = ASpaceDatabase(os.getenv('db_un'), os.getenv('db_pw'), os.getenv('db_host'), os.getenv('db_name'),
                                   int(os.getenv('db_port')))


class TestUpdateLocationBuilding(unittest.TestCase):
    """Tests for location_ids(), run against the module-level test_dbconnection."""

    def test_location_ids(self):
        """Tests that an existing building location returns a list of matching location IDs"""
        test_building = "NMAH-SHF, Building 19"
        test_results = location_ids(test_building, test_dbconnection)
        self.assertIsInstance(test_results, list)
        # Check that something was returned before indexing, so an empty result is reported as a failed assertion
        # instead of an IndexError raised by test_results[0]
        self.assertGreater(len(test_results), 0)
        self.assertIsInstance(test_results[0], int)

class TestUpdateBuildingName(unittest.TestCase):
    """
    Tests for update_building_name().

    The test_location and test_nobuilding_location records are not defined in this module; presumably they come
    from the wildcard import of test_data.location_testdata.
    """

    def test_valid_name(self):
        """Tests that a location JSON object is updated with a new building name in the building field"""
        replacement_building = "Gutiokipanja"
        renamed = update_building_name(test_location, replacement_building)
        self.assertIsInstance(renamed, dict)
        self.assertEqual(renamed['building'], replacement_building)

    def test_no_building_key(self):
        """Tests that a location JSON object is not updated and None is returned if the building key is not found"""
        outcome = update_building_name(test_nobuilding_location, "Gutiokipanja")
        self.assertIsNone(outcome)



# When this file is executed directly, run every test in the module with verbose (per-test) output
if __name__ == "__main__":
    unittest.main(verbosity=2)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import unittest

from dotenv import load_dotenv, find_dotenv
from python_scripts.repeatable.update_locations import *
from python_scripts.repeatable.update_locationrepo import *
from python_scripts.utilities import *

# Hardcode to dev env
Expand Down
16 changes: 14 additions & 2 deletions tests/utilities_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import contextlib
import io
import json
import mysql.connector
import os
import unittest

Expand Down Expand Up @@ -60,7 +61,7 @@ def test_get_digobjs_page(self):
def test_get_digobjs_set(self):
"""Tests getting an ID Set of digital objects returns the list of digital objects from the API"""
self.good_aspace_connection.get_repo_info()
id_set_values = [20,1204715,314276]
id_set_values = [183037,1204715,314276]
test_digitalobjects = self.good_aspace_connection.get_objects(self.good_aspace_connection.repo_info[1]['uri'],
test_record_type,
('id_set', id_set_values))
Expand All @@ -86,7 +87,7 @@ def test_bad_digobj(self):
test_object_repo_uri)

self.assertTrue(
r"""get_object() - Unable to retrieve object with provided URI: /repositories/12/digital_objects/10000000000000000: {'error': 'DigitalObject not found'}""" in f.getvalue())
r"""get_object() - Unable to retrieve object with provided URI: /repositories/11/digital_objects/10000000000000000: {'error': 'DigitalObject not found'}""" in f.getvalue())

def test_aspace_post_response(self):
"""Tests that a post with an existing URI returns Status: Updated and no warnings"""
Expand Down Expand Up @@ -210,6 +211,17 @@ def test_bad_query(self):
with self.assertRaises(mysql.Error):
test_dbconnection.query_database(test_bad_query)

def test_invalid_query(self):
    """Tests that an invalid building location name (") returns an escaped error

    A lone double quote is interpolated into the building search statement, which leaves the SQL string literal
    unbalanced; query_database() is expected to raise mysql.ProgrammingError carrying the server's syntax-error
    message.
    """
    unbalanced_quote = '"'
    malformed_statement = ('SELECT location.id FROM location '
                           'WHERE '
                           f'location.building = "{unbalanced_quote}"')
    expected_message = r'''You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '"""' at line 1'''
    with self.assertRaises(mysql.ProgrammingError) as caught:
        test_dbconnection.query_database(malformed_statement)
    # Inspect the captured exception only after the with block has exited
    self.assertEqual(caught.exception.msg, expected_message)


class TestClientLogin(unittest.TestCase):

Expand Down
Loading