From 5ac41729e949456161056cd99f2fc81dcb577e2d Mon Sep 17 00:00:00 2001 From: ross-spencer Date: Sun, 16 Jun 2024 16:42:16 +0200 Subject: [PATCH] WIP: more linting work --- src/anz_rosetta_csv/anz_rosetta_csv.py | 4 +- src/anz_rosetta_csv/rosetta_csv_generator.py | 142 +++++++++---------- tests/test_import_generator.py | 4 +- tests/test_original_generator_tests.py | 4 +- 4 files changed, 77 insertions(+), 77 deletions(-) diff --git a/src/anz_rosetta_csv/anz_rosetta_csv.py b/src/anz_rosetta_csv/anz_rosetta_csv.py index f75c4fc..a818e7e 100644 --- a/src/anz_rosetta_csv/anz_rosetta_csv.py +++ b/src/anz_rosetta_csv/anz_rosetta_csv.py @@ -65,7 +65,7 @@ def main(): if len(sys.argv) == 1: parser.print_help() - sys.exit(0) + sys.exit() args = parser.parse_args() if args.args: @@ -89,7 +89,7 @@ def main(): configfile=args.cfg, provenance=args.pro, ) - res = csvgen.export2rosettacsv() + res = csvgen.export_to_rosetta_csv() print(res) sys.exit() diff --git a/src/anz_rosetta_csv/rosetta_csv_generator.py b/src/anz_rosetta_csv/rosetta_csv_generator.py index 36464e4..6b390fd 100644 --- a/src/anz_rosetta_csv/rosetta_csv_generator.py +++ b/src/anz_rosetta_csv/rosetta_csv_generator.py @@ -1,5 +1,6 @@ """Archives New Zealand Rosetta CSV Generator.""" +# pylint: disable=R1710,R0902,R0913,R0912 import configparser as ConfigParser import logging @@ -7,13 +8,16 @@ try: import json_table_schema - from droid_csv_handler_class import * + from droid_csv_handler_class import DroidCSVHandler, GenericCSVHandler from import_sheet_generator import ImportSheetGenerator from provenance_csv_handler_class import ProvenanceCSVHandler from rosetta_csv_sections_class import RosettaCSVSections except ModuleNotFoundError: try: - from src.anz_rosetta_csv.droid_csv_handler_class import * + from src.anz_rosetta_csv.droid_csv_handler_class import ( + DroidCSVHandler, + GenericCSVHandler, + ) from src.anz_rosetta_csv.import_sheet_generator import ImportSheetGenerator from src.anz_rosetta_csv.json_table_schema 
import json_table_schema from src.anz_rosetta_csv.provenance_csv_handler_class import ( @@ -21,7 +25,10 @@ ) from src.anz_rosetta_csv.rosetta_csv_sections_class import RosettaCSVSections except ModuleNotFoundError: - from anz_rosetta_csv.droid_csv_handler_class import * + from anz_rosetta_csv.droid_csv_handler_class import ( + DroidCSVHandler, + GenericCSVHandler, + ) from anz_rosetta_csv.import_sheet_generator import ImportSheetGenerator from anz_rosetta_csv.json_table_schema import json_table_schema from anz_rosetta_csv.provenance_csv_handler_class import ProvenanceCSVHandler @@ -74,6 +81,13 @@ def __init__( logger.error("a configuration file hasn't been provided") sys.exit(1) + self.subseriesmask = None + self.rnumber = None + self.droidlist = None + self.exportlist = None + self.provlist = None + self.duplicates = None + logging.info("reading app config from '%s'", configfile) self.config = ConfigParser.RawConfigParser() self.config.read(configfile) @@ -83,7 +97,7 @@ def __init__( # NOTE: A bit of a hack, compare with import schema work and refactor self.rosettaschema = rosettaschema - self.readRosettaSchema() + self.read_rosetta_schema() # Grab Rosetta Sections rs = RosettaCSVSections(configfile) @@ -118,7 +132,7 @@ def add_csv_value(self, value): field = f'"{value}"' return field - def readRosettaSchema(self): + def read_rosetta_schema(self): """Read the Rosetta Schema File.""" importschemajson = None with open(self.rosettaschema, "r", encoding="utf-8") as rosetta_schema: @@ -128,18 +142,18 @@ def readRosettaSchema(self): importschemadict = importschema.as_dict() importschemaheader = importschema.as_csv_header() - self.rosettacsvheader = ( - importschemaheader + "\n" - ) # TODO: Add newline in JSON Handler class? 
+ self.rosettacsvheader = importschemaheader + "\n" self.rosettacsvdict = importschemadict["fields"] - def createcolumns(self, columno): + def createcolumns(self, column_number): + """Create a number of empty columns in Rosetta CSV.""" columns = "" - for column in range(columno): - columns = columns + '"",' + for _ in range(column_number): + columns = f'{columns}"",' return columns def normalize_spaces(self, filename): + """Normalize spaces in a filename.""" if filename.find(" ") != -1: filename = filename.replace(" ", " ") return self.normalize_spaces(filename) @@ -157,10 +171,7 @@ def compare_filenames_as_titles(self, droidrow, listcontroltitle): def get_droid_value( self, checksum, lc_title, lc_sub_series, rosetta_field, droid_field, path_mask ): - """ - # TODO: Potentially index droidlist by MD5 or SHA-256 in future... - # NOTE: itemtitle is title from Archway List Control... - """ + """Retrieve a row from a DROID sheet.""" returnfield = "" for drow in self.droidlist: addtorow = False @@ -209,34 +220,28 @@ def get_droid_value( return returnfield def csvstringoutput(self, csvlist): - # String output... - csvrows = self.rosettacsvheader - - # TODO: Understand how to get this in rosettacsvsectionclass - # NOTE: Possibly put all basic RosettaCSV stuff in rosettacsvsectionclass? - # Static ROW in CSV Ingest Sheet - SIPROW = ['"",'] * len(self.rosettacsvdict) - SIPROW[0] = '"SIP",' + """Output CSV as a string.""" - # SIP Title... 
+ csvrows = self.rosettacsvheader + sip_row = ['"",'] * len(self.rosettacsvdict) + sip_row[0] = '"SIP",' + sip_title = '"CSV Load",' if self.config.has_option("rosetta mapping", "SIP Title"): - SIPROW[1] = '"' + self.config.get("rosetta mapping", "SIP Title") + '",' - else: - SIPROW[1] = '"CSV Load",' - - csvrows = csvrows + "".join(SIPROW).rstrip(",") + "\n" - + sip_title = f'"{self.config.get("rosetta mapping", "SIP Title")}",' + sip_row[1] = sip_title + sip_row = "".join(sip_row).rstrip(",") + csvrows = f"{csvrows}{sip_row}\n" for sectionrows in csvlist: rowdata = "" for sectionrow in sectionrows: for fielddata in sectionrow: - rowdata = rowdata + fielddata + "," - rowdata = rowdata.rstrip(",") + "\n" + rowdata = f"{rowdata}{fielddata}," + rowdata = f'{rowdata.rstrip(",")}\n' csvrows = csvrows + rowdata - # this is the best i can think of because ExLibris have named two fields with the same - # title in CSV which doesn't help us when we're trying to use unique names for populating rows - # replaces SIP Title with Title (DC) + # this is the best i can think of because ExLibris have named two fields + # with the same title in CSV which doesn't help us when we're trying to + # use unique names for populating rows replaces SIP Title with Title (DC) csvrows = csvrows.replace( '"Object Type","SIP Title"', '"Object Type","Title (DC)"' ) @@ -247,24 +252,21 @@ def csvstringoutput(self, csvlist): return csvrows def handleprovenanceexceptions( - self, PROVENANCE_FIELD, sectionrow, field, csvindex, rnumber + self, provenance_field, sectionrow, field, csvindex, rnumber ): + """Read a provenance CSV file for exceptions on specific rows.""" ignorefield = False - if self.prov is True: - for p in self.provlist: - if p["RECORDNUMBER"] == rnumber: - # These values overwrite the defaults from DROID list... - # Double-check comparison to ensure we're inputting the right values... - # TODO: field == 'MD5' get from config... 
- if (PROVENANCE_FIELD == "CHECKSUM" and field == self.provhash) or ( - PROVENANCE_FIELD == "ORIGINALNAME" - and field == "File Original Name" - ): - if p[PROVENANCE_FIELD].lower().strip() != "ignore": - ignorefield = True - sectionrow[csvindex] = self.add_csv_value( - p[PROVENANCE_FIELD] - ) + if not self.prov: + return False + for p_row in self.provlist: + if p_row["RECORDNUMBER"] != rnumber: + continue + if (provenance_field == "CHECKSUM" and field == self.provhash) or ( + provenance_field == "ORIGINALNAME" and field == "File Original Name" + ): + if p_row[provenance_field].lower().strip() != "ignore": + ignorefield = True + sectionrow[csvindex] = self.add_csv_value(p_row[provenance_field]) return ignorefield def __setpathmask__(self): @@ -306,12 +308,12 @@ def populaterows(self, field, listcontrolitem, sectionrow, csvindex, rnumber): ) # if ignorefield is still false, check our checksum field as well... - if ignorefield is False: + if not ignorefield: ignorefield = self.handleprovenanceexceptions( "CHECKSUM", sectionrow, field, csvindex, rnumber ) - if ignorefield is False: + if not ignorefield: sectionrow[csvindex] = self.add_csv_value( self.get_droid_value( checksum=listcontrolitem["Missing Comment"], @@ -355,8 +357,8 @@ def create_rosetta_csv(self): else: logger.info("subseries mask is not set in config") - CSVINDEXSTARTPOS = 2 - csvindex = CSVINDEXSTARTPOS + csv_index_start = 2 + csvindex = csv_index_start self.rnumber = 0 fields = [] @@ -389,21 +391,19 @@ def create_rosetta_csv(self): field, item, sectionrow, csvindex, self.rnumber ) else: - # we have a misalignment between cfg and json... - # TODO: Output a more useful error message? - sys.exit( - "CSV configuration and schema file do not match. Look for missing fields in either. 
Failed on: " - + str(field) - + " " - + str(self.rosettacsvdict[csvindex]["name"]) + logger.error( + "field in config: '%s' is not aligned with JSON schema '%s'", + field, + self.rosettacsvdict[csvindex]["name"], ) + sys.exit(1) # increment csvindex along the x-axis... csvindex += 1 itemrow.append(sectionrow) fields.append(itemrow) - csvindex = CSVINDEXSTARTPOS + csvindex = csv_index_start return self.csvstringoutput(fields) @@ -430,30 +430,30 @@ def listduplicates(self): dupe.append(cs) return set(dupe) - def readExportCSV(self): + def read_export_csv(self): + """Read a list control CSV.""" if self.exportsheet is not False: csvhandler = GenericCSVHandler() exportlist = csvhandler.csv_as_list(self.exportsheet) return exportlist - def readDROIDCSV(self): + def read_droid_csv(self): + """Read a DROID CSV.""" if self.droidcsv is not False: droidcsvhandler = DroidCSVHandler() droidlist = droidcsvhandler.read_droid_csv(self.droidcsv) droidlist = droidcsvhandler.remove_folders(droidlist) return droidcsvhandler.remove_container_contents(droidlist) - def export2rosettacsv(self): + def export_to_rosetta_csv(self): + """Convert a list control and droid sheet to a Rosetta CSV.""" if self.droidcsv is not False and self.exportsheet is not False: - self.droidlist = self.readDROIDCSV() - self.exportlist = self.readExportCSV() - # self.readRosettaSchema() #NOTE: Moved to constructor... 
TODO: Refactor - + self.droidlist = self.read_droid_csv() + self.exportlist = self.read_export_csv() if self.prov is True: provhandler = ProvenanceCSVHandler() self.provlist = provhandler.read_provenance_csv(self.provfile) if self.provlist is None: self.prov = False - self.duplicates = self.listduplicates() return self.create_rosetta_csv() diff --git a/tests/test_import_generator.py b/tests/test_import_generator.py index 0e26dfd..1693789 100644 --- a/tests/test_import_generator.py +++ b/tests/test_import_generator.py @@ -532,7 +532,7 @@ def test_csv_generation(tmp_path): config_file, prov_file, ) - res = csvgen.export2rosettacsv() + res = csvgen.export_to_rosetta_csv() assert res.strip() == result.strip() @@ -654,5 +654,5 @@ def test_duplicates(tmp_path): config_file, "", ) - res = csvgen.export2rosettacsv() + res = csvgen.export_to_rosetta_csv() assert res.strip() == dupe_result.strip() diff --git a/tests/test_original_generator_tests.py b/tests/test_original_generator_tests.py index aa32851..3c19e94 100644 --- a/tests/test_original_generator_tests.py +++ b/tests/test_original_generator_tests.py @@ -15,7 +15,7 @@ def test_normalize_spaces(mocker): """Test spaces are normalized correctly.""" placeholder_config = io.StringIO("") - mocker.patch.object(RosettaCSVGenerator, "readRosettaSchema") + mocker.patch.object(RosettaCSVGenerator, "read_rosetta_schema") rosetta_csv_gen = RosettaCSVGenerator(False, False, False, placeholder_config) @@ -76,7 +76,7 @@ def test_normalize_spaces(mocker): def test_compare_filenames_as_titles(mocker): """Test that filenames are compared correctly.""" placeholder_config = io.StringIO("") - mocker.patch.object(RosettaCSVGenerator, "readRosettaSchema") + mocker.patch.object(RosettaCSVGenerator, "read_rosetta_schema") rosetta_csv_gen = RosettaCSVGenerator(False, False, False, placeholder_config) # Standard true comparison, expected result