AR233: refactor JSON output + fix list export to CSV
Alban Peyrat (Archi) authored and Alban-Peyrat committed Feb 28, 2024
1 parent e80212b commit 6d573de
Showing 4 changed files with 137 additions and 9 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,16 @@ _Some previous changes will be added_

## [Unreleased]

## [1.17.0] - 2024-02-28

### Changed

* JSON output exports much more precise data, including raw data from records

### Fixed

* Lists containing a single element and lists containing empty elements are now properly output in the CSV export

## [1.16.2] - 2024-02-28

### Fixed
123 changes: 117 additions & 6 deletions fcr_classes.py
@@ -899,9 +899,10 @@ def close_file(self):
class Database_Record(object):
"""Contains extracted data from the record.
The data property contains every mapped data for the chosen processing"""
def __init__(self, processing: Processing, record: ET.ElementTree | dict | pymarc.record.Record, is_target_db: bool, settings:Records_Settings):
def __init__(self, processing: Processing, record: ET.ElementTree | dict | pymarc.record.Record, fcr_processed_id:str, is_target_db: bool, settings:Records_Settings):
self.processing = processing
self.record = record
self.fcr_processed_id = fcr_processed_id
self.database = self.processing.origin_database
if is_target_db:
self.database = self.processing.target_database
@@ -1057,6 +1058,43 @@ def compare_to(self, compared_to):
self.__compare_other_db_id(compared_to)
self.__finalize_analysis()

def data_to_json(self) -> dict:
"""Returns the data for a JSOn export, using FCR_Mapped_Fields names as keys"""
out = {}
for data in self.data:
out[data.name] = self.data[data]
return out
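# Standalone sketch of the enum-name-keyed pattern above (illustration, not part
# of this commit; Toy_Fields stands in for FCR_Mapped_Fields):
from enum import Enum
class Toy_Fields(Enum):
    TITLE = 0
    ISBN = 1
data = {Toy_Fields.TITLE: ["A title"], Toy_Fields.ISBN: ["9782070360024"]}
print({field.name: data[field] for field in data})
# {'TITLE': ['A title'], 'ISBN': ['9782070360024']}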

def analysis_to_json(self) -> dict:
"""Returns the analysis data as a dict for a JSON export"""
if type(self.total_checks) == Analysis_Final_Results:
return {
"title":{
"title_ratio":self.title_ratio,
"title_partial_ratio":self.title_partial_ratio,
"title_token_sort_ratio":self.title_token_sort_ratio,
"title_token_set_ratio":self.title_token_set_ratio,
},
"dates":self.dates_matched,
"publishers":{
"score":self.publishers_score,
"target_db":self.chosen_publisher,
"origin_db":self.chosen_compared_publisher
},
"other_ids":{
"nb":self.nb_other_db_id,
"result":self.local_id_in_compared_record.name
},
"global":{
"result":self.total_checks.name,
"nb_succesful_checks":self.passed_check_nb,
"title":self.checks[Analysis_Checks.TITLE],
"publisher":self.checks[Analysis_Checks.PUBLISHER],
"date":self.checks[Analysis_Checks.DATE]
}
}
else:
return None

# --- Utils methods for other classes / functions ---
class Utils:
@@ -1184,7 +1222,22 @@ def add_returned_ids(self, ids: list):
def add_returned_records(self, records: list):
self.returned_records = records
self.includes_records = True


def to_json(self) -> dict:
"""Retuns the data ready for a JSON export"""
error_type = None
if type(self.error_type) == Errors:
error_type = self.error_type.name
return {
"try_nb":self.try_nb,
"action":self.action.name,
"status":self.status.name,
"error_type":error_type,
"msg":self.msg,
"query":self.query,
"returned_ids":self.returned_ids
}
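# Illustrative output of to_json() (not part of this commit; the enum member
# names and values below are invented examples):
# {
#     "try_nb": 0,
#     "action": "SRU_SUDOC_ISBN",
#     "status": "SUCCESS",
#     "error_type": None,
#     "msg": None,
#     "query": "isb=978-2-07-036002-4",
#     "returned_ids": ["123456789"]
# }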

class Matched_Records(object):
"""
@@ -1193,7 +1246,7 @@ class Matched_Records(object):
def __init__(self, operation: Operation, query: str, local_record:Database_Record, target_url:str, lang:str):
self.error = None
self.error_msg = None
self.tries = []
self.tries:List[Request_Try] = []
self.returned_ids = []
self.returned_records = []
self.includes_record = False
@@ -1235,6 +1288,12 @@ def execute_operation(self):
self.error = Errors.NOTHING_WAS_FOUND
self.error_msg = get_instance_from_enum(self.error, Errors).get_msg(self.lang)

def tries_to_json(self) -> dict:
"""Returns the tries as a dict ready for JSON export"""
out = {}
for this_try in self.tries:
out[this_try.try_nb] = this_try.to_json()
return out
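# Side note (illustration, not part of this commit): the integer try_nb keys
# built here become strings once the dict goes through json.dump, e.g.:
#   import json
#   json.dumps({0: {"status": "SUCCESS"}})  # -> '{"0": {"status": "SUCCESS"}}'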

def request_action(self, action: Actions, thisTry: Request_Try):
"""Makes the request for this specific action and returns a list of IDs as a result"""
@@ -2777,11 +2836,11 @@ def get_matched_records_instance(self, mr: Matched_Records):

def get_origin_database_data(self, processing: Processing, record: ET.ElementTree | dict | pymarc.record.Record):
"""Extract data from the origin database record"""
self.origin_database_data = Database_Record(processing, record, False, self.records_settings)
self.origin_database_data = Database_Record(processing, record, self.fcr_processed_id, False, self.records_settings)

def get_target_database_data(self, processing: Processing, id:str, record: ET.ElementTree | dict | pymarc.record.Record):
"""Extract data from the origin database record"""
self.target_database_data[id] = Database_Record(processing, record, True, self.records_settings)
self.target_database_data[id] = Database_Record(processing, record, self.fcr_processed_id, True, self.records_settings)

def change_target_record_id(self, old_id:str, new_id:str):
"""Changes the ID key in target_database_data and set a new matched_id"""
@@ -2865,7 +2924,59 @@ def __special_better_item(self, out:dict, origin_db=True) -> dict:
del out[f"{db}_{FCR_Mapped_Fields.GENERAL_PROCESSING_DATA_DATES.name}"]
return out

def to_csv(self):
def to_json(self, error:Report_Errors=None) -> dict:
"""Returns the data as a dict for the JSON file"""
out = {}
par:Original_Record = self.parent

# Errors
out["error"] = par.error
out["error_message"] = par.error_message

# Original line
out["original_line"] = None
if type(par.original_line) == dict:
out["original_line"] = par.original_line

# IDs
out["fcr_processed_id"] = par.fcr_processed_id
out["input_query"] = par.input_query
out["original_uid"] = par.original_uid

# First possible return : failed to get origin DB
if error in [Report_Errors.ORIGIN_DB_KOHA, Report_Errors.ORIGIN_DB_LOCAL_RECORD]:
return out

# Origin database record
out["origin_database"] = {
"data":par.origin_database_data.data_to_json(),
"fcr_processed_id":par.fcr_processed_id
}

# Matched records
out["matched_record_tries"] = par.matched_record_instance.tries_to_json()
out["query_used"] = par.query_used
out["action_used"] = par.action_used.name
out["nb_matched_records"] = par.nb_matched_records
out["matched_records_ids"] = par.matched_records_ids

# Second possible return : no matched records
if error in [Report_Errors.MATCH_RECORD_NO_MATCH]:
return out

# Target DB records
out["target_database"] = {}
for record_id in par.target_database_data:
record:Database_Record = par.target_database_data[record_id] # for IDE
out["target_database"][record_id] = {
"data":record.data_to_json(),
"fcr_processed_id":record.fcr_processed_id,
"analysis":record.analysis_to_json()
}

return out
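# Illustrative shape of one record in the JSON export built above (not part of
# this commit; every value below is invented, action and ID names are hypothetical):
# {
#     "error": None,
#     "error_message": None,
#     "original_line": {"ISBN": "978-2-07-036002-4"},
#     "fcr_processed_id": "0",
#     "input_query": "978-2-07-036002-4",
#     "original_uid": "123456",
#     "origin_database": {"data": {...}, "fcr_processed_id": "0"},
#     "matched_record_tries": {"0": {...}},
#     "query_used": "isb=978-2-07-036002-4",
#     "action_used": "SRU_SUDOC_ISBN",
#     "nb_matched_records": 1,
#     "matched_records_ids": ["123456789"],
#     "target_database": {"123456789": {"data": {...}, "fcr_processed_id": "0", "analysis": {...}}}
# }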

def to_csv(self) -> dict:
"""Returns the data as a dict for the CSV export"""
par:Original_Record = self.parent
out = {}
2 changes: 1 addition & 1 deletion fcr_func.py
@@ -102,7 +102,7 @@ def list_as_string(this_list: list) -> str:
if len(non_empty_elements) == 0:
return ""
elif len(non_empty_elements) == 1:
return delete_control_char(str(", ".join(non_empty_elements[0])))
return delete_control_char(str(non_empty_elements[0]))
else:
return delete_control_char(str(", ".join([str(elem) for elem in non_empty_elements])))

11 changes: 9 additions & 2 deletions main.py
@@ -70,6 +70,7 @@ def main(es: fcr.Execution_Settings):

# ------------------------------ MAIN FUNCTION ------------------------------
results = []
json_output = []
es.log.big_info("File processing start")

# Load original file data
@@ -116,6 +117,7 @@ es.log.error(rec.error_message)
es.log.error(rec.error_message)
es.csv.write_line(rec, False)
results.append(rec.output.to_csv())
json_output.append(rec.output.to_json(fcr.Report_Errors.ORIGIN_DB_KOHA))
continue # skip to next line
rec.get_origin_database_data(es.processing, origin_record.record_parsed)
# MARC_FILE_IN_KOHA_SRU from the file
@@ -127,6 +129,7 @@ def main(es: fcr.Execution_Settings):
es.log.error(rec.error_message)
es.csv.write_line(rec, False)
results.append(rec.output.to_csv())
json_output.append(rec.output.to_json(fcr.Report_Errors.ORIGIN_DB_KOHA))
continue # skip to next line
rec.get_origin_database_data(es.processing, origin_record)
rec.original_uid = rec.origin_database_data.utils.get_id()
@@ -138,7 +141,7 @@ def main(es: fcr.Execution_Settings):
results_report.increase_step(fcr.Report_Success.ORIGIN_DB) # report stats

# --------------- Match records ---------------
rec.get_matched_records_instance(fcr.Matched_Records(es.operation, rec.input_query, rec.origin_database_data, es.target_url, es.lang))
if rec.nb_matched_records == 0:
rec.trigger_error(f"{es.operation.name} : {fcr.get_instance_from_enum(fcr.Errors.OPERATION_NO_RESULT).get_msg(es.lang)}")

@@ -150,6 +153,7 @@ def main(es: fcr.Execution_Settings):
# Skip to next line
es.csv.write_line(rec, False)
results.append(rec.output.to_csv())
json_output.append(rec.output.to_json(fcr.Report_Errors.MATCH_RECORD_NO_MATCH))
continue

# Match records was a success
@@ -239,6 +243,9 @@ def main(es: fcr.Execution_Settings):
results.append(rec.output.to_csv())
results_report.increase_step(fcr.Report_Success.TARGET_RECORD_GLOBAL) # report stats

# JSON output
json_output.append(rec.output.to_json())

results_report.increase_step(fcr.Report_Success.ORIGIN_RECORD_GLOBAL) # report stats
# Closes CSV file
es.csv.close_file()
@@ -249,7 +256,7 @@ def main(es: fcr.Execution_Settings):
# ------------------------------ FINAL OUTPUT ------------------------------
# --------------- JSON FILE ---------------
with open(es.file_path_out_json, 'w', encoding='utf-8') as f:
json.dump(results, f, ensure_ascii=False, indent=4)
json.dump(json_output, f, ensure_ascii=False, indent=4)
es.log.simple_info("JSON output file", es.file_path_out_json)

# --------------- CSV FILE ---------------
