From ddce25f14c46972c24cf545587ee1e771835d8cd Mon Sep 17 00:00:00 2001 From: Gordon Blackadder Date: Fri, 6 Sep 2024 16:56:46 -0400 Subject: [PATCH] fix bugs when working with templates --- pydantic_schemas/metadata_manager.py | 22 ++--- .../tests/test_excel_interface.py | 25 ------ .../tests/test_metadata_manager.py | 90 +++++++++++++++++++ pydantic_schemas/utils/excel_to_pydantic.py | 13 ++- 4 files changed, 113 insertions(+), 37 deletions(-) delete mode 100644 pydantic_schemas/tests/test_excel_interface.py create mode 100644 pydantic_schemas/tests/test_metadata_manager.py diff --git a/pydantic_schemas/metadata_manager.py b/pydantic_schemas/metadata_manager.py index 77a3d3f..0cfe462 100644 --- a/pydantic_schemas/metadata_manager.py +++ b/pydantic_schemas/metadata_manager.py @@ -1,5 +1,5 @@ from copy import copy -from typing import Dict, Optional, Type, Union +from typing import Dict, List, Optional, Type, Union from openpyxl import load_workbook from pydantic import BaseModel @@ -68,18 +68,18 @@ class MetadataManager: "video": excel_single_sheet_to_pydantic, # one sheet } - def metadata_class_from_name(self, metadata_name: str): + def metadata_class_from_name(self, metadata_name: str) -> Type[BaseModel]: metadata_name = self.standardize_metadata_name(metadata_name) schema = self._TYPE_TO_SCHEMA[metadata_name] return copy(schema) @property - def metadata_type_names(self): + def metadata_type_names(self) -> List[str]: return list(self._TYPE_TO_SCHEMA.keys()) def standardize_metadata_name(self, metadata_name: str) -> str: metadata_name = metadata_name.lower() - metadata_name = metadata_name.replace("-", "_") + metadata_name = metadata_name.replace("-", "_").replace(" ", "_") if metadata_name == "microdata" or metadata_name == "survey_microdata": metadata_name = "survey" self._raise_if_unsupported_metadata_name(metadata_name=metadata_name) @@ -175,10 +175,11 @@ def save_metadata_to_excel( if metadata_name == "geospatial": raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel") schema = self.metadata_class_from_name(metadata_name) + writer = self._TYPE_TO_WRITER[metadata_name] else: + metadata_name = metadata_name_or_class.model_json_schema()["title"] schema = metadata_name_or_class writer = write_to_single_sheet - metadata_name = metadata_name_or_class.model_json_schema()["title"] skeleton_object = self.create_metadata_outline(metadata_name_or_class=metadata_name_or_class, debug=False) if filename is None: @@ -195,7 +196,7 @@ def save_metadata_to_excel( combined_dict = standardize_keys_in_dict(combined_dict) new_ob = schema(**combined_dict) - writer = self._TYPE_TO_WRITER[metadata_name] + # writer = self._TYPE_TO_WRITER[metadata_name] writer(filename, new_ob, metadata_name, title) return filename @@ -229,10 +230,11 @@ def _get_metadata_name_from_excel_file(filename: str) -> str: def read_metadata_from_excel(self, filename: str, metadata_class: Optional[Type[BaseModel]] = None) -> BaseModel: """ Read in metadata from an appropriately formatted Excel file as a pydantic object. - If using s standard metadata types (documents, scripts, survey, table, timeseries, timeseries_db, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided. + If using standard metadata types (documents, resource, script, survey, table, timeseries, timeseries_db, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided. + Args: filename (str): The path to the Excel file. - metadata_class (Optional type of BaseModel): A pudantic class type correspondong to the type used to write the Excel file + metadata_class (Optional type of BaseModel): A pydantic class type correspondong to the type used to write the Excel file Returns: BaseModel: a pydantic object containing the metadata from the file @@ -242,11 +244,11 @@ def read_metadata_from_excel(self, filename: str, metadata_class: Optional[Type[ metadata_name = self.standardize_metadata_name(metadata_name) schema = self._TYPE_TO_SCHEMA[metadata_name] reader = self._TYPE_TO_READER[metadata_name] - except ValueError: + except ValueError as e: if metadata_class is None: raise ValueError( f"'{metadata_name}' not supported. Must be: {list(self._TYPE_TO_SCHEMA.keys())} or try passing in the metadata_class" - ) + ) from e schema = metadata_class reader = excel_single_sheet_to_pydantic read_object = reader(filename, schema) diff --git a/pydantic_schemas/tests/test_excel_interface.py b/pydantic_schemas/tests/test_excel_interface.py deleted file mode 100644 index 543501d..0000000 --- a/pydantic_schemas/tests/test_excel_interface.py +++ /dev/null @@ -1,25 +0,0 @@ -import pytest - -from pydantic_schemas.metadata_manager import MetadataManager - - -@pytest.mark.parametrize( - "metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"] -) -def test_metadata(tmpdir, metadata_name): - ei = MetadataManager() - - # Write empty metadata - filename = ei.write_metadata_outline_to_excel( - metadata_name_or_class=metadata_name, filename=tmpdir.join(f"test_{metadata_name}.xlsx"), title=metadata_name - ) - - # Read the metadata back - tmp = ei.read_metadata_from_excel(filename=filename) - - # Save the read metadata to a new file - filename2 = tmpdir.join(f"test_{metadata_name}_2.xlsx") - ei.save_metadata_to_excel(metadata_name_or_class=metadata_name, object=tmp, filename=filename2, title=metadata_name) - - # make an outline object - ei.create_metadata_outline(metadata_name_or_class=metadata_name) diff --git a/pydantic_schemas/tests/test_metadata_manager.py b/pydantic_schemas/tests/test_metadata_manager.py new file mode 100644 index 0000000..5df4fef --- /dev/null +++ b/pydantic_schemas/tests/test_metadata_manager.py @@ -0,0 +1,90 @@ +import pytest + +from pydantic_schemas.metadata_manager import MetadataManager + + +@pytest.mark.parametrize( + "metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"] +) +def test_metadata_by_name(tmpdir, metadata_name): + mm = MetadataManager() + assert metadata_name in mm.metadata_type_names + + for debug in [True, False]: + mm.create_metadata_outline(metadata_name_or_class=metadata_name, debug=debug) + + # Write empty metadata + filename = mm.write_metadata_outline_to_excel( + metadata_name_or_class=metadata_name, filename=tmpdir.join(f"test_{metadata_name}.xlsx"), title=metadata_name + ) + + # Read the metadata back + tmp = mm.read_metadata_from_excel(filename=filename) + + # Save the read metadata to a new file + filename2 = tmpdir.join(f"test_{metadata_name}_2.xlsx") + mm.save_metadata_to_excel(metadata_name_or_class=metadata_name, object=tmp, filename=filename2, title=metadata_name) + + # make an outline object + mm.create_metadata_outline(metadata_name_or_class=metadata_name) + + +@pytest.mark.parametrize( + "metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"] +) +def test_metadata_by_class(tmpdir, metadata_name): + mm = MetadataManager() + + metadata_class = mm.metadata_class_from_name(metadata_name=metadata_name) + + # outline from class + mm.create_metadata_outline(metadata_name_or_class=metadata_class) + + # write and read from class + filename_class = mm.write_metadata_outline_to_excel( + metadata_name_or_class=metadata_class, + filename=tmpdir.join(f"test_class_{metadata_name}.xlsx"), + title=metadata_name, + ) + mm.read_metadata_from_excel(filename=filename_class, metadata_class=metadata_class) + + +def test_standardize_metadata_name(): + mm = MetadataManager() + inputs = [ + "Document", + "SCRIPT", + "survey", + "survey-microdata", + "survey microdata", + "microdata", + "table", + "timeseries-db", + "timeseries-db", + "TimeSeries", + "VIdeo", + ] + + expecteds = [ + "document", + "script", + "survey", + "survey", + "survey", + "survey", + "table", + "timeseries_db", + "timeseries_db", + "timeseries", + "video", + ] + + for inp, expected in zip(inputs, expecteds): + actual = mm.standardize_metadata_name(inp) + assert actual == expected, f"expected {expected} but got {actual}" + + with pytest.raises(NotImplementedError): + mm.standardize_metadata_name("Image") + + with pytest.raises(ValueError): + mm.standardize_metadata_name("Bad-name") diff --git a/pydantic_schemas/utils/excel_to_pydantic.py b/pydantic_schemas/utils/excel_to_pydantic.py index 6387916..124b2e7 100644 --- a/pydantic_schemas/utils/excel_to_pydantic.py +++ b/pydantic_schemas/utils/excel_to_pydantic.py @@ -102,8 +102,17 @@ def get_relevant_sub_frame(m: Type[BaseModel], df: pd.DataFrame, name_of_field: def handle_optional(name, annotation, df, from_within_list: bool = False, debug=False): args = [a for a in get_args(annotation) if a is not type(None)] - assert len(args) == 1, f"handle_optional encountered {args}" - ret = annotation_switch(name, args[0], df, from_within_list=from_within_list) + # assert len(args) == 1, f"handle_optional encountered {args}" + if len(args) > 1: + if str in args: + arg = str + elif float in args: + arg = float + else: + arg = args[0] + else: + arg = args[0] + ret = annotation_switch(name, arg, df, from_within_list=from_within_list) if debug: print(f"optional ret: {ret}") print(f"isinstance(ret, list): {isinstance(ret, list)}")