From 5c508a668f2782ac2dcac9330f525fa3f7a31022 Mon Sep 17 00:00:00 2001 From: gordonblackadder <171737385+gblackadder@users.noreply.github.com> Date: Thu, 7 Nov 2024 10:54:46 -0500 Subject: [PATCH] U/gblackadder/fix template issues (#6) * simplify writers and better support templates * remove the dict like set item that no ones uses * dependabot identified issue with certifi library --- poetry.lock | 8 +- pydantic_schemas/metadata_manager.py | 107 ++++++++++-------- .../tests/test_metadata_manager.py | 49 +++++++- pydantic_schemas/utils/excel_to_pydantic.py | 3 +- pydantic_schemas/utils/pydantic_to_excel.py | 11 +- pydantic_schemas/utils/schema_base_model.py | 6 +- pydantic_schemas/utils/utils.py | 2 +- pyproject.toml | 1 + 8 files changed, 116 insertions(+), 71 deletions(-) diff --git a/poetry.lock b/poetry.lock index bea39c6..2d3b9b4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -100,13 +100,13 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "certifi" -version = "2024.6.2" +version = "2024.8.30" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, - {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, + {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, + {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, ] [[package]] @@ -1721,4 +1721,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "7671a941b5d68d34eb48386ca5fc6a447fbd50677549e35a46fe64869f94d6fb" +content-hash = "ecb7393f4d4a81dd1b35c734a4384c8b96093ebbaf707b9f9ba22e08f9d93e3c" diff --git a/pydantic_schemas/metadata_manager.py b/pydantic_schemas/metadata_manager.py index acd9434..ac1d23f 100644 --- a/pydantic_schemas/metadata_manager.py +++ b/pydantic_schemas/metadata_manager.py @@ -1,3 +1,4 @@ +import importlib.metadata from copy import copy from typing import Dict, List, Optional, Type, Union @@ -21,11 +22,13 @@ from .utils.quick_start import make_skeleton from .utils.utils import merge_dicts, standardize_keys_in_dict +__version__ = importlib.metadata.version("metadataschemas") + class MetadataManager: """ Interface with Excel for creating, saving and updating metadata for various types: - document, indicator, indicators_db, microdata, resource, script, table, video + document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video Retrieve pydantic model definitions for each metadata type """ @@ -85,7 +88,7 @@ def standardize_metadata_name(self, metadata_name: str) -> str: metadata_name = "microdata" elif metadata_name == "timeseries": metadata_name = "indicator" - elif metadata_name == "timeseries_db": + elif metadata_name == "timeseries_db" or metadata_name == "indicator_db": metadata_name = "indicators_db" self._raise_if_unsupported_metadata_name(metadata_name=metadata_name) return metadata_name @@ -100,6 +103,43 @@ def create_metadata_outline( skeleton_object = make_skeleton(schema, debug=debug) return skeleton_object + def _get_name_version_schema_writer(self, metadata_name_or_class): + """ + Determines the metadata name, version, schema, and writer based on the provided metadata name or class. + + Args: + metadata_name_or_class (str or class): The metadata name as a string or the metadata class. + + Returns: + tuple: A tuple containing: + - metadata_name (str): The standardized metadata name. + - version (str): The version information of the metadata. + - schema (type(BaseModel)): The schema associated with the metadata. + - writer (function): The writer function for the metadata. + + If `metadata_name_or_class` is a string or is one of the standard metadata types (document, + geospatial, image, indicator, indicators_db, microdata, resource, script, table, video), + it retrieves the corresponding metadata name, schema, version, and writer from the internal + mappings. Otherwise, it assumes this is a template and retrieves the title from the class, + and uses a default single page writer function. + """ + if isinstance(metadata_name_or_class, str) or metadata_name_or_class in self._TYPE_TO_SCHEMA.values(): + if isinstance(metadata_name_or_class, str): + metadata_name = self.standardize_metadata_name(metadata_name_or_class) + schema = self._TYPE_TO_SCHEMA[metadata_name] + else: + for metadata_name, schema in self._TYPE_TO_SCHEMA.items(): + if schema is metadata_name_or_class: + break + version = f"{metadata_name} type metadata version {__version__}" + writer = self._TYPE_TO_WRITER[metadata_name] + else: + writer = write_to_single_sheet + metadata_name = metadata_name_or_class.model_json_schema()["title"] + version = f"Template: {metadata_name}" + schema = metadata_name_or_class + return metadata_name, version, schema, writer + def write_metadata_outline_to_excel( self, metadata_name_or_class: Union[str, Type[BaseModel]], @@ -111,9 +151,7 @@ def write_metadata_outline_to_excel( Args: metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently: - document, indicator, indicators_db, microdata, resource, script, table, video - Currently not supported: - geospatial, image + document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video If passed as a BaseModel type, for instance this is what you would do with a template, then the writer defaults to a single page. filename (Optional[str]): The path to the Excel file. If None, defaults to {metadata_name}_metadata.xlsx @@ -125,33 +163,21 @@ def write_metadata_outline_to_excel( Outputs: An Excel file into which metadata can be entered """ - if isinstance(metadata_name_or_class, str): - metadata_name = self.standardize_metadata_name(metadata_name_or_class) - # if metadata_name == "geospatial": - # raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel") - skeleton_object = self.create_metadata_outline(metadata_name, debug=False) - writer = self._TYPE_TO_WRITER[metadata_name] - if filename is None: - filename = f"{metadata_name}_metadata.xlsx" - if title is None: - title = f"{metadata_name.capitalize()} Metadata" - else: - skeleton_object = make_skeleton(metadata_name_or_class, debug=False) - writer = write_to_single_sheet - metadata_name = metadata_name_or_class.model_json_schema()["title"] - if filename is None: - filename = f"{metadata_name}_metadata.xlsx" - if title is None: - title = f"{metadata_name.capitalize()} Metadata" + metadata_name, version, schema, writer = self._get_name_version_schema_writer(metadata_name_or_class) + skeleton_object = self.create_metadata_outline(schema, debug=False) + + if filename is None: + filename = f"{metadata_name}_metadata.xlsx" + if title is None: + title = f"{metadata_name.capitalize()} Metadata" if not str(filename).endswith(".xlsx"): filename += ".xlsx" - writer(filename, skeleton_object, metadata_name, title) + writer(filename, skeleton_object, version, title) return filename def save_metadata_to_excel( self, - metadata_name_or_class: Union[str, Type[BaseModel]], object: BaseModel, filename: Optional[str] = None, title: Optional[str] = None, @@ -161,11 +187,6 @@ def save_metadata_to_excel( Save an Excel document of the given metadata object. Args: - metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently: - document, indicator, indicators_db, microdata, resource, script, table, video - Currently not supported: - geospatial, image - If passed as a BaseModel type, for instance this is what you would do with a template, then the writer defaults to a single page. object (BaseModel): The pydantic object to save to the Excel file. filename (Optional[str]): The path to the Excel file. Defaults to {name}_metadata.xlsx title (Optional[str]): The title for the Excel sheet. Defaults to '{name} Metadata' @@ -176,17 +197,10 @@ def save_metadata_to_excel( Outputs: An Excel file containing the metadata from the pydantic object. This file can be updated as needed. """ - if isinstance(metadata_name_or_class, str): - metadata_name = self.standardize_metadata_name(metadata_name_or_class) - # if metadata_name == "geospatial": - # raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel") - schema = self.metadata_class_from_name(metadata_name) - writer = self._TYPE_TO_WRITER[metadata_name] - else: - metadata_name = metadata_name_or_class.model_json_schema()["title"] - schema = metadata_name_or_class - writer = write_to_single_sheet - skeleton_object = self.create_metadata_outline(metadata_name_or_class=metadata_name_or_class, debug=False) + metadata_name, version, schema, writer = self._get_name_version_schema_writer( + type(object) + ) # metadata_name_or_class) + skeleton_object = self.create_metadata_outline(metadata_name_or_class=schema, debug=False) if filename is None: filename = f"{metadata_name}_metadata.xlsx" @@ -201,7 +215,7 @@ def save_metadata_to_excel( ) combined_dict = standardize_keys_in_dict(combined_dict) new_ob = schema.model_validate(combined_dict) - writer(filename, new_ob, metadata_name, title, verbose=verbose) + writer(filename, new_ob, version, title, verbose=verbose) return filename @staticmethod @@ -222,12 +236,15 @@ def _get_metadata_name_from_excel_file(filename: str) -> str: workbook.close() if not type_info or not isinstance(type_info, str): - raise ValueError(f"Cell C3 is empty or not a string. {error_message}") + raise ValueError(f"Cell C1 is empty or not a string. {error_message}") cell_values = type_info.split(" ") + if cell_values[0] == "Template:": + return " ".join(cell_values[1:]) + if len(cell_values) < 3 or cell_values[1] != "type" or cell_values[2] != "metadata": - raise ValueError(f"Cell C3 is improperly formatted. {error_message}") + raise ValueError(f"Cell C1 is improperly formatted. {error_message}") return cell_values[0] @@ -236,7 +253,7 @@ def read_metadata_from_excel( ) -> BaseModel: """ Read in metadata from an appropriately formatted Excel file as a pydantic object. - If using standard metadata types (document, indicator, indicators_db, microdata, resource, script, table, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided. + If using standard metadata types (document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided. Args: filename (str): The path to the Excel file. diff --git a/pydantic_schemas/tests/test_metadata_manager.py b/pydantic_schemas/tests/test_metadata_manager.py index 40a0b26..f9a6797 100644 --- a/pydantic_schemas/tests/test_metadata_manager.py +++ b/pydantic_schemas/tests/test_metadata_manager.py @@ -1,6 +1,7 @@ import random import string from copy import copy +from typing import List, Optional import pytest from pydantic import BaseModel, ValidationError @@ -162,7 +163,7 @@ def test_metadata_by_name(tmpdir, metadata_name): # Save the read metadata to a new file filename2 = tmpdir.join(f"test_{metadata_name}_save.xlsx") - mm.save_metadata_to_excel(metadata_name_or_class=metadata_name, object=tmp, filename=filename2, title=metadata_name) + mm.save_metadata_to_excel(object=tmp, filename=filename2, title=metadata_name) for i in range(10): modl = mm.create_metadata_outline(metadata_name_or_class=metadata_name) @@ -171,9 +172,7 @@ def test_metadata_by_name(tmpdir, metadata_name): # Write filled in metadata filename3 = tmpdir.join(f"test_{metadata_name}_{i}.xlsx") # filename3 = f"test_{metadata_name}_{i}.xlsx" - mm.save_metadata_to_excel( - metadata_name_or_class=metadata_name, object=modl, filename=filename3, title=metadata_name - ) + mm.save_metadata_to_excel(object=modl, filename=filename3, title=metadata_name) # Read the metadata back actual = mm.read_metadata_from_excel(filename=filename3) @@ -199,7 +198,7 @@ def test_metadata_by_class(tmpdir, metadata_name): filename=tmpdir.join(f"test_class_{metadata_name}.xlsx"), title=metadata_name, ) - mm.read_metadata_from_excel(filename=filename_class, metadata_class=metadata_class) + mm.read_metadata_from_excel(filename=filename_class) def test_standardize_metadata_name(): @@ -244,3 +243,43 @@ def test_standardize_metadata_name(): with pytest.raises(ValueError): mm.standardize_metadata_name("Bad-name") + + +def test_write_read_and_save_for_templates(tmpdir): + class Simple(BaseModel): + a: str + b: List[str] + + class Midlevel(BaseModel): + c: Optional[str] = None + d: Optional[List[Simple]] + + class TopLevel(BaseModel): + e: Optional[Midlevel] + f: Optional[int] + + mm = MetadataManager() + filename1 = tmpdir.join(f"test_templates_1.xlsx") + + mm.write_metadata_outline_to_excel(TopLevel, filename=filename1, title="Outline Test") + + assert mm._get_metadata_name_from_excel_file(filename1) == "TopLevel" + + example = TopLevel( + e=Midlevel( + c="c_value", + d=[ + Simple(a="a_value", b=["the", "quick", "brown", "fox"]), + Simple(a="a_value_2", b=["jumped", "over", "the", "lazy", "dog"]), + ], + ), + f=99, + ) + + filename2 = tmpdir.join(f"test_templates_2.xlsx") + mm.save_metadata_to_excel(example, filename2) + + assert mm._get_metadata_name_from_excel_file(filename2) == "TopLevel" + + actual = mm.read_metadata_from_excel(filename2, TopLevel) + assert actual == example diff --git a/pydantic_schemas/utils/excel_to_pydantic.py b/pydantic_schemas/utils/excel_to_pydantic.py index aef3fd2..775ee00 100644 --- a/pydantic_schemas/utils/excel_to_pydantic.py +++ b/pydantic_schemas/utils/excel_to_pydantic.py @@ -5,8 +5,8 @@ import numpy as np import pandas as pd from pydantic import BaseModel, create_model -from utils.pydantic_to_excel import pydantic_to_dataframe +from ..utils.pydantic_to_excel import pydantic_to_dataframe from .quick_start import make_skeleton from .utils import ( annotation_contains_pydantic, @@ -14,7 +14,6 @@ is_dict_annotation, is_list_annotation, is_optional_annotation, - is_optional_list, seperate_simple_from_pydantic, standardize_keys_in_dict, subset_pydantic_model_type, diff --git a/pydantic_schemas/utils/pydantic_to_excel.py b/pydantic_schemas/utils/pydantic_to_excel.py index 84a2e7b..d8b0c30 100644 --- a/pydantic_schemas/utils/pydantic_to_excel.py +++ b/pydantic_schemas/utils/pydantic_to_excel.py @@ -1,12 +1,9 @@ import copy -import importlib.metadata import json import os from enum import Enum from typing import List, Optional, Tuple, Union, get_args -__version__ = importlib.metadata.version("metadataschemas") - import pandas as pd from openpyxl import Workbook, load_workbook from openpyxl.styles import Alignment, Border, Font, PatternFill, Protection, Side @@ -507,15 +504,12 @@ def create_sheet(workbook, sheetname, sheet_number): return new_sheet -def write_to_single_sheet( - doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, verbose=False -): +def write_to_single_sheet(doc_filepath: str, ob: BaseModel, version: str, title: Optional[str] = None, verbose=False): model_default_name = ob.model_json_schema()["title"] if title is None: title = model_default_name wb = open_or_create_workbook(doc_filepath) ws = create_sheet(wb, "metadata", sheet_number=0) - version = f"{metadata_type} type metadata version {__version__}" current_row = write_title_and_version_info(ws, title, version, protect_title=False) current_row = write_pydantic_to_sheet(ws, ob, current_row, debug=verbose) correct_column_widths(worksheet=ws) @@ -525,11 +519,10 @@ def write_to_single_sheet( def write_across_many_sheets( - doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, verbose=False + doc_filepath: str, ob: BaseModel, version: str, title: Optional[str] = None, verbose=False ): wb = open_or_create_workbook(doc_filepath) ws = create_sheet(wb, "metadata", sheet_number=0) - version = f"{metadata_type} type metadata version {__version__}" current_row = write_title_and_version_info(ws, title, version, protect_title=False) children = seperate_simple_from_pydantic(ob) diff --git a/pydantic_schemas/utils/schema_base_model.py b/pydantic_schemas/utils/schema_base_model.py index 90d8df9..a5e67f7 100644 --- a/pydantic_schemas/utils/schema_base_model.py +++ b/pydantic_schemas/utils/schema_base_model.py @@ -3,9 +3,5 @@ class SchemaBaseModel(BaseModel): model_config = ConfigDict( - validate_assignment=True, protected_namespaces=(), use_enum_values=True, extra="forbid" + validate_assignment=True, protected_namespaces=(), use_enum_values=True, extra="ignore" ) # if a subclass has a model_config then this will be overridden - - def __setitem__(self, key, value): - """Allow dict like setting: Model[key] = value""" - setattr(self, key, value) diff --git a/pydantic_schemas/utils/utils.py b/pydantic_schemas/utils/utils.py index 0ef2a71..579bcf6 100644 --- a/pydantic_schemas/utils/utils.py +++ b/pydantic_schemas/utils/utils.py @@ -253,6 +253,6 @@ def subset_pydantic_model(model: BaseModel, feature_names: List[str], name: Opti input_dict = {k: v for k, v in model.model_dump(mode="json").items() if k in feature_names} input_dict_standardized = standardize_keys_in_dict(input_dict) try: - return SubModel(**input_dict_standardized) + return SubModel.model_validate(input_dict_standardized) except: raise ValueError(input_dict_standardized) diff --git a/pyproject.toml b/pyproject.toml index 09ca40f..29b6516 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ pandas = "^2.2.2" numpy = "^2.1.0" pydantic = "^2.8.0" openpyxl = "^3.1.5" +certifi = "^2024.8.30" [tool.poetry.group.dev.dependencies] pytest = "^8.2.2"