diff --git a/README.md b/README.md
index d429065..2cd459a 100644
--- a/README.md
+++ b/README.md
@@ -79,8 +79,8 @@ survey_metadata.study_desc.title_statement.idno = "project_idno"
 
 To update the pydantic schemas so that they match the latest json schemas run
 
-  `python pydantic_schemas\\generators\\generate_pydantic_schemas.py`
+  `python pydantic_schemas/generators/generate_pydantic_schemas.py`
 
 Then to update the Excel sheets run
 
-  `python pydantic_schemas\\generators\\generate_excel_files.py`
\ No newline at end of file
+  `python pydantic_schemas/generators/generate_excel_files.py`
\ No newline at end of file
diff --git a/excel_sheets/Document_metadata.xlsx b/excel_sheets/Document_metadata.xlsx
index 8b91933..c334e93 100644
Binary files a/excel_sheets/Document_metadata.xlsx and b/excel_sheets/Document_metadata.xlsx differ
diff --git a/excel_sheets/Resource_metadata.xlsx b/excel_sheets/Resource_metadata.xlsx
new file mode 100644
index 0000000..b9fe85f
Binary files /dev/null and b/excel_sheets/Resource_metadata.xlsx differ
diff --git a/excel_sheets/Script_metadata.xlsx b/excel_sheets/Script_metadata.xlsx
index 66475c7..4977bb1 100644
Binary files a/excel_sheets/Script_metadata.xlsx and b/excel_sheets/Script_metadata.xlsx differ
diff --git a/excel_sheets/Series_metadata.xlsx b/excel_sheets/Series_metadata.xlsx
deleted file mode 100644
index ba893f3..0000000
Binary files a/excel_sheets/Series_metadata.xlsx and /dev/null differ
diff --git a/excel_sheets/Survey_metadata.xlsx b/excel_sheets/Survey_metadata.xlsx
index 3be5d6f..23a4e81 100644
Binary files a/excel_sheets/Survey_metadata.xlsx and b/excel_sheets/Survey_metadata.xlsx differ
diff --git a/excel_sheets/Table_metadata.xlsx b/excel_sheets/Table_metadata.xlsx
index ce38e33..028919e 100644
Binary files a/excel_sheets/Table_metadata.xlsx and b/excel_sheets/Table_metadata.xlsx differ
diff --git a/excel_sheets/Timeseries_db_metadata.xlsx b/excel_sheets/Timeseries_db_metadata.xlsx
index bab0859..0f2ae54 100644
Binary files a/excel_sheets/Timeseries_db_metadata.xlsx and b/excel_sheets/Timeseries_db_metadata.xlsx differ
diff --git a/excel_sheets/Timeseries_metadata.xlsx b/excel_sheets/Timeseries_metadata.xlsx
index 0d3fd1a..7b81414 100644
Binary files a/excel_sheets/Timeseries_metadata.xlsx and b/excel_sheets/Timeseries_metadata.xlsx differ
diff --git a/excel_sheets/Video_metadata.xlsx b/excel_sheets/Video_metadata.xlsx
index 1486a55..d21681a 100644
Binary files a/excel_sheets/Video_metadata.xlsx and b/excel_sheets/Video_metadata.xlsx differ
diff --git a/pydantic_schemas/document_schema.py b/pydantic_schemas/document_schema.py
index 58e26cd..f53ce7f 100644
--- a/pydantic_schemas/document_schema.py
+++ b/pydantic_schemas/document_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  document-schema.json
-#   timestamp: 2024-07-24T21:06:20+00:00
+#   timestamp: 2024-08-29T18:53:37+00:00
 
 from __future__ import annotations
 
@@ -9,7 +9,7 @@
 
 from pydantic import Extra, Field
 
-from .schema_base_model import SchemaBaseModel
+from .utils.schema_base_model import SchemaBaseModel
 
 
 class Overwrite(Enum):
diff --git a/pydantic_schemas/generators/generate_excel_files.py b/pydantic_schemas/generators/generate_excel_files.py
index 03fdfec..00da0dd 100644
--- a/pydantic_schemas/generators/generate_excel_files.py
+++ b/pydantic_schemas/generators/generate_excel_files.py
@@ -1,12 +1,14 @@
 import os
 
-from pydantic_schemas.schema_interface import SchemaInterface
+from pydantic_schemas.metadata_manager import MetadataManager
 
-ei = SchemaInterface()
+metadata_manager = MetadataManager() -for metadata_type in ei.list_metadata_types(): - filename = f"excel_sheets/{metadata_type.capitalize()}_metadata.xlsx" - print(f"Writing {metadata_type} outline to {filename}") +for metadata_name in metadata_manager.metadata_type_names: + if metadata_name in ["image", "geospatial"]: + continue + filename = f"excel_sheets/{metadata_name.capitalize()}_metadata.xlsx" + print(f"Writing {metadata_name} outline to {filename}") if os.path.exists(filename): os.remove(filename) - ei.write_outline_metadata_to_excel(metadata_type=metadata_type, filename=filename) + metadata_manager.write_metadata_outline_to_excel(metadata_name_or_class=metadata_name, filename=filename) diff --git a/pydantic_schemas/generators/generate_pydantic_schemas.py b/pydantic_schemas/generators/generate_pydantic_schemas.py index 516a1b0..cd58c5b 100644 --- a/pydantic_schemas/generators/generate_pydantic_schemas.py +++ b/pydantic_schemas/generators/generate_pydantic_schemas.py @@ -4,12 +4,13 @@ SCHEMA_DIR = "schemas" OUTPUT_DIR = os.path.join("pydantic_schemas") PYTHON_VERSION = "3.11" -BASE_CLASS = ".schema_base_model.SchemaBaseModel" +BASE_CLASS = ".utils.schema_base_model.SchemaBaseModel" INPUTS = [ "document-schema.json", "geospatial-schema.json", "image-schema.json", "microdata-schema.json", + "resource-schema.json", "script-schema.json", "table-schema.json", "timeseries-db-schema.json", diff --git a/pydantic_schemas/geospatial_schema.py b/pydantic_schemas/geospatial_schema.py index 96ce18f..5011676 100644 --- a/pydantic_schemas/geospatial_schema.py +++ b/pydantic_schemas/geospatial_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: geospatial-schema.json -# timestamp: 2024-07-24T21:06:22+00:00 +# timestamp: 2024-08-29T18:53:39+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field, confloat -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Producer(SchemaBaseModel): diff --git a/pydantic_schemas/image_schema.py b/pydantic_schemas/image_schema.py index 0fd3700..d617409 100644 --- a/pydantic_schemas/image_schema.py +++ b/pydantic_schemas/image_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: image-schema.json -# timestamp: 2024-07-24T21:06:23+00:00 +# timestamp: 2024-08-29T18:53:41+00:00 from __future__ import annotations @@ -10,7 +10,7 @@ from pydantic import AnyUrl, Extra, Field, confloat, constr -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Overwrite(Enum): diff --git a/pydantic_schemas/metadata_manager.py b/pydantic_schemas/metadata_manager.py new file mode 100644 index 0000000..77a3d3f --- /dev/null +++ b/pydantic_schemas/metadata_manager.py @@ -0,0 +1,273 @@ +from copy import copy +from typing import Dict, Optional, Type, Union + +from openpyxl import load_workbook +from pydantic import BaseModel + +from . 
import (  # image_schema,
+    document_schema,
+    geospatial_schema,
+    microdata_schema,
+    resource_schema,
+    script_schema,
+    table_schema,
+    timeseries_db_schema,
+    timeseries_schema,
+    video_schema,
+)
+from .utils.excel_to_pydantic import excel_doc_to_pydantic, excel_single_sheet_to_pydantic
+from .utils.pydantic_to_excel import write_across_many_sheets, write_to_single_sheet
+from .utils.quick_start import make_skeleton
+from .utils.utils import merge_dicts, standardize_keys_in_dict
+
+
+class MetadataManager:
+    """
+    Interface with Excel for creating, saving and updating metadata for various types:
+        document, resource, script, survey, table, timeseries, timeseries_db, video
+
+    Retrieve pydantic model definitions for each metadata type
+    """
+
+    _TYPE_TO_SCHEMA = {
+        "document": document_schema.ScriptSchemaDraft,
+        "geospatial": geospatial_schema.GeospatialSchema,
+        # "image":image_schema.ImageDataTypeSchema,
+        "resource": resource_schema.Model,
+        "script": script_schema.ResearchProjectSchemaDraft,
+        "survey": microdata_schema.MicrodataSchema,
+        "table": table_schema.Model,
+        "timeseries": timeseries_schema.TimeseriesSchema,
+        "timeseries_db": timeseries_db_schema.TimeseriesDatabaseSchema,
+        "video": video_schema.Model,
+    }
+
+    _TYPE_TO_WRITER = {
+        "document": write_across_many_sheets,
+        # "geospatial":,
+        # "image":,
+        "resource": write_to_single_sheet,
+        "script": write_across_many_sheets,
+        "survey": write_across_many_sheets,
+        "table": write_across_many_sheets,
+        "timeseries": write_across_many_sheets,
+        "timeseries_db": write_to_single_sheet,  # one sheet
+        "video": write_to_single_sheet,  # one sheet
+    }
+
+    _TYPE_TO_READER = {
+        "document": excel_doc_to_pydantic,
+        # "geospatial":,
+        # "image":,
+        "resource": excel_single_sheet_to_pydantic,
+        "script": excel_doc_to_pydantic,
+        "survey": excel_doc_to_pydantic,
+        "table": excel_doc_to_pydantic,
+        "timeseries": excel_doc_to_pydantic,
+        "timeseries_db": excel_single_sheet_to_pydantic,  # one sheet
+        "video": excel_single_sheet_to_pydantic,  # one sheet
+    }
+
+    def metadata_class_from_name(self, metadata_name: str):
+        metadata_name = self.standardize_metadata_name(metadata_name)
+        schema = self._TYPE_TO_SCHEMA[metadata_name]
+        return copy(schema)
+
+    @property
+    def metadata_type_names(self):
+        return list(self._TYPE_TO_SCHEMA.keys())
+
+    def standardize_metadata_name(self, metadata_name: str) -> str:
+        metadata_name = metadata_name.lower()
+        metadata_name = metadata_name.replace("-", "_")
+        if metadata_name == "microdata" or metadata_name == "survey_microdata":
+            metadata_name = "survey"
+        self._raise_if_unsupported_metadata_name(metadata_name=metadata_name)
+        return metadata_name
+
+    def create_metadata_outline(
+        self, metadata_name_or_class: Union[str, Type[BaseModel]], debug: bool = False
+    ) -> BaseModel:
+        if isinstance(metadata_name_or_class, str):
+            schema = self.metadata_class_from_name(metadata_name_or_class)
+        else:
+            schema = metadata_name_or_class
+        skeleton_object = make_skeleton(schema, debug=debug)
+        return skeleton_object
+
+    def write_metadata_outline_to_excel(
+        self,
+        metadata_name_or_class: Union[str, Type[BaseModel]],
+        filename: Optional[str] = None,
+        title: Optional[str] = None,
+    ) -> str:
+        """
+        Create an Excel file formatted for writing the given metadata_name metadata.
+
+        Args:
+            metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
+                    document, resource, script, survey, table, timeseries, timeseries_db, video
+                Currently not supported:
+                    geospatial, image
+                If a BaseModel class is passed instead, as you would for a template, then the writer
+                    defaults to a single sheet.
+            filename (Optional[str]): The path to the Excel file. If None, defaults to {metadata_name}_metadata.xlsx
+            title (Optional[str]): The title for the Excel sheet. If None, defaults to '{metadata_name} Metadata'
+
+        Returns:
+            str: filename of metadata file
+
+        Outputs:
+            An Excel file into which metadata can be entered
+        """
+        if isinstance(metadata_name_or_class, str):
+            metadata_name = self.standardize_metadata_name(metadata_name_or_class)
+            if metadata_name == "geospatial":
+                raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel")
+            skeleton_object = self.create_metadata_outline(metadata_name, debug=False)
+            writer = self._TYPE_TO_WRITER[metadata_name]
+            if filename is None:
+                filename = f"{metadata_name}_metadata.xlsx"
+            if title is None:
+                title = f"{metadata_name.capitalize()} Metadata"
+        else:
+            skeleton_object = make_skeleton(metadata_name_or_class, debug=False)
+            writer = write_to_single_sheet
+            metadata_name = metadata_name_or_class.model_json_schema()["title"]
+            if filename is None:
+                filename = f"{metadata_name}_metadata.xlsx"
+            if title is None:
+                title = f"{metadata_name.capitalize()} Metadata"
+
+        if not str(filename).endswith(".xlsx"):
+            filename += ".xlsx"
+        writer(filename, skeleton_object, metadata_name, title)
+        return filename
+
+    def save_metadata_to_excel(
+        self,
+        metadata_name_or_class: Union[str, Type[BaseModel]],
+        object: BaseModel,
+        filename: Optional[str] = None,
+        title: Optional[str] = None,
+    ) -> str:
+        """
+        Save an Excel document of the given metadata object.
+
+        Args:
+            metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
+                    document, resource, script, survey, table, timeseries, timeseries_db, video
+                Currently not supported:
+                    geospatial, image
+                If a BaseModel class is passed instead, as you would for a template, then the writer defaults to a single sheet.
+            object (BaseModel): The pydantic object to save to the Excel file.
+            filename (Optional[str]): The path to the Excel file. Defaults to {name}_metadata.xlsx
+            title (Optional[str]): The title for the Excel sheet. Defaults to '{name} Metadata'
+
+        Returns:
+            str: filename of metadata file
+
+        Outputs:
+            An Excel file containing the metadata from the pydantic object. This file can be updated as needed.
+        """
+        if isinstance(metadata_name_or_class, str):
+            metadata_name = self.standardize_metadata_name(metadata_name_or_class)
+            if metadata_name == "geospatial":
+                raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel")
+            schema = self.metadata_class_from_name(metadata_name)
+            # look up the writer here so that the template branch below keeps write_to_single_sheet
+            writer = self._TYPE_TO_WRITER[metadata_name]
+        else:
+            schema = metadata_name_or_class
+            writer = write_to_single_sheet
+            metadata_name = metadata_name_or_class.model_json_schema()["title"]
+        skeleton_object = self.create_metadata_outline(metadata_name_or_class=metadata_name_or_class, debug=False)
+
+        if filename is None:
+            filename = f"{metadata_name}_metadata.xlsx"
+        if not str(filename).endswith(".xlsx"):
+            filename += ".xlsx"
+        if title is None:
+            title = f"{metadata_name.capitalize()} Metadata"
+
+        combined_dict = merge_dicts(
+            skeleton_object.model_dump(),
+            object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True),
+        )
+        combined_dict = standardize_keys_in_dict(combined_dict)
+        new_ob = schema(**combined_dict)
+
+        writer(filename, new_ob, metadata_name, title)
+        return filename
+
+    @staticmethod
+    def _get_metadata_name_from_excel_file(filename: str) -> str:
+        error_message = "Improperly formatted Excel file for metadata"
+        workbook = load_workbook(filename)
+        # Select the 'metadata' sheet
+        try:
+            sheet = workbook["metadata"]
+            # Get the value of cell C1
+            type_info = sheet["C1"].value
+        except KeyError:
+            raise ValueError(f"Sheet 'metadata' not found. {error_message}")
+        except Exception as e:
+            raise ValueError(f"Error reading Excel file: {e}")
+        finally:
+            # Close the workbook
+            workbook.close()
+
+        if not type_info or not isinstance(type_info, str):
+            raise ValueError(f"Cell C1 is empty or not a string. {error_message}")
+
+        cell_values = type_info.split(" ")
+
+        if len(cell_values) < 3 or cell_values[1] != "type" or cell_values[2] != "metadata":
+            raise ValueError(f"Cell C1 is improperly formatted. {error_message}")
+
+        return cell_values[0]
+
+    def read_metadata_from_excel(self, filename: str, metadata_class: Optional[Type[BaseModel]] = None) -> BaseModel:
+        """
+        Read in metadata from an appropriately formatted Excel file as a pydantic object.
+
+        If using a standard metadata type (document, resource, script, survey, table, timeseries, timeseries_db,
+        video) there is no need to pass in the metadata_class. But if using a template, the class must be provided.
+
+        Args:
+            filename (str): The path to the Excel file.
+            metadata_class (Optional type of BaseModel): A pydantic class type corresponding to the type used to
+                write the Excel file
+
+        Returns:
+            BaseModel: a pydantic object containing the metadata from the file
+        """
+        metadata_name = self._get_metadata_name_from_excel_file(filename)
+        try:
+            metadata_name = self.standardize_metadata_name(metadata_name)
+            schema = self._TYPE_TO_SCHEMA[metadata_name]
+            reader = self._TYPE_TO_READER[metadata_name]
+        except ValueError:
+            if metadata_class is None:
+                raise ValueError(
+                    f"'{metadata_name}' not supported.
Must be: {list(self._TYPE_TO_SCHEMA.keys())} or try passing in the metadata_class" + ) + schema = metadata_class + reader = excel_single_sheet_to_pydantic + read_object = reader(filename, schema) + + skeleton_object = self.create_metadata_outline(metadata_name_or_class=schema, debug=False) + + read_object_dict = read_object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True) + combined_dict = merge_dicts( + skeleton_object.model_dump(), + read_object_dict, + ) + combined_dict = standardize_keys_in_dict(combined_dict) + new_ob = schema(**combined_dict) + return new_ob + + def _raise_if_unsupported_metadata_name(self, metadata_name: str): + """ + If the type is specifically unsupported - geospatial or image - a NotImplementedError is raised + If the type is simply unknown then a ValueError is raised. + """ + if metadata_name == "image": + raise NotImplementedError("Due to an issue with image metadata schema definition causing __root__ errors") + if metadata_name not in self._TYPE_TO_SCHEMA.keys(): + raise ValueError(f"'{metadata_name}' not supported. Must be: {list(self._TYPE_TO_SCHEMA.keys())}") diff --git a/pydantic_schemas/microdata_schema.py b/pydantic_schemas/microdata_schema.py index f3bb4d3..28646c2 100644 --- a/pydantic_schemas/microdata_schema.py +++ b/pydantic_schemas/microdata_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: microdata-schema.json -# timestamp: 2024-07-24T21:06:25+00:00 +# timestamp: 2024-08-29T18:53:43+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field, constr -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class AccessPolicy(Enum): diff --git a/pydantic_schemas/resource_schema.py b/pydantic_schemas/resource_schema.py new file mode 100644 index 0000000..de2a619 --- /dev/null +++ b/pydantic_schemas/resource_schema.py @@ -0,0 +1,64 @@ +# generated by datamodel-codegen: +# filename: resource-schema.json +# timestamp: 2024-08-29T18:53:45+00:00 + +from __future__ import annotations + +from typing import Optional + +from pydantic import Field + +from .utils.schema_base_model import SchemaBaseModel + + +class Model(SchemaBaseModel): + """ + External resource schema + """ + + dctype: Optional[str] = Field( + "doc/oth", + description=( + "Document types for external resource e.g. `doc/adm` \n* `doc/adm` - Document, Administrative [doc/adm] \n*" + " `doc/anl` - Document, Analytical [doc/anl] \n* `doc/oth` - Document, Other [doc/oth] \n* `doc/qst` -" + " Document, Questionnaire [doc/qst] \n* `doc/ref` - Document, Reference [doc/ref] \n* `doc/rep` - Document," + " Report [doc/rep] \n* `doc/tec` - Document, Technical [doc/tec] \n* `aud` - Audio [aud]\n* `dat` -" + " Database [dat]\n* `map` - Map [map]\n* `dat/micro` - Microdata File [dat/micro]\n* `pic` - Photo [pic]\n*" + " `prg` - Program [prg]\n* `tbl` - Table [tbl]\n* `vid` - Video [vid] \n* `web` - Web Site [web]" + ), + title="Resource type", + ) + dcformat: Optional[str] = Field( + None, + description=( + "Document file format e.g. 
`application/zip` \n* `application/x-compressed` - Compressed, Generic \n*" + " `application/zip` - Compressed, ZIP \n* `application/x-cspro` - Data, CSPro \n* `application/dbase` -" + " Data, dBase \n* `application/msaccess` - Data, Microsoft Access \n* `application/x-sas` - Data, SAS " + " \n* `application/x-spss` - Data, SPSS \n* `application/x-stata` - Data, Stata \n* `text` - Document," + " Generic \n* `text/html` - Document, HTML \n* `application/msexcel` - Document, Microsoft Excel \n*" + " `application/mspowerpoint` - Document, Microsoft PowerPoint \n* `application/msword` - Document," + " Microsoft Word \n* `application/pdf` - Document, PDF \n* `application/postscript` - Document," + " Postscript \n* `text/plain` - Document, Plain \n* `text/wordperfect` - Document, WordPerfect \n*" + " `image/gif` - Image, GIF \n* `image/jpeg` - Image, JPEG \n* `image/png` - Image, PNG \n*" + " `image/tiff` - Image, TIFF" + ), + title="Resource Format", + ) + title: str = Field(..., description="Title") + author: Optional[str] = Field(None, description="Author") + dcdate: Optional[str] = Field(None, description="Date") + country: Optional[str] = Field(None, description="Country") + language: Optional[str] = Field(None, description="Language") + contributor: Optional[str] = Field(None, description="Contributor") + publisher: Optional[str] = Field(None, description="Publisher") + rights: Optional[str] = Field(None, description="Rights") + description: Optional[str] = Field(None, description="Description") + abstract: Optional[str] = Field(None, description="Abstract") + toc: Optional[str] = Field(None, description="TOC") + filename: Optional[str] = Field( + None, + description=( + "Resource file name or URL. For uploading a file, use the field `file` in formData or use the `Upload file`" + " endpoint." + ), + ) diff --git a/pydantic_schemas/schema_interface.py b/pydantic_schemas/schema_interface.py deleted file mode 100644 index 5bf3a13..0000000 --- a/pydantic_schemas/schema_interface.py +++ /dev/null @@ -1,276 +0,0 @@ -from typing import Dict, Optional, Type - -from openpyxl import load_workbook -from pydantic import BaseModel - -from . 
import ( # image_schema, - document_schema, - geospatial_schema, - microdata_schema, - script_schema, - table_schema, - timeseries_db_schema, - timeseries_schema, - video_schema, -) -from .utils.excel_to_pydantic import excel_doc_to_pydantic, excel_single_sheet_to_pydantic -from .utils.pydantic_to_excel import write_across_many_sheets, write_to_single_sheet -from .utils.quick_start import make_skeleton -from .utils.template_to_pydantic import pydantic_from_template -from .utils.utils import standardize_keys_in_dict - - -class SchemaInterface: - """ - Interface with Excel for creating, saving and updating metadata for various types: - documents, scripts, survey, table, timeseries, timeseries_db, video - - Retrieve pydantic model definitions for each metadata type - """ - - _TYPE_TO_SCHEMA = { - "document": document_schema.ScriptSchemaDraft, - "geospatial": geospatial_schema.GeospatialSchema, - # "image":image_schema.ImageDataTypeSchema, - "script": script_schema.ResearchProjectSchemaDraft, - "survey": microdata_schema.MicrodataSchema, - "table": table_schema.Model, - "timeseries": timeseries_schema.TimeseriesSchema, - "timeseries_db": timeseries_db_schema.TimeseriesDatabaseSchema, - "video": video_schema.Model, - } - - _TYPE_TO_WRITER = { - "document": write_across_many_sheets, - # "geospatial":, - # "image":, - "script": write_across_many_sheets, - "survey": write_across_many_sheets, - "table": write_across_many_sheets, - "timeseries": write_across_many_sheets, - "timeseries_db": write_to_single_sheet, # one sheet - "video": write_to_single_sheet, # one sheet - } - - _TYPE_TO_READER = { - "document": excel_doc_to_pydantic, - # "geospatial":, - # "image":, - "script": excel_doc_to_pydantic, - "survey": excel_doc_to_pydantic, - "table": excel_doc_to_pydantic, - "timeseries": excel_doc_to_pydantic, - "timeseries_db": excel_single_sheet_to_pydantic, # one sheet - "video": excel_single_sheet_to_pydantic, # one sheet - } - - def get_metadata_class(self, metadata_type: str): - metadata_type = self.standardize_metadata_type_name(metadata_type) - schema = self._TYPE_TO_SCHEMA[metadata_type] - return schema - - def template_to_pydantic( - self, template: Dict, parent_schema_type: str, name: Optional[str] = None - ) -> Type[BaseModel]: - schema = self.get_metadata_class(parent_schema_type) - - return pydantic_from_template(template, schema, name) - - def list_metadata_types(self): - return list(self._TYPE_TO_SCHEMA.keys()) - - @staticmethod - def _merge_dicts(base, update): - if len(update) == 0: - return base - new_dict = {} - for key, base_value in base.items(): - if key in update: - update_value = update[key] - if isinstance(base_value, dict): - if isinstance(update_value, dict) and len(update_value) > 0: - new_dict[key] = SchemaInterface._merge_dicts(base_value, update_value) - else: - new_dict[key] = base_value - elif isinstance(base_value, list): - if isinstance(update_value, list) and len(update_value) > 0: - new_list = [] - min_length = min(len(base_value), len(update_value)) - for i in range(min_length): - if isinstance(base_value[i], dict): - if isinstance(update_value[i], dict): - new_list.append(SchemaInterface._merge_dicts(base_value[i], update_value[i])) - else: - new_list.append(base_value[i]) - else: - new_list.append(update_value[i]) - new_list.extend(update_value[min_length:]) - new_dict[key] = new_list - else: - new_dict[key] = base_value - else: - if update_value is not None: - new_dict[key] = update_value - else: - new_dict[key] = base_value - else: - new_dict[key] = base_value - 
return new_dict - - def standardize_metadata_type_name(self, metadata_type: str) -> str: - metadata_type = metadata_type.lower() - metadata_type = metadata_type.replace("-", "_") - if metadata_type == "microdata" or metadata_type == "survey_microdata": - metadata_type = "survey" - self._raise_if_unsupported_metadata_type(metadata_type=metadata_type) - return metadata_type - - def type_to_outline(self, metadata_type: str, debug: bool = False) -> BaseModel: - schema = self.get_metadata_class(metadata_type) - skeleton_object = make_skeleton(schema, debug=debug) - return skeleton_object - - def write_outline_metadata_to_excel( - self, metadata_type: str, filename: Optional[str] = None, title: Optional[str] = None - ) -> str: - """ - Create an Excel file formatted for writing the given metadata_type metadata. - - Args: - metadata_type (str): the name of a supported metadata type, currently: - document, script, series, survey, table, timeseries, timeseries_DB, video - Currently not supported: - geospatial, image - filename (Optional[str]): The path to the Excel file. If None, defaults to {metadata_type}_metadata.xlsx - title (Optional[str]): The title for the Excel sheet. If None, defaults to '{metadata_type} Metadata' - - Returns: - str: filename of metadata file - - Outputs: - An Excel file into which metadata can be entered - """ - metadata_type = self.standardize_metadata_type_name(metadata_type) - if metadata_type == "geospatial": - raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel") - - if filename is None: - filename = f"{metadata_type}_metadata.xlsx" - if not str(filename).endswith(".xlsx"): - filename += ".xlsx" - if title is None: - title = f"{metadata_type.capitalize()} Metadata" - skeleton_object = self.type_to_outline(metadata_type, debug=False) - writer = self._TYPE_TO_WRITER[metadata_type] - writer(filename, skeleton_object, metadata_type, title) - return filename - - def save_metadata_to_excel( - self, metadata_type: str, object: BaseModel, filename: Optional[str] = None, title: Optional[str] = None - ) -> str: - """ - Save an Excel document of the given metadata_type metadata. - - Args: - metadata_type (str): the name of a supported metadata type, currently: - document, script, series, survey, table, timeseries, timeseries_db, video - Currently not supported: - geospatial, image - object (BaseModel): The pydantic object to save to the Excel file. - filename (Optional[str]): The path to the Excel file. Defaults to {name}_metadata.xlsx - title (Optional[str]): The title for the Excel sheet. Defaults to '{name} Metadata' - - Returns: - str: filename of metadata file - - Outputs: - An Excel file containing the metadata from the pydantic object. This file can be updated as needed. 
- """ - metadata_type = self.standardize_metadata_type_name(metadata_type) - if metadata_type == "geospatial": - raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel") - - if filename is None: - filename = f"{metadata_type}_metadata.xlsx" - if not str(filename).endswith(".xlsx"): - filename += ".xlsx" - if title is None: - title = f"{metadata_type.capitalize()} Metadata" - - skeleton_object = self.type_to_outline(metadata_type=metadata_type, debug=False) - combined_dict = self._merge_dicts( - skeleton_object.model_dump(), - object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True), - ) - combined_dict = standardize_keys_in_dict(combined_dict) - - schema = self._TYPE_TO_SCHEMA[metadata_type] - new_ob = schema(**combined_dict) - - writer = self._TYPE_TO_WRITER[metadata_type] - writer(filename, new_ob, metadata_type, title) - return filename - - @staticmethod - def _get_metadata_type_from_excel_file(filename: str) -> str: - error_message = "Improperly formatted Excel file for metadata" - workbook = load_workbook(filename) - # Select the 'metadata' sheet - try: - sheet = workbook["metadata"] - # Get the value of cell C1 - type_info = sheet["C1"].value - except KeyError: - raise ValueError(f"Sheet 'metadata' not found. {error_message}") - except Exception as e: - raise ValueError(f"Error reading Excel file: {e}") - finally: - # Close the workbook - workbook.close() - - if not type_info or not isinstance(type_info, str): - raise ValueError(f"Cell C3 is empty or not a string. {error_message}") - - cell_values = type_info.split(" ") - - if len(cell_values) < 3 or cell_values[1] != "type" or cell_values[2] != "metadata": - raise ValueError(f"Cell C3 is improperly formatted. {error_message}") - - return cell_values[0] - - def read_metadata_from_excel(self, filename: str) -> BaseModel: - """ - Read in metadata_type metadata from an appropriately formatted Excel file as a pydantic object. - - Args: - filename (str): The path to the Excel file. - - Returns: - BaseModel: a pydantic object containing the metadata from the file - """ - metadata_type = self._get_metadata_type_from_excel_file(filename) - metadata_type = self.standardize_metadata_type_name(metadata_type) - schema = self._TYPE_TO_SCHEMA[metadata_type] - reader = self._TYPE_TO_READER[metadata_type] - read_object = reader(filename, schema) - skeleton_object = self.type_to_outline(metadata_type=metadata_type, debug=False) - - read_object_dict = read_object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True) - combined_dict = self._merge_dicts( - skeleton_object.model_dump(), - read_object_dict, - ) - combined_dict = standardize_keys_in_dict(combined_dict) - schema = self._TYPE_TO_SCHEMA[metadata_type] - new_ob = schema(**combined_dict) - return new_ob - - def _raise_if_unsupported_metadata_type(self, metadata_type: str): - """ - If the type is specifically unsupported - geospatial or image - a NotImplementedError is raised - If the type is simply unknown then a ValueError is raised. - """ - if metadata_type == "image": - raise NotImplementedError("Due to an issue with image metadata schema definition causing __root__ errors") - if metadata_type not in self._TYPE_TO_SCHEMA.keys(): - raise ValueError(f"'{metadata_type}' not supported. 
Must be: {list(self._TYPE_TO_SCHEMA.keys())}") diff --git a/pydantic_schemas/script_schema.py b/pydantic_schemas/script_schema.py index 17ef719..cb9e2ee 100644 --- a/pydantic_schemas/script_schema.py +++ b/pydantic_schemas/script_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: script-schema.json -# timestamp: 2024-07-24T21:06:27+00:00 +# timestamp: 2024-08-29T18:53:47+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Overwrite(Enum): diff --git a/pydantic_schemas/table_schema.py b/pydantic_schemas/table_schema.py index d04c0bb..6c0a88d 100644 --- a/pydantic_schemas/table_schema.py +++ b/pydantic_schemas/table_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: table-schema.json -# timestamp: 2024-07-24T21:06:30+00:00 +# timestamp: 2024-08-29T18:53:48+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Overwrite(Enum): diff --git a/pydantic_schemas/tests/test_excel_interface.py b/pydantic_schemas/tests/test_excel_interface.py index 4d6203e..543501d 100644 --- a/pydantic_schemas/tests/test_excel_interface.py +++ b/pydantic_schemas/tests/test_excel_interface.py @@ -1,25 +1,25 @@ import pytest -from pydantic_schemas.schema_interface import SchemaInterface +from pydantic_schemas.metadata_manager import MetadataManager @pytest.mark.parametrize( - "metadata_type", ["document", "script", "series", "survey", "table", "timeseries_db", "timeseries", "video"] + "metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"] ) -def test_metadata(tmpdir, metadata_type): - ei = SchemaInterface() +def test_metadata(tmpdir, metadata_name): + ei = MetadataManager() # Write empty metadata - filename = ei.write_outline_metadata_to_excel( - metadata_type=metadata_type, filename=tmpdir.join(f"test_{metadata_type}.xlsx"), title=metadata_type + filename = ei.write_metadata_outline_to_excel( + metadata_name_or_class=metadata_name, filename=tmpdir.join(f"test_{metadata_name}.xlsx"), title=metadata_name ) # Read the metadata back tmp = ei.read_metadata_from_excel(filename=filename) # Save the read metadata to a new file - filename2 = tmpdir.join(f"test_{metadata_type}_2.xlsx") - ei.save_metadata_to_excel(metadata_type=metadata_type, object=tmp, filename=filename2, title=metadata_type) + filename2 = tmpdir.join(f"test_{metadata_name}_2.xlsx") + ei.save_metadata_to_excel(metadata_name_or_class=metadata_name, object=tmp, filename=filename2, title=metadata_name) # make an outline object - ei.type_to_outline(metadata_type=metadata_type) + ei.create_metadata_outline(metadata_name_or_class=metadata_name) diff --git a/pydantic_schemas/tests/test_pydantic_to_excel.py b/pydantic_schemas/tests/test_pydantic_to_excel.py index ef64653..6090fd1 100644 --- a/pydantic_schemas/tests/test_pydantic_to_excel.py +++ b/pydantic_schemas/tests/test_pydantic_to_excel.py @@ -12,7 +12,6 @@ # from pydantic_schemas.definitions.image_schema import ImageDataTypeSchema from pydantic_schemas.microdata_schema import MicrodataSchema from pydantic_schemas.script_schema import ResearchProjectSchemaDraft -from pydantic_schemas.series_schema import Series from pydantic_schemas.table_schema import Model as TableModel from pydantic_schemas.timeseries_db_schema import 
TimeseriesDatabaseSchema from pydantic_schemas.timeseries_schema import TimeseriesSchema @@ -365,7 +364,6 @@ class WithDict(BaseModel): # "Image":ImageDataTypeSchema, "Survey": (MicrodataSchema, write_across_many_sheets, excel_doc_to_pydantic), "Script": (ResearchProjectSchemaDraft, write_across_many_sheets, excel_doc_to_pydantic), - "Series": (Series, write_to_single_sheet, excel_single_sheet_to_pydantic), # should be one sheet "Table": (TableModel, write_across_many_sheets, excel_doc_to_pydantic), "Timeseries_DB": ( TimeseriesDatabaseSchema, diff --git a/pydantic_schemas/timeseries_db_schema.py b/pydantic_schemas/timeseries_db_schema.py index 6c5bc95..add308e 100644 --- a/pydantic_schemas/timeseries_db_schema.py +++ b/pydantic_schemas/timeseries_db_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: timeseries-db-schema.json -# timestamp: 2024-07-24T21:06:31+00:00 +# timestamp: 2024-08-29T18:53:50+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Overwrite(Enum): diff --git a/pydantic_schemas/timeseries_schema.py b/pydantic_schemas/timeseries_schema.py index b3f39a2..dd5dcab 100644 --- a/pydantic_schemas/timeseries_schema.py +++ b/pydantic_schemas/timeseries_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: timeseries-schema.json -# timestamp: 2024-07-24T21:06:33+00:00 +# timestamp: 2024-08-29T18:53:52+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Producer(SchemaBaseModel): diff --git a/pydantic_schemas/utils/quick_start.py b/pydantic_schemas/utils/quick_start.py index ece358f..de09833 100644 --- a/pydantic_schemas/utils/quick_start.py +++ b/pydantic_schemas/utils/quick_start.py @@ -14,7 +14,6 @@ # "image_schema": "ImageDataTypeSchema", "microdata_schema": "MicrodataSchema", "script_schema": "ResearchProjectSchemaDraft", - "series_schema": "Series", "table_schema": "Model", "timeseries_db_schema": "TimeseriesDatabaseSchema", "timeseries_schema": "TimeseriesSchema", diff --git a/pydantic_schemas/schema_base_model.py b/pydantic_schemas/utils/schema_base_model.py similarity index 100% rename from pydantic_schemas/schema_base_model.py rename to pydantic_schemas/utils/schema_base_model.py diff --git a/pydantic_schemas/utils/template_to_pydantic.py b/pydantic_schemas/utils/template_to_pydantic.py deleted file mode 100644 index f44d311..0000000 --- a/pydantic_schemas/utils/template_to_pydantic.py +++ /dev/null @@ -1,189 +0,0 @@ -import warnings -from typing import Dict, List, Optional, Tuple, Type - -from pydantic import BaseModel, Field, create_model - -from .utils import get_subtype_of_optional_or_list, is_list_annotation, is_optional_annotation, standardize_keys_in_dict - - -def get_child_field_info_from_dot_annotated_name(name, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "get_child_field_info_from_dot_annotated_name" - name_split = name.split(".") - for key in name_split[:-1]: - parent_schema = parent_schema.model_fields[key].annotation - if is_optional_annotation(parent_schema) or is_list_annotation(parent_schema): - parent_schema = get_subtype_of_optional_or_list(parent_schema) - if not isinstance(parent_schema, type(BaseModel)): - raise KeyError(name) - try: - child_field_info = parent_schema.model_fields[name_split[-1]] - 
except KeyError as e: - raise KeyError(name) from e - except: - raise ValueError(f"name={name}, parent_schema={parent_schema}") - return child_field_info - - -def define_simple_element(item, parent_schema, element_type=str): - assert isinstance(parent_schema, type(BaseModel)), "define_simple_element" - assert ( - isinstance(item, dict) and "type" in item and item["type"] in ["string", "text", "integer", "number", "boolean"] - ), f"expected string, integer or boolean item, got {item}" - try: - child_field_info = get_child_field_info_from_dot_annotated_name(item["key"], parent_schema) - if "title" in item: - child_field_info.title = item["title"] - if "description" in item: - child_field_info.description = item["description"] - except KeyError as e: - warnings.warn(f"KeyError: {e}. Proceeding since {item['key']} is a string type.", UserWarning) - child_field_info = Field(..., title=item["title"]) - if "help_text" in item: - child_field_info.description = item["help_text"] - if "required" in item and item["required"]: - field_type = element_type, child_field_info - else: - child_field_info.default = None - field_type = Optional[element_type], child_field_info - return {item["key"]: field_type} - - -def get_children_of_props(props, parent_schema) -> Dict[str, Tuple["type_annotation", "field_info"]]: - assert isinstance(parent_schema, type(BaseModel)), "get_children_of_props" - children = {} - for prop in props: - if "prop_key" not in prop: - children.update(template_type_handler(prop, parent_schema)) - else: - name = prop["prop_key"] - try: - child_field_info = get_child_field_info_from_dot_annotated_name(name, parent_schema) - if "title" in prop: - child_field_info.title = prop["title"] - if "help_text" in prop: - child_field_info.description = prop["help_text"] - child_field = child_field_info.annotation, child_field_info - children[prop["key"]] = child_field - except KeyError as e: - children.update(template_type_handler(prop, parent_schema)) - return children - - -def define_array_element(item, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "define_array_element" - assert "type" in item and ( - item["type"] == "array" or item["type"] == "nested_array" - ), f"expected array item but got {item}" - assert "key" in item, f"expected key in item but got {item.keys()}" - if "props" not in item: - warnings.warn(f"array without type found, assuming array of str: {item}") - field_info = Field(..., title=item["title"]) - if "help_text" in item: - field_info.description = item["help_text"] - return {item["key"]: (List[str], field_info)} - else: - children = get_children_of_props(item["props"], parent_schema) - item_element = create_model(f"{item['key']}_item", **children) - return {item["key"]: (List[item_element], item_element)} - - -def define_simple_array_element(item, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "define_simple_array_element" - assert ( - isinstance(item, dict) and "type" in item and item["type"] == "simple_array" - ), f"expected simple_array item, got {item}" - try: - child_field_info = get_child_field_info_from_dot_annotated_name(item["key"], parent_schema) - if "title" in item: - child_field_info.title = item["title"] - if "description" in item: - child_field_info.description = item["description"] - except KeyError as e: - warnings.warn(f"KeyError: {e}. 
Proceeding since {item['key']} is a simple_array type.", UserWarning) - child_field_info = Field(..., title=item["title"]) - if "help_test" in item: - child_field_info.description = item["help_text"] - if "required" in item and item["required"]: - field_type = List[str], child_field_info - else: - child_field_info.default = None - field_type = Optional[List[str]], child_field_info - return {item["key"]: field_type} - - -def define_from_section_container(item, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "define_from_section_container" - assert ( - isinstance(item, dict) and "type" in item and item["type"] == "section_container" - ), f"expected section_container got {item}" - name = item["key"] - sub_model = create_model(name, **define_group_of_elements(item["items"], parent_schema)) - sub_field = Field(...) - if "title" in item: - sub_field.title = item["title"] - if "required" not in item or not item["required"]: - sub_field.default = None - return {name: (sub_model, sub_field)} - - -def define_group_of_elements(items, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "define_group_of_elements" - elements = {} - for i, item in enumerate(items): - if "is_custom" in item and item["is_custom"] == True: - if "additional" not in elements: - elements["additional"] = {} - elements["additional"].update(template_type_handler(item, parent_schema)) - elements["additional"] = standardize_keys_in_dict(elements["additional"], pascal_to_snake=True) - else: - elements.update(template_type_handler(item, parent_schema)) - elements = standardize_keys_in_dict(elements, pascal_to_snake=True) - if "additional" in elements: - additional = elements.pop("additional") - additional = create_model("additional", **additional) - sub_field = Field(...) 
-            sub_field.title = "additional"
-        elements["additional"] = additional, sub_field
-    return elements
-
-
-def template_type_handler(item, parent_schema):
-    assert isinstance(parent_schema, type(BaseModel)), "template_type_handler"
-    if item["type"] == "section_container":
-        return define_from_section_container(item, parent_schema)
-    elif item["type"] in ["string", "text"]:
-        return define_simple_element(item, parent_schema, str)
-    elif item["type"] in ["integer", "number"]:
-        return define_simple_element(item, parent_schema, int)
-    elif item["type"] == "boolean":
-        return define_simple_element(item, parent_schema, bool)
-    elif item["type"] in ["array", "nested_array"]:
-        return define_array_element(item, parent_schema)
-    elif item["type"] == "simple_array":
-        return define_simple_array_element(item, parent_schema)
-    elif item["type"] == "section":
-        warnings.warn(f"encountered section {item['key']}, {item['title']}, ignoring this heirarchy and appending")
-        if "items" in item:
-            return define_group_of_elements(item["items"], parent_schema)
-        elif "props" in item:
-            return define_group_of_elements(item["props"], parent_schema)
-        else:
-            raise ValueError(f"section does not contain items or props, found only {item}")
-    else:
-        raise NotImplementedError(f"type {item['type']}, {item}")
-
-
-def pydantic_from_template(
-    template: Dict, parent_schema: Type[BaseModel], name: Optional[str] = None
-) -> Type[BaseModel]:
-    assert isinstance(parent_schema, type(BaseModel)), "pydantic_from_template"
-    assert "items" in template, f"expected 'items' in template but got {list(template.keys())}"
-    m = define_group_of_elements(template["items"], parent_schema)
-    m = standardize_keys_in_dict(m, pascal_to_snake=True)
-    if name is None:
-        if "title" in template:
-            name = template["title"]
-        else:
-            name = "new_model"
-        name = name.replace(" ", "_").rstrip("_").split(".")[-1]
-    return create_model(name, **m)
diff --git a/pydantic_schemas/utils/utils.py b/pydantic_schemas/utils/utils.py
index 1e4a923..a7b3d02 100644
--- a/pydantic_schemas/utils/utils.py
+++ b/pydantic_schemas/utils/utils.py
@@ -110,6 +110,62 @@ def seperate_simple_from_pydantic(ob: BaseModel) -> Dict[str, Dict]:
     return {"simple": simple_children, "pydantic": pydantic_children}
 
 
+def merge_dicts(base, update):
+    """merge a pair of dictionaries in which the values are themselves either dictionaries to be merged or lists of
+    dictionaries to be merged"""
+    if len(update) == 0:
+        return base
+    elif len(base) == 0:
+        return update
+    new_dict = {}
+    for key, base_value in base.items():
+        if key in update:
+            update_value = update[key]
+            if isinstance(base_value, dict):
+                if isinstance(update_value, dict):
+                    new_dict[key] = merge_dicts(base_value, update_value)
+                else:
+                    new_dict[key] = base_value
+            elif isinstance(base_value, list):
+                if isinstance(update_value, list) and len(update_value) > 0:
+                    new_list = []
+                    min_length = min(len(base_value), len(update_value))
+                    for i in range(min_length):
+                        if isinstance(base_value[i], dict):
+                            if isinstance(update_value[i], dict):
+                                new_list.append(merge_dicts(base_value[i], update_value[i]))
+                            else:
+                                new_list.append(base_value[i])
+                        else:
+                            new_list.append(update_value[i])
+                    new_list.extend(update_value[min_length:])
+                    new_dict[key] = new_list
+                else:
+                    new_dict[key] = base_value
+            else:
+                if update_value is not None:
+                    new_dict[key] = update_value
+                else:
+                    new_dict[key] = base_value
+        else:
+            new_dict[key] = base_value
+    for key, update_value in update.items():
+        if key not in base:
+            new_dict[key] = 
update_value + return new_dict + + +def capitalize_first_letter(s): + if s: + return s[0].upper() + s[1:] + return s + + +def split_on_capitals(s): + # Use regular expression to split on capitalized letters + return re.findall(r"[a-z]+|[A-Z][a-z]*", s) + + def _standardize_keys_in_list_of_possible_dicts(lst: List[any], snake_to_pascal, pascal_to_snake) -> List[Any]: new_value = [] for item in lst: @@ -128,17 +184,6 @@ def _standardize_keys_in_list_of_possible_dicts(lst: List[any], snake_to_pascal, return new_value -def capitalize_first_letter(s): - if s: - return s[0].upper() + s[1:] - return s - - -def split_on_capitals(s): - # Use regular expression to split on capitalized letters - return re.findall(r"[a-z]+|[A-Z][a-z]*", s) - - def standardize_keys_in_dict( d: Dict[str, Any], snake_to_pascal: bool = False, pascal_to_snake: bool = False ) -> Dict[str, Any]: diff --git a/pydantic_schemas/video_schema.py b/pydantic_schemas/video_schema.py index f0a26c9..285479a 100644 --- a/pydantic_schemas/video_schema.py +++ b/pydantic_schemas/video_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: video-schema.json -# timestamp: 2024-07-24T21:06:35+00:00 +# timestamp: 2024-08-29T18:53:54+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Overwrite(Enum): diff --git a/pyproject.toml b/pyproject.toml index bb6a5d6..51d8047 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,12 @@ [tool.poetry] name = "metadataschemas" -version = "0.1.5" +version = "0.1.6" description = "" authors = ["Mehmood Asghar ", "Gordon Blackadder "] readme = "README.md" packages = [ { include = "*_schema.py", from = "pydantic_schemas", to = "metadataschemas"}, - { include = "schema_base_model.py", from = "pydantic_schemas", to = "metadataschemas"}, - { include = "schema_interface.py", from = "pydantic_schemas", to = "metadataschemas"}, + { include = "metadata_manager.py", from = "pydantic_schemas", to = "metadataschemas"}, { include = "utils", from = "pydantic_schemas", to = "metadataschemas"}, ]
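
For reference, here is a minimal usage sketch of the renamed `MetadataManager` interface introduced by this diff. The import path, class, method, and argument names all come from `pydantic_schemas/metadata_manager.py` and the updated `test_excel_interface.py` above; the file names are illustrative.

```python
from pydantic_schemas.metadata_manager import MetadataManager

manager = MetadataManager()

# The supported type names, e.g. 'document', 'resource', 'survey', 'timeseries_db', ...
print(manager.metadata_type_names)

# Write an empty, formatted Excel outline for document metadata
filename = manager.write_metadata_outline_to_excel(
    metadata_name_or_class="document", filename="test_document.xlsx", title="document"
)

# Read the (possibly hand-edited) workbook back in as a pydantic object
doc_metadata = manager.read_metadata_from_excel(filename=filename)

# Save the pydantic object out to a fresh workbook
manager.save_metadata_to_excel(
    metadata_name_or_class="document", object=doc_metadata, filename="test_document_2.xlsx", title="document"
)
```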