diff --git a/README.md b/README.md index efdf8e7..0189f2c 100644 --- a/README.md +++ b/README.md @@ -97,19 +97,21 @@ microdata_metadata.study_desc.title_statement.idno = "project_idno" ## Updating Schemas -First create a branch from the main branch. +First create a branch from the main branch. Branch names should follow the pattern 'schema/\/\'. Then make the change you want to the json schema in the schemas folder. -Then in pyproject.toml update the version number, changing either the major, minor or patch number as appropriate. +Then in pyproject.toml update the version number, changing either the major, minor or patch number as appropriate given the conventions below. + +After, update the version number of the **specific schema you updated** in the json_to_python_config.yaml file to match the version number in pyproject.toml. Next update the pydantic schemas so that they match the latest json schemas by running - `python pydantic_schemas/generators/generate_pydantic_schemas.py` + python pydantic_schemas/generators/generate_pydantic_schemas.py Finally update the Excel sheets by running - `python -m pydantic_schemas.generators.generate_excel_files` + python -m pydantic_schemas.generators.generate_excel_files ## Versioning conventions for schemas diff --git a/excel_sheets/Document_metadata.xlsx b/excel_sheets/Document_metadata.xlsx index 2020cc3..fc3b5bb 100644 Binary files a/excel_sheets/Document_metadata.xlsx and b/excel_sheets/Document_metadata.xlsx differ diff --git a/excel_sheets/Geospatial_metadata.xlsx b/excel_sheets/Geospatial_metadata.xlsx index 77d68d8..352ffe4 100644 Binary files a/excel_sheets/Geospatial_metadata.xlsx and b/excel_sheets/Geospatial_metadata.xlsx differ diff --git a/excel_sheets/Image_metadata.xlsx b/excel_sheets/Image_metadata.xlsx index 99809cc..a9c67e3 100644 Binary files a/excel_sheets/Image_metadata.xlsx and b/excel_sheets/Image_metadata.xlsx differ diff --git a/excel_sheets/Indicator_metadata.xlsx b/excel_sheets/Indicator_metadata.xlsx index 1e8011d..db906d5 100644 Binary files a/excel_sheets/Indicator_metadata.xlsx and b/excel_sheets/Indicator_metadata.xlsx differ diff --git a/excel_sheets/Indicators_db_metadata.xlsx b/excel_sheets/Indicators_db_metadata.xlsx index a563383..b0c9892 100644 Binary files a/excel_sheets/Indicators_db_metadata.xlsx and b/excel_sheets/Indicators_db_metadata.xlsx differ diff --git a/excel_sheets/Microdata_metadata.xlsx b/excel_sheets/Microdata_metadata.xlsx index b6d47d5..d562faa 100644 Binary files a/excel_sheets/Microdata_metadata.xlsx and b/excel_sheets/Microdata_metadata.xlsx differ diff --git a/excel_sheets/Resource_metadata.xlsx b/excel_sheets/Resource_metadata.xlsx index 1524740..40ccad4 100644 Binary files a/excel_sheets/Resource_metadata.xlsx and b/excel_sheets/Resource_metadata.xlsx differ diff --git a/excel_sheets/Script_metadata.xlsx b/excel_sheets/Script_metadata.xlsx index 2b725d5..ae31c90 100644 Binary files a/excel_sheets/Script_metadata.xlsx and b/excel_sheets/Script_metadata.xlsx differ diff --git a/excel_sheets/Table_metadata.xlsx b/excel_sheets/Table_metadata.xlsx index 16cc3fc..3fbb790 100644 Binary files a/excel_sheets/Table_metadata.xlsx and b/excel_sheets/Table_metadata.xlsx differ diff --git a/excel_sheets/Video_metadata.xlsx b/excel_sheets/Video_metadata.xlsx index 7a541a6..d24cf00 100644 Binary files a/excel_sheets/Video_metadata.xlsx and b/excel_sheets/Video_metadata.xlsx differ diff --git a/json_to_python_config.yaml b/json_to_python_config.yaml new file mode 100644 index 0000000..b1740b8 --- /dev/null +++ b/json_to_python_config.yaml @@ -0,0 +1,59 @@ +document: + version: 0.1.0 + json_file: document-schema.json + python_file: document_schema.py + model_name: ScriptSchemaDraft + +geospatial: + version: 0.1.0 + json_file: geospatial-schema.json + python_file: geospatial_schema.py + model_name: GeospatialSchema + +image: + version: 0.1.0 + json_file: image-schema.json + python_file: image_schema.py + model_name: ImageDataTypeSchema + +microdata: + version: 0.1.0 + json_file: microdata-schema.json + python_file: microdata_schema.py + model_name: DdiSchema + +resource: + version: 0.1.0 + json_file: resource-schema.json + python_file: resource_schema.py + model_name: Model + +script: + version: 0.1.0 + json_file: script-schema.json + python_file: script_schema.py + model_name: ResearchProjectSchemaDraft + +table: + version: 0.1.0 + json_file: table-schema.json + python_file: table_schema.py + model_name: Model + +indicators_db: + version: 0.1.0 + json_file: timeseries-db-schema.json + python_file: indicators_db_schema.py + model_name: TimeseriesDatabaseSchema + +indicator: + version: 0.1.0 + json_file: timeseries-schema.json + python_file: indicator_schema.py + model_name: TimeseriesSchema + +video: + version: 0.1.0 + json_file: video-schema.json + python_file: video_schema.py + model_name: Model \ No newline at end of file diff --git a/pydantic_schemas/document_schema.py b/pydantic_schemas/document_schema.py index e21163f..a1f5e55 100644 --- a/pydantic_schemas/document_schema.py +++ b/pydantic_schemas/document_schema.py @@ -6,7 +6,7 @@ from enum import Enum from typing import Any, Dict, List, Optional -from pydantic import Extra, Field +from pydantic import ConfigDict, Field from .utils.schema_base_model import SchemaBaseModel @@ -23,9 +23,9 @@ class MetadataInformation(SchemaBaseModel): Document description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field(None, description="Document title", title="Document title") idno: Optional[str] = Field(None, title="Unique ID number for the document") producers: Optional[List[Producer]] = Field(None, description="List of producers", title="Producers") @@ -299,9 +299,9 @@ class DocumentDescription(SchemaBaseModel): Document Description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title_statement: TitleStatement = Field(..., description="Study title") authors: Optional[List[Author]] = Field(None, description="Authors", title="Authors") editors: Optional[List[Editor]] = Field(None, description="Editors", title="Editors") @@ -540,6 +540,9 @@ class ScriptSchemaDraft(SchemaBaseModel): Schema for Document data type """ + __metadata_type__ = "document" + __metadata_type_version__ = "0.1.0" + idno: Optional[str] = Field(None, description="Project unique identifier", title="Project unique identifier") metadata_information: Optional[MetadataInformation] = Field( None, description="Document description", title="Document metadata information" diff --git a/pydantic_schemas/generators/generate_excel_files.py b/pydantic_schemas/generators/generate_excel_files.py index a43725a..59fa9a3 100644 --- a/pydantic_schemas/generators/generate_excel_files.py +++ b/pydantic_schemas/generators/generate_excel_files.py @@ -30,8 +30,8 @@ def compare_excel_files(file1, file2): for row in ws1.iter_rows(): for cell in row: cell_address = cell.coordinate - if sheet_name == "metadata" and cell_address == "C1": - continue # Skip comparison for cell C1 in 'metadata' sheet which only contains the versioning number + # if sheet_name == "metadata" and cell_address == "C1": + # continue # Skip comparison for cell C1 in 'metadata' sheet which only contains the versioning number differences = [] if ws1[cell_address].value != ws2[cell_address].value: diff --git a/pydantic_schemas/generators/generate_pydantic_schemas.py b/pydantic_schemas/generators/generate_pydantic_schemas.py index a04344c..4e9fdc5 100644 --- a/pydantic_schemas/generators/generate_pydantic_schemas.py +++ b/pydantic_schemas/generators/generate_pydantic_schemas.py @@ -1,32 +1,33 @@ +# import importlib.metadata import os +import re from subprocess import run +import yaml + SCHEMA_DIR = "schemas" OUTPUT_DIR = os.path.join("pydantic_schemas") PYTHON_VERSION = "3.11" BASE_CLASS = ".utils.schema_base_model.SchemaBaseModel" - -INPUTS_TO_OUTPUTS = { - "document-schema.json": "document_schema.py", - "geospatial-schema.json": "geospatial_schema.py", - "image-schema.json": "image_schema.py", - "microdata-schema.json": "microdata_schema.py", - "resource-schema.json": "resource_schema.py", - "script-schema.json": "script_schema.py", - "table-schema.json": "table_schema.py", - "timeseries-db-schema.json": "indicators_db_schema.py", - "timeseries-schema.json": "indicator_schema.py", - "video-schema.json": "video_schema.py", -} +# __version__ = importlib.metadata.version("metadataschemas") if not os.path.exists(OUTPUT_DIR): os.makedirs(OUTPUT_DIR) -for input_file, output_file in INPUTS_TO_OUTPUTS.items(): - print(f"Generating pydantic schema for {input_file}") - input_path = os.path.join(SCHEMA_DIR, input_file) - output_path = os.path.join(OUTPUT_DIR, output_file).replace("-", "_") +with open("json_to_python_config.yaml", "r") as file: + data = yaml.safe_load(file) + +# for json_file, (python_file, metadata_type, schema_class_name) in INPUTS_TO_OUTPUTS.items(): +for section, details in data.items(): + json_file = details["json_file"] + python_file = details["python_file"] + model_name = details["model_name"] + version = details["version"] + + print(f"Generating pydantic schema for {json_file}") + input_path = os.path.join(SCHEMA_DIR, json_file) + output_path = os.path.join(OUTPUT_DIR, python_file).replace("-", "_") run( [ "datamodel-codegen", @@ -44,7 +45,21 @@ "--disable-timestamp", "--base-class", BASE_CLASS, + "--output-model-type", + "pydantic_v2.BaseModel", "--output", output_path, ] ) + + with open(output_path, "r") as file: + content = file.read() + + updated_content = re.sub( + f'class {model_name}\(SchemaBaseModel\):\n( """\n.*\n """)', # + lambda match: f"""class {model_name}(SchemaBaseModel):\n{match.group(1)}\n __metadata_type__ = "{section}"\n __metadata_type_version__ = "{version}" """, + content, + ) + + with open(output_path, "w") as file: + file.write(updated_content) diff --git a/pydantic_schemas/geospatial_schema.py b/pydantic_schemas/geospatial_schema.py index d04b1c7..a9da30c 100644 --- a/pydantic_schemas/geospatial_schema.py +++ b/pydantic_schemas/geospatial_schema.py @@ -6,7 +6,7 @@ from enum import Enum from typing import Any, Dict, List, Optional -from pydantic import Extra, Field, confloat +from pydantic import ConfigDict, Field, RootModel, confloat from .utils.schema_base_model import SchemaBaseModel @@ -23,9 +23,9 @@ class MetadataInformation(SchemaBaseModel): Document description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field(None, description="Document title", title="Document title") idno: Optional[str] = Field(None, title="Unique ID number for the document") producers: Optional[List[Producer]] = Field(None, description="List of producers", title="Producers") @@ -1478,6 +1478,9 @@ class GeospatialSchema(SchemaBaseModel): Geospatial draft schema """ + __metadata_type__ = "geospatial" + __metadata_type_version__ = "0.1.0" + idno: Optional[str] = Field(None, description="Project unique identifier", title="Project unique identifier") metadata_information: Optional[MetadataInformation] = Field( None, description="Document description", title="Document metadata information" @@ -1512,4 +1515,4 @@ class Locale(SchemaBaseModel): ) -OperationMetadata.update_forward_refs() +OperationMetadata.model_rebuild() diff --git a/pydantic_schemas/image_schema.py b/pydantic_schemas/image_schema.py index a71d648..c7ddb01 100644 --- a/pydantic_schemas/image_schema.py +++ b/pydantic_schemas/image_schema.py @@ -3,11 +3,10 @@ from __future__ import annotations -from datetime import datetime from enum import Enum from typing import Any, Dict, List, Optional -from pydantic import AnyUrl, Extra, Field, confloat +from pydantic import AnyUrl, AwareDatetime, ConfigDict, Field, confloat from .utils.schema_base_model import SchemaBaseModel @@ -33,9 +32,9 @@ class MetadataInformation(SchemaBaseModel): Document description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field(None, description="Document title", title="Document title") idno: Optional[str] = Field(None, title="Unique ID number for the document") producers: Optional[List[Producer]] = Field(None, description="List of producers", title="Producers") @@ -93,18 +92,18 @@ class MediaFragment(SchemaBaseModel): Object defining this fragement of a media asset - if ommitted = the whole asset """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) uri: AnyUrl delimitertype: Optional[Delimitertype] = None description: Optional[str] = None class ArtworkOrObject(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field( None, description="A reference for the artwork or object in the image.", @@ -149,7 +148,7 @@ class Config: ), title="Style Period {Artwork or Object detail}", ) - dateCreated: Optional[datetime] = Field( + dateCreated: Optional[AwareDatetime] = Field( None, description=( "Designates the date and optionally the time the artwork or object in the image was created. This relates" @@ -218,9 +217,9 @@ class Config: class CreatorContactInfo(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) country: Optional[str] = Field( None, description="The contact information country part.", title="Country {contact info detail}" ) @@ -263,9 +262,9 @@ class Config: class CvTerm(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) cvId: Optional[AnyUrl] = Field( None, description="The globally unique identifier of the Controlled Vocabulary the term is from.", @@ -289,9 +288,9 @@ class Config: class Device(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) manufacturer: Optional[str] = Field(None, description="Name of the manufacturer of the device") modelName: Optional[str] = Field(None, description="Name of the device model") serialNumber: Optional[str] = Field(None, description="Serial number, assigned by manufacturer") @@ -302,9 +301,9 @@ class Config: class EmbdEncRightsExpr(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) encRightsExpr: str = Field( ..., description=( @@ -325,9 +324,9 @@ class Config: class Entity(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) name: Optional[str] = Field(None, description="Full name of the entity/concept", title="Name") identifiers: Optional[List[AnyUrl]] = Field( None, description="Globally unique identifier of the entity/concept", title="Identifier" @@ -335,9 +334,9 @@ class Config: class EntityWRole(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) name: Optional[str] = Field(None, description="Full name of the entity/concept", title="Name") role: Optional[List[AnyUrl]] = Field( None, description="Identifier of the role the entity has in the context of the metadata property", title="Role" @@ -348,9 +347,9 @@ class Config: class EpisodeSeason(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) name: Optional[str] = Field(None, description="Name of the episode or season of a series", title="Name") identifier: Optional[AnyUrl] = Field( None, description="Identifier of the episode or season of a series", title="Identifier" @@ -359,17 +358,17 @@ class Config: class FrameSize(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) heightPixels: Optional[int] = Field(None, description="Height of the video frame in pixels", title="Height") widthPixels: Optional[int] = Field(None, description="Width of the video frame in pixels", title="Width") class LinkedEncRightsExpr(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) linkedRightsExpr: AnyUrl = Field( ..., description="Link to a rights expression using a rights expression language.", @@ -388,9 +387,9 @@ class Config: class Location(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) name: Optional[str] = Field(None, description="Full name of the location", title="Name") identifiers: Optional[List[AnyUrl]] = Field( None, description="Globally unique identifier of the location", title="Identifier" @@ -423,9 +422,9 @@ class Config: class PersonWDetails(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) name: Optional[str] = Field(None, description="Name of the person", title="Name") description: Optional[str] = Field(None, description="A textual description of the person", title="Description") identifiers: Optional[List[AnyUrl]] = Field( @@ -437,9 +436,9 @@ class Config: class Product(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) description: Optional[str] = Field( None, description="A textual description of the product.", title="Description {Product detail}" ) @@ -452,9 +451,9 @@ class Config: class ProductWGtin(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) name: Optional[str] = Field(None, description="Name of the product.", title="Name") gtin: str = Field( ..., @@ -465,10 +464,10 @@ class Config: class PublicationEvent(SchemaBaseModel): - class Config: - extra = Extra.forbid - - date: datetime = Field( + model_config = ConfigDict( + extra="forbid", + ) + date: AwareDatetime = Field( ..., description="Date and optionally the time of publishing the video", title="Publication Date" ) name: Optional[str] = Field( @@ -480,17 +479,17 @@ class Config: class QualifiedLink(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) link: Optional[AnyUrl] = Field(None, description="URL of the link", title="Link") linkQualifier: Optional[AnyUrl] = Field(None, description="Term qualifying the use of the link", title="Qualifier") class Rating(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) ratingSourceLink: AnyUrl = Field( ..., description=( @@ -527,9 +526,9 @@ class MeasureType(Enum): class RegionWDelimiter(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) regionAreaX: Optional[float] = Field( None, description="Horizontal axis value of the upper left corner of the rectange", @@ -553,9 +552,9 @@ class Config: class RegistryEntry(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) role: Optional[AnyUrl] = Field( None, description="An identifier of the reason and/or purpose for this Registry Entry.", title="Role" ) @@ -570,21 +569,21 @@ class Config: class Series(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) name: Optional[str] = Field(None, description="Name of the series", title="Series name") identifier: Optional[AnyUrl] = Field(None, description="Identifier for the series", title="Series identifier") class TemporalCoverage(SchemaBaseModel): - class Config: - extra = Extra.forbid - - tempCoverageFrom: Optional[datetime] = Field( + model_config = ConfigDict( + extra="forbid", + ) + tempCoverageFrom: Optional[AwareDatetime] = Field( None, description="Optionally truncated date when the temporal coverage starts", title="From Date" ) - tempCoverageTo: Optional[datetime] = Field( + tempCoverageTo: Optional[AwareDatetime] = Field( None, description="Optionally truncated date when the temporal coverage ends", title="To Date" ) @@ -602,9 +601,9 @@ class VideoTime(SchemaBaseModel): Frame of the video used for this still image """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) timeValue: str = Field( ..., description=( @@ -625,9 +624,9 @@ class XmpSequence(SchemaBaseModel): Reflects the structure of an rdf:Seq in XMP/XML """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) Ordered: Optional[List[Dict[str, Any]]] = None @@ -766,9 +765,9 @@ class PhotoVideoMetadataIPTC(SchemaBaseModel): Container for IPTC photo/video metadata """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field( None, description=( @@ -798,7 +797,7 @@ class Config: ), title="Digital Image GUID", ) - dateCreated: Optional[datetime] = Field( + dateCreated: Optional[AwareDatetime] = Field( None, description=( "Designates the date and optionally the time the content of the image was created rather than the date of" @@ -1078,16 +1077,16 @@ class IptcPmdSchema(SchemaBaseModel): Overall structure of photo metadata of a single media asset - sets of metadata for the whole asset and parts of the asset -- the properties comply with the IPTC Photo Metadata Standard 2017.1(IPTC/MS/2017-07-06) """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) photoVideoMetadataIPTC: PhotoVideoMetadataIPTC = Field(..., description="Container for IPTC photo/video metadata") class LinkedImage(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) link: AnyUrl = Field(..., description="Link URL locating the image resource") mediaType: Optional[str] = Field(None, description="IANA Media (MIME) Type") widthPixels: Optional[int] = Field(None, description="Width of the image in pixels") @@ -1113,6 +1112,9 @@ class ImageDataTypeSchema(SchemaBaseModel): Uses IPTC JSON schema. See for more details - http://www.iptc.org/std/photometadata/specification/IPTC-PhotoMetadata. """ + __metadata_type__ = "image" + __metadata_type_version__ = "0.1.0" + repositoryid: Optional[str] = Field( "central", description="Abbreviation for the collection that owns the document", diff --git a/pydantic_schemas/indicator_schema.py b/pydantic_schemas/indicator_schema.py index 8a0b79f..7529e62 100644 --- a/pydantic_schemas/indicator_schema.py +++ b/pydantic_schemas/indicator_schema.py @@ -6,7 +6,7 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from pydantic import Extra, Field +from pydantic import ConfigDict, Field from .utils.schema_base_model import SchemaBaseModel @@ -38,9 +38,9 @@ class MetadataInformation(SchemaBaseModel): Information on the production of the metadata """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field(None, description="Document title", title="Document title") idno: Optional[str] = Field(None, title="Unique ID number for the document") producers: Optional[List[Producer]] = Field(None, description="List of producers", title="Producers") @@ -718,6 +718,9 @@ class TimeseriesSchema(SchemaBaseModel): Schema for timeseries data type """ + __metadata_type__ = "indicator" + __metadata_type_version__ = "0.1.0" + idno: Optional[str] = Field(None, description="Project unique identifier", title="Project unique identifier") metadata_information: Optional[MetadataInformation] = Field( None, description="Information on the production of the metadata", title="Metadata creation" diff --git a/pydantic_schemas/indicators_db_schema.py b/pydantic_schemas/indicators_db_schema.py index c796872..83afcd6 100644 --- a/pydantic_schemas/indicators_db_schema.py +++ b/pydantic_schemas/indicators_db_schema.py @@ -6,7 +6,7 @@ from enum import Enum from typing import Any, Dict, List, Optional -from pydantic import Extra, Field +from pydantic import ConfigDict, Field from .utils.schema_base_model import SchemaBaseModel @@ -32,9 +32,9 @@ class MetadataInformation(SchemaBaseModel): Document description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field(None, description="Document title", title="Document title") idno: Optional[str] = Field(None, title="Unique ID number for the document") producers: Optional[List[Producer]] = Field(None, description="List of producers", title="Producers") @@ -247,9 +247,9 @@ class DatabaseDescription(SchemaBaseModel): Database Description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title_statement: TitleStatement = Field(..., description="Study title") authoring_entity: Optional[List[AuthoringEntityItem]] = Field( None, @@ -388,6 +388,9 @@ class TimeseriesDatabaseSchema(SchemaBaseModel): Schema for timeseries database """ + __metadata_type__ = "indicators_db" + __metadata_type_version__ = "0.1.0" + published: Optional[int] = Field(0, description="0=draft, 1=published", title="Status") overwrite: Optional[Overwrite] = Field("no", description="Overwrite database if already exists?") metadata_information: Optional[MetadataInformation] = Field( diff --git a/pydantic_schemas/metadata_manager.py b/pydantic_schemas/metadata_manager.py index e96afa2..591508d 100644 --- a/pydantic_schemas/metadata_manager.py +++ b/pydantic_schemas/metadata_manager.py @@ -1,4 +1,5 @@ import importlib.metadata +import warnings from copy import copy from typing import Dict, List, Optional, Type, Union @@ -18,8 +19,9 @@ video_schema, ) from .utils.excel_to_pydantic import excel_doc_to_pydantic, excel_single_sheet_to_pydantic -from .utils.pydantic_to_excel import write_across_many_sheets, write_to_single_sheet +from .utils.pydantic_to_excel import parse_version, write_across_many_sheets, write_to_single_sheet from .utils.quick_start import make_skeleton +from .utils.schema_base_model import SchemaBaseModel from .utils.utils import merge_dicts, standardize_keys_in_dict __version__ = importlib.metadata.version("metadataschemas") @@ -158,9 +160,9 @@ def create_metadata_outline( skeleton_object = make_skeleton(schema, debug=debug) return skeleton_object - def _get_name_version_schema_writer(self, metadata_name_or_class): + def _get_name_schema_writer(self, metadata_name_or_class): """ - Determines the metadata name, version, schema, and writer based on the provided metadata name or class. + Determines the metadata name, schema, and writer based on the provided metadata name or class. Args: metadata_name_or_class (str or class): The metadata name as a string or the metadata class. @@ -168,13 +170,12 @@ def _get_name_version_schema_writer(self, metadata_name_or_class): Returns: tuple: A tuple containing: - metadata_name (str): The standardized metadata name. - - version (str): The version information of the metadata. - schema (type(BaseModel)): The schema associated with the metadata. - writer (function): The writer function for the metadata. If `metadata_name_or_class` is a string or is one of the standard metadata types (document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video), - it retrieves the corresponding metadata name, schema, version, and writer from the internal + it retrieves the corresponding metadata name, schema, and writer from the internal mappings. Otherwise, it assumes this is a template and retrieves the title from the class, and uses a default single page writer function. """ @@ -190,14 +191,12 @@ def _get_name_version_schema_writer(self, metadata_name_or_class): for metadata_name, schema in self._TYPE_TO_SCHEMA.items(): if schema is metadata_name_or_class or schema is type(metadata_name_or_class): break - version = f"{metadata_name} type metadata version {__version__}" writer = self._TYPE_TO_WRITER[metadata_name] else: writer = write_to_single_sheet metadata_name = metadata_name_or_class.model_json_schema()["title"] - version = f"Template: {metadata_name}" schema = metadata_name_or_class - return metadata_name, version, schema, writer + return metadata_name, schema, writer def write_metadata_outline_to_excel( self, @@ -235,10 +234,10 @@ def write_metadata_outline_to_excel( and type(metadata_name_or_class) not in self._TYPE_TO_SCHEMA.values() ): metadata_type = self.standardize_metadata_name(metadata_type) - _, _, _, writer = self._get_name_version_schema_writer(metadata_type) - metadata_name, version, schema, _ = self._get_name_version_schema_writer(metadata_name_or_class) + _, _, writer = self._get_name_schema_writer(metadata_type) + metadata_name, schema, _ = self._get_name_schema_writer(metadata_name_or_class) else: - metadata_name, version, schema, writer = self._get_name_version_schema_writer(metadata_name_or_class) + metadata_name, schema, writer = self._get_name_schema_writer(metadata_name_or_class) skeleton_object = self.create_metadata_outline(schema, debug=False) if filename is None: @@ -248,7 +247,7 @@ def write_metadata_outline_to_excel( if not str(filename).endswith(".xlsx"): filename += ".xlsx" - writer(filename, skeleton_object, version, title) + writer(filename, skeleton_object, title) return filename def save_metadata_to_excel( @@ -283,13 +282,10 @@ def save_metadata_to_excel( and type(object) not in self._TYPE_TO_SCHEMA.values() ): metadata_type = self.standardize_metadata_name(metadata_type) - _, _, _, writer = self._get_name_version_schema_writer(metadata_type) - metadata_name, version, schema, _ = self._get_name_version_schema_writer(type(object)) + _, _, writer = self._get_name_schema_writer(metadata_type) + metadata_name, schema, _ = self._get_name_schema_writer(type(object)) else: - metadata_name, version, schema, writer = self._get_name_version_schema_writer(type(object)) - # metadata_name, version, schema, writer = self._get_name_version_schema_writer( - # type(object) - # ) # metadata_name_or_class) + metadata_name, schema, writer = self._get_name_schema_writer(type(object)) skeleton_object = self.create_metadata_outline(metadata_name_or_class=schema, debug=False) if filename is None: @@ -306,11 +302,11 @@ def save_metadata_to_excel( ) combined_dict = standardize_keys_in_dict(combined_dict) new_ob = schema.model_validate(combined_dict) - writer(filename, new_ob, version, title, verbose=verbose) + writer(filename, new_ob, title, verbose=verbose) return filename @staticmethod - def _get_metadata_name_from_excel_file(filename: str) -> str: + def get_metadata_type_info_from_excel_file(filename: str) -> str: error_message = "Improperly formatted Excel file for metadata" workbook = load_workbook(filename) # Select the 'metadata' sheet @@ -329,33 +325,21 @@ def _get_metadata_name_from_excel_file(filename: str) -> str: if not type_info or not isinstance(type_info, str): raise ValueError(f"Cell C1 is empty or not a string. {error_message}") - cell_values = type_info.split(" ") - - if cell_values[0] == "Template:": - return " ".join(cell_values[1:]) - - if len(cell_values) < 3 or cell_values[1] != "type" or cell_values[2] != "metadata": - raise ValueError(f"Cell C1 is improperly formatted. {error_message}") - - return cell_values[0] + return parse_version(type_info) def read_metadata_from_excel( self, filename: str, - metadata_class: Optional[Type[BaseModel]] = None, - metadata_type: Optional[str] = None, + metadata_class: Optional[Type[SchemaBaseModel]] = None, verbose: bool = False, ) -> BaseModel: """ Read in metadata from an appropriately formatted Excel file as a pydantic object. - If using standard metadata types (document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided. + If using standard metadata types (document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video) then there is no need to pass in the metadata_class. But if using a template, then the class should be provided to avoid compatability issues. Args: filename (str): The path to the Excel file. metadata_class (Optional type of BaseModel): A pydantic class type correspondong to the type used to write the Excel file - metadata_type (Optional[str]): The name of the metadata type, such as 'geospatial', 'document', etc. Used if - the metadata_name_or_class is an instance of a template. The name is used to determine the number of - sheets in the Excel file. verbose (bool): If True, print debug information on the file reading. @@ -370,27 +354,59 @@ def read_metadata_from_excel( >>> manager = MetadataManager() >>> document_metadata = manager.read_metadata_from_excel("document_metadata.xlsx") """ - metadata_name = self._get_metadata_name_from_excel_file(filename) - try: - metadata_name = self.standardize_metadata_name(metadata_name) - schema = self._TYPE_TO_SCHEMA[metadata_name] - reader = self._TYPE_TO_READER[metadata_name] - except ValueError as e: - if metadata_class is None: + metadata_type_info = self.get_metadata_type_info_from_excel_file(filename) + metadata_name = metadata_type_info["metadata_type"] + metadata_version = metadata_type_info["metadata_type_version"] + template_uid = metadata_type_info.get("template_uid", None) + + if metadata_class is not None: + if metadata_class.__metadata_type__ != metadata_name: + warnings.warn( + f"metadata_class metadata type {metadata_class.__metadata_type__} does not match the Excel file metadata type {metadata_name}" + "this may cause compatability issues" + ) + elif metadata_class.__metadata_type_version__ != metadata_version: + warnings.warn( + f"metadata_class metadata version {metadata_class.__metadata_type_version__} does not match the Excel file metadata version {metadata_version}" + "this may cause issues" + ) + elif metadata_class.__template_uid__ is not None and template_uid is None: + warnings.warn( + f"metadata_class template_uid {metadata_class.__template_uid__} does not match the Excel file which is not from a template" + "this may cause compatability issues" + ) + elif metadata_class.__template_uid__ is not None and metadata_class.__template_uid__ != template_uid: + warnings.warn( + f"metadata_class template_uid {metadata_class.__template_uid__} does not match the Excel file template_uid {metadata_type_info.get('template_uid', None)}" + "this may cause compatability issues" + ) + elif metadata_class.__template_uid__ is None and template_uid is not None: + warnings.warn( + f"metadata_class is not a template type but the Excel file is from a template" + "this may cause compatability issues" + ) + metadata_name = metadata_class.__metadata_type__ + else: + if metadata_type_info.get("template_uid", None) is not None: raise ValueError( - f"'{metadata_name}' not supported. Must be: {list(self._TYPE_TO_SCHEMA.keys())} or try passing in the metadata_class" - ) from e - schema = metadata_class - if metadata_type is not None: - metadata_type = self.standardize_metadata_name(metadata_type) - reader = self._TYPE_TO_READER[metadata_type] + "metadata_class must be provided when reading in a template Excel file, but none was provided" + ) else: - reader = excel_single_sheet_to_pydantic - if verbose: - print("reader is falling back to excel_single_sheet_to_pydantic") - read_object = reader(filename, schema, verbose=verbose) + metadata_class = self.metadata_class_from_name(metadata_name) + + try: + metadata_name = self.standardize_metadata_name(metadata_class.__metadata_type__) + reader = self._TYPE_TO_READER[metadata_name] + except ValueError: + reader = excel_single_sheet_to_pydantic + warnings.warn( + f"metadata_class metadata type {metadata_class.__metadata_type__} is not a standard type" + "falling back to excel_single_sheet_to_pydantic" + ) + + read_object = reader(filename, metadata_class, verbose=verbose) - skeleton_object = self.create_metadata_outline(metadata_name_or_class=schema, debug=verbose) + skeleton_object = self.create_metadata_outline(metadata_name_or_class=metadata_class, debug=verbose) read_object_dict = read_object.model_dump( mode="json", exclude_none=False, exclude_unset=True, exclude_defaults=True @@ -404,7 +420,7 @@ def read_metadata_from_excel( skeleton_mode=True, ) combined_dict = standardize_keys_in_dict(combined_dict) - new_ob = schema.model_validate(combined_dict) + new_ob = metadata_class.model_validate(combined_dict) return new_ob def _raise_if_unsupported_metadata_name(self, metadata_name: str): diff --git a/pydantic_schemas/microdata_schema.py b/pydantic_schemas/microdata_schema.py index fdf0c0a..257ef92 100644 --- a/pydantic_schemas/microdata_schema.py +++ b/pydantic_schemas/microdata_schema.py @@ -6,7 +6,7 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from pydantic import Extra, Field, constr +from pydantic import ConfigDict, Field, constr from .utils.schema_base_model import SchemaBaseModel @@ -279,9 +279,9 @@ class DocDesc(SchemaBaseModel): Document Description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field(None, description="Document title", title="Document title") idno: Optional[str] = Field(None, title="Unique ID number for the document") producers: Optional[List[Producer]] = Field(None, description="List of producers", title="Producers") @@ -1263,9 +1263,9 @@ class StudyDesc(SchemaBaseModel): Study Description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title_statement: TitleStatement = Field(..., description="Study title") authoring_entity: Optional[List[AuthoringEntityItem]] = Field( None, @@ -1376,6 +1376,9 @@ class DdiSchema(SchemaBaseModel): Schema for Microdata data type based on DDI 2.5 """ + __metadata_type__ = "microdata" + __metadata_type_version__ = "0.1.0" + doc_desc: Optional[DocDesc] = None study_desc: Optional[StudyDesc] = None data_files: Optional[List[DatafileSchema]] = Field(None, description="Data files") diff --git a/pydantic_schemas/resource_schema.py b/pydantic_schemas/resource_schema.py index beaf096..8ade9d5 100644 --- a/pydantic_schemas/resource_schema.py +++ b/pydantic_schemas/resource_schema.py @@ -15,6 +15,9 @@ class Model(SchemaBaseModel): External resource schema """ + __metadata_type__ = "resource" + __metadata_type_version__ = "0.1.0" + dctype: Optional[str] = Field( "doc/oth", description=( diff --git a/pydantic_schemas/script_schema.py b/pydantic_schemas/script_schema.py index 74b592a..763b87d 100644 --- a/pydantic_schemas/script_schema.py +++ b/pydantic_schemas/script_schema.py @@ -6,7 +6,7 @@ from enum import Enum from typing import Any, Dict, List, Optional -from pydantic import Extra, Field +from pydantic import ConfigDict, Field from .utils.schema_base_model import SchemaBaseModel @@ -32,9 +32,9 @@ class DocDesc(SchemaBaseModel): Document description; the Document is the file containing the structured metadata """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field(None, description="Document title", title="Document title") idno: Optional[str] = Field(None, title="Unique ID number for the document") producers: Optional[List[Producer]] = Field( @@ -364,9 +364,9 @@ class Method(SchemaBaseModel): class SoftwareItem(SchemaBaseModel): - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) name: Optional[str] = Field(None, title="Name") version: Optional[str] = Field(None, title="Version") library: Optional[List[str]] = Field( @@ -626,6 +626,9 @@ class ResearchProjectSchemaDraft(SchemaBaseModel): Schema for documenting research projects and data analysis scripts """ + __metadata_type__ = "script" + __metadata_type_version__ = "0.1.0" + repositoryid: Optional[str] = Field( None, description="Abbreviation for the collection that owns the research project", diff --git a/pydantic_schemas/table_schema.py b/pydantic_schemas/table_schema.py index 172b401..96359a1 100644 --- a/pydantic_schemas/table_schema.py +++ b/pydantic_schemas/table_schema.py @@ -6,7 +6,7 @@ from enum import Enum from typing import Any, Dict, List, Optional -from pydantic import Extra, Field +from pydantic import ConfigDict, Field, RootModel from .utils.schema_base_model import SchemaBaseModel @@ -32,9 +32,9 @@ class MetadataInformation(SchemaBaseModel): Document description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) idno: Optional[str] = Field(None, title="Unique ID number for the document") title: Optional[str] = Field(None, title="Document title") producers: Optional[List[Producer]] = Field(None, description="List of producers", title="Producers") @@ -362,9 +362,9 @@ class TableDescription(SchemaBaseModel): Table Description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title_statement: Optional[TitleStatement] = Field(None, description="Title statement") identifiers: Optional[List[Identifier]] = Field(None, description="Other identifiers", title="Other identifiers") authoring_entity: Optional[List[AuthoringEntityItem]] = Field( @@ -454,6 +454,9 @@ class Model(SchemaBaseModel): Draft Schema for Table data type """ + __metadata_type__ = "table" + __metadata_type_version__ = "0.1.0" + repositoryid: Optional[str] = Field( None, description="Abbreviation for the collection that owns the document", diff --git a/pydantic_schemas/tests/test_generators.py b/pydantic_schemas/tests/test_generators.py new file mode 100644 index 0000000..cfae612 --- /dev/null +++ b/pydantic_schemas/tests/test_generators.py @@ -0,0 +1,71 @@ +import importlib.metadata +import importlib.util +import os + +import yaml + +from pydantic_schemas.metadata_manager import MetadataManager + + +def test_yaml_file(): + # Load the YAML file + with open("json_to_python_config.yaml", "r") as file: + data = yaml.safe_load(file) + + # Get the version from importlib.metadata + __version__ = importlib.metadata.version("metadataschemas") + + for section, details in data.items(): + # Check that each value is non-null + assert details["version"] is not None, f"Version is null in section {section}" + assert details["json_file"] is not None, f"JSON file is null in section {section}" + assert details["python_file"] is not None, f"Python file is null in section {section}" + assert details["model_name"] is not None, f"Model name is null in section {section}" + + # Check that the JSON and Python files exist + json_file = os.path.join("schemas", details["json_file"]) + assert os.path.exists(json_file), f"JSON file {json_file} does not exist in section {section}" + python_file = os.path.join("pydantic_schemas", details["python_file"]) + assert os.path.exists(python_file), f"Python file {python_file} does not exist in section {section}" + + # Check that the version is equal to or less than the version from importlib.metadata + assert ( + details["version"] <= __version__ + ), f"Version {details['version']} in section {section} is greater than {__version__}" + + # Check the version is a string fomatted as digits.digits.digits + assert isinstance(details["version"], str), f"Version {details['version']} in section {section} is not a string" + assert ( + details["version"].count(".") == 2 + ), f"Version {details['version']} in section {section} is not formatted as digits.digits.digits" + assert all( + [x.isdigit() for x in details["version"].split(".")] + ), f"Version {details['version']} in section {section} is not formatted as digits.digits.digits" + + +def test_every_schema_has_version(): + mm = MetadataManager() + for v in mm.metadata_type_names: + m = mm.create_metadata_outline(mm.metadata_class_from_name(v)) + assert m.__metadata_type__ is not None, f"__metadata_type__ is None for {v}" + assert m.__metadata_type_version__ is not None, f"__metadata_type_version__ is None for {v}" + assert hasattr(m, "__template_name__"), f"__template_name__ not in {v}" + assert hasattr(m, "__template_uid__"), f"__template_uid__ not in {v}" + assert m.__template_name__ is None, f"__template_name__ is not None for {v} = {m.__template_name__}" + assert m.__template_uid__ is None, f"__template_uid__ is not None for {v} = {m.__template_uid__}" + + m = mm.create_metadata_outline(v) + assert m.__metadata_type__ is not None, f"__metadata_type__ is None for {v}" + assert m.__metadata_type_version__ is not None, f"__metadata_type_version__ is None for {v}" + assert hasattr(m, "__template_name__"), f"__template_name__ not in {v}" + assert hasattr(m, "__template_uid__"), f"__template_uid__ not in {v}" + assert m.__template_name__ is None, f"__template_name__ is not None for {v} = {m.__template_name__}" + assert m.__template_uid__ is None, f"__template_uid__ is not None for {v} = {m.__template_uid__}" + + m = mm._TYPE_TO_SCHEMA[v] + assert m.__metadata_type__ is not None, f"__metadata_type__ is None for {v}" + assert m.__metadata_type_version__ is not None, f"__metadata_type_version__ is None for {v}" + assert hasattr(m, "__template_name__"), f"__template_name__ not in {v}" + assert hasattr(m, "__template_uid__"), f"__template_uid__ not in {v}" + assert m.__template_name__ is None, f"__template_name__ is not None for {v} = {m.__template_name__}" + assert m.__template_uid__ is None, f"__template_uid__ is not None for {v} = {m.__template_uid__}" diff --git a/pydantic_schemas/tests/test_metadata_manager.py b/pydantic_schemas/tests/test_metadata_manager.py index 5b8a735..13cf2d2 100644 --- a/pydantic_schemas/tests/test_metadata_manager.py +++ b/pydantic_schemas/tests/test_metadata_manager.py @@ -3,6 +3,7 @@ import pytest from pydantic import BaseModel, ValidationError +from utils.schema_base_model import SchemaBaseModel from utils.test_utils import assert_pydantic_models_equal, fill_in_pydantic_outline from pydantic_schemas.metadata_manager import MetadataManager @@ -122,16 +123,21 @@ class Midlevel(BaseModel): c: Optional[str] = None d: Optional[List[Simple]] - class TopLevel(BaseModel): + class TopLevel(SchemaBaseModel): e: Optional[Midlevel] f: Optional[int] + __metadata_type__ = "TopLevel" + __metadata_type_version__ = "1.0.0" mm = MetadataManager() filename1 = tmpdir.join(f"test_templates_1.xlsx") mm.write_metadata_outline_to_excel(TopLevel, filename=filename1, title="Outline Test") - assert mm._get_metadata_name_from_excel_file(filename1) == "TopLevel" + assert mm.get_metadata_type_info_from_excel_file(filename1) == { + "metadata_type": "TopLevel", + "metadata_type_version": "1.0.0", + } example = TopLevel( e=Midlevel( @@ -142,12 +148,17 @@ class TopLevel(BaseModel): ], ), f=99, + __metadata_type__="TopLevel", + __metadata_type_version__="1.0.0", ) filename2 = tmpdir.join(f"test_templates_2.xlsx") mm.save_metadata_to_excel(example, filename2) - assert mm._get_metadata_name_from_excel_file(filename2) == "TopLevel" + assert mm.get_metadata_type_info_from_excel_file(filename2) == { + "metadata_type": "TopLevel", + "metadata_type_version": "1.0.0", + } actual = mm.read_metadata_from_excel(filename2, TopLevel) assert actual == example diff --git a/pydantic_schemas/tests/test_pydantic_to_excel.py b/pydantic_schemas/tests/test_pydantic_to_excel.py index 4a24e11..0e39f52 100644 --- a/pydantic_schemas/tests/test_pydantic_to_excel.py +++ b/pydantic_schemas/tests/test_pydantic_to_excel.py @@ -5,6 +5,7 @@ import pandas as pd import pytest from pydantic import BaseModel, Field +from utils.schema_base_model import SchemaBaseModel from pydantic_schemas.document_schema import ScriptSchemaDraft from pydantic_schemas.geospatial_schema import GeospatialSchema @@ -22,8 +23,10 @@ from pydantic_schemas.utils.pydantic_to_excel import ( correct_column_widths, create_sheet, + create_version, open_or_create_workbook, - shade_30_rows_and_protect_sheet, + parse_version, + shade_80_rows_and_protect_sheet, shade_locked_cells, write_across_many_sheets, write_pydantic_to_sheet, @@ -35,12 +38,14 @@ def test_simple_schema(tmpdir, index_above=False): - class Simple(BaseModel): + class Simple(SchemaBaseModel): idno: str title: str author: str - simple_original = Simple(idno="AVal", title="BVal", author="CVal") + simple_original = Simple( + idno="AVal", title="BVal", author="CVal", __metadata_type__="simple", __metadata_type_version__="1.0" + ) filename = tmpdir.join(f"integration_test_simple_schema_.xlsx") write_to_single_sheet(filename, simple_original, "simple_original", "Simple Metadata") @@ -59,13 +64,15 @@ class Country(BaseModel): name: str initials: str - class ProductionAndCountries(BaseModel): + class ProductionAndCountries(SchemaBaseModel): production: Production countries: Country inp = ProductionAndCountries( production=Production(idno="AVal", title="BVal", author="CVal"), countries=Country(name="MyCountry", initials="MC"), + __metadata_type__="production_and_countries", + __metadata_type_version__="1.0", ) filename = tmpdir.join(f"integration_test_two_layer_simple_schema.xlsx") @@ -97,7 +104,11 @@ class SeriesDescription(BaseModel): language: Language topic: Topic - class ProductionAndCountries(BaseModel): + series_description = SeriesDescription( + language=Language(name="English", code="EN"), topic=Topic(id="topic1", name="topic1") + ) + + class ProductionAndCountries(SchemaBaseModel): production: Production countries: Country series_description: SeriesDescription @@ -105,16 +116,14 @@ class ProductionAndCountries(BaseModel): title: Optional[str] = None subtitle: Optional[str] = None - series_description = SeriesDescription( - language=Language(name="English", code="EN"), topic=Topic(id="topic1", name="topic1") - ) - inp = ProductionAndCountries( production=Production(idno="AVal", title="BVal", author="CVal"), countries=Country(name="MyCountry", initials="MC"), series_description=series_description, idno="example_idno", title="example_title", + __metadata_type__="production_and_countries", + __metadata_type_version__="1.0", ) filename = tmpdir.join(f"integration_test_multilayer_simple_schema_.xlsx") @@ -124,24 +133,28 @@ class ProductionAndCountries(BaseModel): def test_optional_missing_deprecated_new_simple(tmpdir): - class Production(BaseModel): + class Production(SchemaBaseModel): idno: Optional[str] = None title: Optional[str] = None subtitle: Optional[str] = None author: str deprecatedFeature: str + __metadata_type__ = "production" + __metadata_type_version__ = "1.0" original_production = Production(idno="", subtitle=None, author="author", deprecatedFeature="toberemoved") filename = tmpdir.join(f"integration_test_optional_missing_deprecated_new_simple_.xlsx") write_to_single_sheet(filename, original_production, "Production", "Production") - class Production(BaseModel): + class Production(SchemaBaseModel): idno: Optional[str] = None title: Optional[str] = None author: str newFeature: Optional[str] = None requiredNewFeature: str + __metadata_type__ = "production" + __metadata_type_version__ = "1.0" new_production = excel_sheet_to_pydantic(filename=filename, sheetname="metadata", model_type=Production) assert new_production.idno is None @@ -152,24 +165,29 @@ class Production(BaseModel): def test_optional_missing_deprecated_new_two_level(tmpdir): - class Production(BaseModel): + class Production(SchemaBaseModel): idno: Optional[str] = None title: Optional[str] = None subtitle: Optional[str] = None author: str deprecatedFeature: str - class Country(BaseModel): + class Country(SchemaBaseModel): name: str initials: str - class ProductionAndCountries(BaseModel): + class ProductionAndCountries(SchemaBaseModel): production: Production countries: Country example_production = Production(idno="", subtitle=None, author="author", deprecatedFeature="toberemoved") example_country = Country(name="MadeupCountry", initials="MC") - example_production_and_country = ProductionAndCountries(production=example_production, countries=example_country) + example_production_and_country = ProductionAndCountries( + production=example_production, + countries=example_country, + __metadata_type__="production_and_countries", + __metadata_type_version__="1.0", + ) filename = tmpdir.join(f"integration_test_optional_missing_deprecated_new_two_level_.xlsx") @@ -226,7 +244,7 @@ class Country(BaseModel): name: str initials: str - class ProductionAndCountries(BaseModel): + class ProductionAndCountries(SchemaBaseModel): production: Production countries: List[Country] dates: List[str] @@ -254,6 +272,8 @@ class ProductionAndCountries(BaseModel): dates=["April", "May", "June"], other=[], otherOptional=None, + __metadata_type__="production_and_countries", + __metadata_type_version__="1.0", ) filename = tmpdir.join(f"integration_test_lists_.xlsx") @@ -294,7 +314,7 @@ class Country(BaseModel): name: str initials: str - class ProductionAndCountries(BaseModel): + class ProductionAndCountries(SchemaBaseModel): production: Production countries: List[Country] dates: List[str] @@ -317,6 +337,8 @@ class ProductionAndCountries(BaseModel): other=["12"], otherOptional=None, single_val="single", + __metadata_type__="production_and_countries", + __metadata_type_version__="1.0", ) filename = tmpdir.join(f"integration_test_optional_missing_deprecated_new_two_level_.xlsx") @@ -367,14 +389,18 @@ class StudyDesc(BaseModel): None, description="Methodology and processing", title="Methodology and Processing" ) - class MicrodataSchema(BaseModel): + class MicrodataSchema(SchemaBaseModel): """ Schema for Microdata data type based on DDI 2.5 """ study_desc: Optional[StudyDesc] = None - ms = MicrodataSchema(study_desc=StudyDesc(method=Method(study_class=["a1", "b2"]))) + ms = MicrodataSchema( + study_desc=StudyDesc(method=Method(study_class=["a1", "b2"])), + __metadata_type__="microdata", + __metadata_type_version__="1.0", + ) filename = tmpdir.join(f"integration_test_union_list_.xlsx") write_across_many_sheets(filename, ms, "UnionList", "Looking at a union with a list") @@ -386,12 +412,17 @@ def test_dictionaries(tmpdir): class SubDict(BaseModel): sub_additional: Optional[Dict[str, Any]] = Field(None, description="Additional metadata at a lower level") - class WithDict(BaseModel): + class WithDict(SchemaBaseModel): additional: Optional[Dict[str, Any]] = Field(None, description="Additional metadata") optional_dict: Optional[Dict[str, Any]] = None sub: SubDict - wd = WithDict(additional={"s": "sa", "a": "va"}, sub=SubDict(sub_additional={"sub": "subval", "sub2": "subval2"})) + wd = WithDict( + additional={"s": "sa", "a": "va"}, + sub=SubDict(sub_additional={"sub": "subval", "sub2": "subval2"}), + __metadata_type__="with_dict", + __metadata_type_version__="1.0", + ) filename = tmpdir.join(f"integration_test_dictionaries_.xlsx") write_across_many_sheets(filename, wd, "WithDict", "Looking at dictionaries") @@ -453,7 +484,7 @@ class ServiceIdentification(BaseModel): None, description="Constraints associated to the service", title="Service constraints" ) - class MetaDataOfVariousHierarchies(BaseModel): + class MetaDataOfVariousHierarchies(SchemaBaseModel): citation: Optional[Citation] = None identification_info: Optional[IdentificationInfo] = None lst: Optional[List[str]] = (None,) @@ -470,6 +501,8 @@ class MetaDataOfVariousHierarchies(BaseModel): Constraints(legalConstraints=LegalConstraints(useLimitation=["s1", "s2"], accessConstraints=["s3"])) ] ), + __metadata_type__="metadata_of_various_hierarchies", + __metadata_type_version__="1.0", ) # index = pd.MultiIndex.from_tuples([("identification_info", "citation", "title"), ("identification_info", "citation", "alternateTitle"), ("service_identification", "restrictions", "legalConstraints", "useLimitation"), ("service_identification", "restrictions", "legalConstraints", "accessConstraints")]) @@ -611,7 +644,7 @@ class ServiceIdentification(BaseModel): None, description="Constraints associated to the service", title="Service constraints" ) - class MetaDataOfVariousHierarchies(BaseModel): + class MetaDataOfVariousHierarchies(SchemaBaseModel): idno: Optional[str] = None database_name: Optional[str] = None single_level_data: SingleLevelData @@ -642,9 +675,60 @@ class MetaDataOfVariousHierarchies(BaseModel): service_identification=ServiceIdentification( restrictions=[Constraints(legalConstraints=LegalConstraints(useLimitation=[], accessConstraints=[]))] ), + __metadata_type__="metadata_of_various_hierarchies", + __metadata_type_version__="1.0", ) if os.path.exists(filename): os.remove(filename) - write_to_single_sheet(filename, example, "MetaDataOfVariousHierarchies", sheet_title, verbose=True) + write_to_single_sheet(filename, example, sheet_title, verbose=True) + + +def test_create_version(): + class Sub2(SchemaBaseModel): + a: str + b: str + __metadata_type__ = "dataset" + __metadata_type_version__ = "1.0" + + # test with no template name or uid + ob_with_sub2 = Sub2(a="a", b="b") + assert ob_with_sub2.__template_name__ is None + assert ob_with_sub2.__template_uid__ is None + version_with_sub2 = create_version(ob_with_sub2) + assert version_with_sub2 == "metadata_type: dataset, metadata_type_version: 1.0" + expected_output_with_sub2 = {"metadata_type": "dataset", "metadata_type_version": "1.0"} + assert parse_version(version_with_sub2) == expected_output_with_sub2 + + # test with template name and uid + ob_with_sub2.__template_name__ = "My Template" + ob_with_sub2.__template_uid__ = "1234" + version_with_sub2 = create_version(ob_with_sub2) + assert ( + version_with_sub2 + == "metadata_type: dataset, metadata_type_version: 1.0, template_uid: 1234, template_name: My Template" + ) + expected_output_with_sub2 = { + "metadata_type": "dataset", + "metadata_type_version": "1.0", + "template_uid": "1234", + "template_name": "My Template", + } + + assert parse_version(version_with_sub2) == expected_output_with_sub2 + + # test with commas and colons in template name + ob_with_sub2.__template_name__ = "My: Template, with, commas: and colons" + version_with_sub2 = create_version(ob_with_sub2) + assert ( + version_with_sub2 + == "metadata_type: dataset, metadata_type_version: 1.0, template_uid: 1234, template_name: My: Template, with, commas: and colons" + ) + expected_output_with_sub2 = { + "metadata_type": "dataset", + "metadata_type_version": "1.0", + "template_uid": "1234", + "template_name": "My: Template, with, commas: and colons", + } + assert parse_version(version_with_sub2) == expected_output_with_sub2 diff --git a/pydantic_schemas/utils/pydantic_to_excel.py b/pydantic_schemas/utils/pydantic_to_excel.py index d8b0c30..0b2d2ce 100644 --- a/pydantic_schemas/utils/pydantic_to_excel.py +++ b/pydantic_schemas/utils/pydantic_to_excel.py @@ -13,6 +13,7 @@ from openpyxl.worksheet.worksheet import Worksheet from pydantic import AnyUrl, BaseModel +from .schema_base_model import SchemaBaseModel from .utils import ( annotation_contains_dict, annotation_contains_list, @@ -109,9 +110,9 @@ def correct_column_widths(worksheet: Worksheet): worksheet.column_dimensions[column].width = adjusted_width -def shade_30_rows_and_protect_sheet(worksheet: Worksheet, startrow: int): +def shade_80_rows_and_protect_sheet(worksheet: Worksheet, startrow: int): """For use after all data is written so there is a clear border around the data""" - for r in range(startrow, startrow + 30): + for r in range(startrow, startrow + 80): protect_and_shade_row(worksheet, r) worksheet.protection = SheetProtection( sheet=True, @@ -504,25 +505,81 @@ def create_sheet(workbook, sheetname, sheet_number): return new_sheet -def write_to_single_sheet(doc_filepath: str, ob: BaseModel, version: str, title: Optional[str] = None, verbose=False): +def write_to_single_sheet(doc_filepath: str, ob: BaseModel, title: Optional[str] = None, verbose=False): model_default_name = ob.model_json_schema()["title"] if title is None: title = model_default_name wb = open_or_create_workbook(doc_filepath) ws = create_sheet(wb, "metadata", sheet_number=0) + version = create_version(ob) current_row = write_title_and_version_info(ws, title, version, protect_title=False) current_row = write_pydantic_to_sheet(ws, ob, current_row, debug=verbose) correct_column_widths(worksheet=ws) - shade_30_rows_and_protect_sheet(worksheet=ws, startrow=current_row) + shade_80_rows_and_protect_sheet(worksheet=ws, startrow=current_row) shade_locked_cells(worksheet=ws) wb.save(doc_filepath) -def write_across_many_sheets( - doc_filepath: str, ob: BaseModel, version: str, title: Optional[str] = None, verbose=False -): +def create_version(ob: SchemaBaseModel): + """ + Create a version string from the metadata_type and metadata_type_version attributes of a SchemaBaseModel. + Optionally include the template_uid and template_name attributes if they are not None. + + Args: + ob (SchemaBaseModel): The SchemaBaseModel object to generate the version string for. + + Returns: + str: The version string. + + Example output: + 'metadata_type: dataset, metadata_type_version: 1.0' + 'metadata_type: dataset, metadata_type_version: 1.0, template_uid: 1234, template_name: My Template' + """ + output = f"metadata_type: {ob.__metadata_type__}, metadata_type_version: {ob.__metadata_type_version__}" + if ob.__template_name__ is not None and ob.__template_uid__ is not None: + output += f", template_uid: {ob.__template_uid__}, template_name: {ob.__template_name__}" + return output + + +def parse_version(version: str): + """ + Parse a version string into a dictionary of key-value pairs. + + Args: + version (str): The version string to parse. + + Returns: + dict: A dictionary of key-value pairs extracted from the version string. + + Example input: + 'metadata_type: dataset, metadata_type_version: 1.0, template_uid: 1234, template_name: My Template' + + Example output: + { + 'metadata_type': 'dataset', + 'metadata_type_version': '1.0', + 'template_uid': '1234', + 'template_name': 'My Template' + } + """ + version_dict = {} + version_info = version.split(",") + if len(version_info) > 4: + template_name_info = ",".join(version_info[3:]) + version_info = version_info[:3] + version_info.append(template_name_info) + for item in version_info: + key_values = item.strip().split(":") + key = key_values[0] + value = ":".join(key_values[1:]) + version_dict[key.strip()] = value.strip() + return version_dict + + +def write_across_many_sheets(doc_filepath: str, ob: SchemaBaseModel, title: Optional[str] = None, verbose=False): wb = open_or_create_workbook(doc_filepath) ws = create_sheet(wb, "metadata", sheet_number=0) + version = create_version(ob) current_row = write_title_and_version_info(ws, title, version, protect_title=False) children = seperate_simple_from_pydantic(ob) @@ -534,9 +591,9 @@ def write_across_many_sheets( child_object = subset_pydantic_model(ob, children["simple"]) current_row = write_pydantic_to_sheet(ws, child_object, current_row, debug=verbose) - correct_column_widths(worksheet=ws) - shade_30_rows_and_protect_sheet(worksheet=ws, startrow=current_row) - shade_locked_cells(worksheet=ws) + correct_column_widths(worksheet=ws) + shade_80_rows_and_protect_sheet(worksheet=ws, startrow=current_row) + shade_locked_cells(worksheet=ws) sheet_number += 1 for fieldname in children["pydantic"]: @@ -554,7 +611,7 @@ def write_across_many_sheets( current_row = write_title_and_version_info(ws, sheet_title, None, protect_title=True) current_row = write_pydantic_to_sheet(ws, child_object, current_row, debug=verbose) correct_column_widths(worksheet=ws) - shade_30_rows_and_protect_sheet(worksheet=ws, startrow=current_row) + shade_80_rows_and_protect_sheet(worksheet=ws, startrow=current_row) shade_locked_cells(worksheet=ws) sheet_number += 1 wb.save(doc_filepath) diff --git a/pydantic_schemas/utils/schema_base_model.py b/pydantic_schemas/utils/schema_base_model.py index c52732b..77a798c 100644 --- a/pydantic_schemas/utils/schema_base_model.py +++ b/pydantic_schemas/utils/schema_base_model.py @@ -1,4 +1,6 @@ -from pydantic import BaseModel, ConfigDict +from typing import Optional + +from pydantic import BaseModel, ConfigDict, Field from rich import print as print_rich # from rich.pretty import pretty_repr @@ -12,6 +14,31 @@ class SchemaBaseModel(BaseModel): def pretty_print(self): print_rich(self) + __metadata_type__: Optional[str] = None + __metadata_type_version__: Optional[str] = None + __template_name__: Optional[str] = None + __template_uid__: Optional[str] = None + + # metadata_type_: Optional[str] = Field(default=None, alias="__metadata_type__") + # metadata_type_version_: Optional[str] = Field(default=None, alias="__metadata_type_version__") + # template_name_: Optional[str] = Field(default=None, alias="__template_name__") + # template_uid_: Optional[str] = Field(default=None, alias="__template_uid__") + + # @property + # def __metadata_type__(self): + # return self.metadata_type_ + + # @property + # def __metadata_type_version__(self): + # return self.metadata_type_version_ + + # @property + # def __template_name__(self): + # return self.template_name_ + # @property + # def __template_uid__(self): + # return self.template_uid_ + # def __repr__(self): # return pretty_repr(self) diff --git a/pydantic_schemas/video_schema.py b/pydantic_schemas/video_schema.py index 42c0932..e9d1149 100644 --- a/pydantic_schemas/video_schema.py +++ b/pydantic_schemas/video_schema.py @@ -6,7 +6,7 @@ from enum import Enum from typing import Any, Dict, List, Optional -from pydantic import Extra, Field +from pydantic import ConfigDict, Field from .utils.schema_base_model import SchemaBaseModel @@ -32,9 +32,9 @@ class MetadataInformation(SchemaBaseModel): Document description """ - class Config: - extra = Extra.forbid - + model_config = ConfigDict( + extra="forbid", + ) title: Optional[str] = Field(None, description="Document title", title="Document title") idno: Optional[str] = Field(None, title="Unique ID number for the document") producers: Optional[List[Producer]] = Field(None, description="List of producers", title="Producers") @@ -278,6 +278,9 @@ class Model(SchemaBaseModel): Video schema based on the elements from Dublin Core and Schema.org's VideoObject """ + __metadata_type__ = "video" + __metadata_type_version__ = "0.1.0" + repositoryid: Optional[str] = Field( None, description="Abbreviation for the collection that owns the document",