Skip to content

Commit

Permalink
generalise excel interface to schema interface, fix bugs in template to pydantic
Browse files Browse the repository at this point in the history
  • Loading branch information
Gordon Blackadder committed Aug 28, 2024
1 parent 8acb008 commit d762c3f
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 267 deletions.
2 changes: 1 addition & 1 deletion pydantic_schemas/generators/generate_excel_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

ei = SchemaInterface()

for metadata_type in ei.get_metadata_types():
for metadata_type in ei.list_metadata_types():
filename = f"excel_sheets/{metadata_type.capitalize()}_metadata.xlsx"
print(f"Writing {metadata_type} outline to {filename}")
if os.path.exists(filename):
Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/generators/generate_pydantic_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
"image-schema.json",
"microdata-schema.json",
"script-schema.json",
"series-schema.json",
"table-schema.json",
"timeseries-db-schema.json",
"timeseries-schema.json",
Expand Down
59 changes: 21 additions & 38 deletions pydantic_schemas/schema_interface.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Optional
from typing import Dict, Optional, Type

from openpyxl import load_workbook
from pydantic import BaseModel
Expand All @@ -8,7 +8,6 @@
geospatial_schema,
microdata_schema,
script_schema,
series_schema,
table_schema,
timeseries_db_schema,
timeseries_schema,
Expand All @@ -24,7 +23,7 @@
class SchemaInterface:
"""
Interface with Excel for creating, saving and updating metadata for various types:
documents, scripts, series, survey, table, timeseries, timeseries_db, video
documents, scripts, survey, table, timeseries, timeseries_db, video
Retrieve pydantic model definitions for each metadata type
"""
Expand All @@ -34,7 +33,6 @@ class SchemaInterface:
"geospatial": geospatial_schema.GeospatialSchema,
# "image":image_schema.ImageDataTypeSchema,
"script": script_schema.ResearchProjectSchemaDraft,
"series": series_schema.Series,
"survey": microdata_schema.MicrodataSchema,
"table": table_schema.Model,
"timeseries": timeseries_schema.TimeseriesSchema,
Expand All @@ -47,7 +45,6 @@ class SchemaInterface:
# "geospatial":,
# "image":,
"script": write_across_many_sheets,
"series": write_to_single_sheet, # one sheet
"survey": write_across_many_sheets,
"table": write_across_many_sheets,
"timeseries": write_across_many_sheets,
Expand All @@ -60,7 +57,6 @@ class SchemaInterface:
# "geospatial":,
# "image":,
"script": excel_doc_to_pydantic,
"series": excel_single_sheet_to_pydantic, # one sheet
"survey": excel_doc_to_pydantic,
"table": excel_doc_to_pydantic,
"timeseries": excel_doc_to_pydantic,
Expand All @@ -69,21 +65,19 @@ class SchemaInterface:
}

def get_metadata_class(self, metadata_type: str):
metadata_type = self._process_metadata_type(metadata_type)
if metadata_type not in self._TYPE_TO_SCHEMA:
raise NameError(f"{metadata_type} not known, must be one of {list(self._TYPE_TO_SCHEMA.keys())}.")
metadata_type = self.standardize_metadata_type_name(metadata_type)
schema = self._TYPE_TO_SCHEMA[metadata_type]
return schema

def template_to_pydantic(self, template: Dict, parent_schema_type: str, name: Optional[str] = None) -> BaseModel:
# metadata_type = self._process_metadata_type(parent_schema_type)
# schema = self._TYPE_TO_SCHEMA[metadata_type]
def template_to_pydantic(
self, template: Dict, parent_schema_type: str, name: Optional[str] = None
) -> Type[BaseModel]:
schema = self.get_metadata_class(parent_schema_type)

return pydantic_from_template(template, schema, name)

def get_metadata_types(self):
return list(self._TYPE_TO_READER.keys())
def list_metadata_types(self):
return list(self._TYPE_TO_SCHEMA.keys())

@staticmethod
def _merge_dicts(base, update):
Expand Down Expand Up @@ -123,11 +117,12 @@ def _merge_dicts(base, update):
new_dict[key] = base_value
return new_dict

@staticmethod
def _process_metadata_type(metadata_type: str) -> str:
def standardize_metadata_type_name(self, metadata_type: str) -> str:
metadata_type = metadata_type.lower()
metadata_type = metadata_type.replace("-", "_")
if metadata_type == "microdata" or metadata_type == "survey_microdata":
metadata_type = "survey"
self._raise_if_unsupported_metadata_type(metadata_type=metadata_type)
return metadata_type

def type_to_outline(self, metadata_type: str, debug: bool = False) -> BaseModel:
Expand Down Expand Up @@ -155,8 +150,10 @@ def write_outline_metadata_to_excel(
Outputs:
An Excel file into which metadata can be entered
"""
metadata_type = self._process_metadata_type(metadata_type)
self._raise_if_unsupported_metadata_type(metadata_type=metadata_type)
metadata_type = self.standardize_metadata_type_name(metadata_type)
if metadata_type == "geospatial":
raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel")

if filename is None:
filename = f"{metadata_type}_metadata.xlsx"
if not str(filename).endswith(".xlsx"):
Expand Down Expand Up @@ -189,8 +186,9 @@ def save_metadata_to_excel(
Outputs:
An Excel file containing the metadata from the pydantic object. This file can be updated as needed.
"""
metadata_type = self._process_metadata_type(metadata_type)
self._raise_if_unsupported_metadata_type(metadata_type=metadata_type)
metadata_type = self.standardize_metadata_type_name(metadata_type)
if metadata_type == "geospatial":
raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel")

if filename is None:
filename = f"{metadata_type}_metadata.xlsx"
Expand Down Expand Up @@ -240,7 +238,7 @@ def _get_metadata_type_from_excel_file(filename: str) -> str:

return cell_values[0]

def read_metadata_excel(self, filename: str) -> BaseModel:
def read_metadata_from_excel(self, filename: str) -> BaseModel:
"""
Read in metadata_type metadata from an appropriately formatted Excel file as a pydantic object.
Expand All @@ -251,25 +249,13 @@ def read_metadata_excel(self, filename: str) -> BaseModel:
BaseModel: a pydantic object containing the metadata from the file
"""
metadata_type = self._get_metadata_type_from_excel_file(filename)
metadata_type = self._process_metadata_type(metadata_type)
self._raise_if_unsupported_metadata_type(metadata_type=metadata_type)
metadata_type = self.standardize_metadata_type_name(metadata_type)
schema = self._TYPE_TO_SCHEMA[metadata_type]
reader = self._TYPE_TO_READER[metadata_type]
read_object = reader(filename, schema)
new_ob = self.inflate_read_data_to_schema(metadata_type, read_object)
return new_ob

def inflate_read_data_to_schema(self, metadata_type, read_object):
metadata_type = self._process_metadata_type(metadata_type)
self._raise_if_unsupported_metadata_type(metadata_type=metadata_type)
skeleton_object = self.type_to_outline(metadata_type=metadata_type, debug=False)

if isinstance(read_object, dict):
read_object_dict = read_object
elif isinstance(read_object, BaseModel):
read_object_dict = read_object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True)
else:
raise ValueError(f"Expected dict or pydantic BaseModel but got {type(read_object)}")
read_object_dict = read_object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True)
combined_dict = self._merge_dicts(
skeleton_object.model_dump(),
read_object_dict,
Expand All @@ -284,9 +270,6 @@ def _raise_if_unsupported_metadata_type(self, metadata_type: str):
If the type is specifically unsupported - geospatial or image - a NotImplementedError is raised
If the type is simply unknown then a ValueError is raised.
"""
metadata_type = self._process_metadata_type(metadata_type)
if metadata_type == "geospatial":
raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel")
if metadata_type == "image":
raise NotImplementedError("Due to an issue with image metadata schema definition causing __root__ errors")
if metadata_type not in self._TYPE_TO_SCHEMA.keys():
Expand Down
167 changes: 0 additions & 167 deletions pydantic_schemas/series_schema.py

This file was deleted.

2 changes: 1 addition & 1 deletion pydantic_schemas/tests/test_excel_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_metadata(tmpdir, metadata_type):
)

# Read the metadata back
tmp = ei.read_metadata_excel(filename=filename)
tmp = ei.read_metadata_from_excel(filename=filename)

# Save the read metadata to a new file
filename2 = tmpdir.join(f"test_{metadata_type}_2.xlsx")
Expand Down
Loading

0 comments on commit d762c3f

Please sign in to comment.