Skip to content

Commit

Permalink
fix bugs when working with templates
Browse files Browse the repository at this point in the history
  • Loading branch information
Gordon Blackadder committed Sep 6, 2024
1 parent 82d57ce commit ddce25f
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 37 deletions.
22 changes: 12 additions & 10 deletions pydantic_schemas/metadata_manager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from copy import copy
from typing import Dict, Optional, Type, Union
from typing import Dict, List, Optional, Type, Union

from openpyxl import load_workbook
from pydantic import BaseModel
Expand Down Expand Up @@ -68,18 +68,18 @@ class MetadataManager:
"video": excel_single_sheet_to_pydantic, # one sheet
}

def metadata_class_from_name(self, metadata_name: str):
def metadata_class_from_name(self, metadata_name: str) -> Type[BaseModel]:
metadata_name = self.standardize_metadata_name(metadata_name)
schema = self._TYPE_TO_SCHEMA[metadata_name]
return copy(schema)

@property
def metadata_type_names(self):
def metadata_type_names(self) -> List[str]:
return list(self._TYPE_TO_SCHEMA.keys())

def standardize_metadata_name(self, metadata_name: str) -> str:
metadata_name = metadata_name.lower()
metadata_name = metadata_name.replace("-", "_")
metadata_name = metadata_name.replace("-", "_").replace(" ", "_")
if metadata_name == "microdata" or metadata_name == "survey_microdata":
metadata_name = "survey"
self._raise_if_unsupported_metadata_name(metadata_name=metadata_name)
Expand Down Expand Up @@ -175,10 +175,11 @@ def save_metadata_to_excel(
if metadata_name == "geospatial":
raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel")
schema = self.metadata_class_from_name(metadata_name)
writer = self._TYPE_TO_WRITER[metadata_name]
else:
metadata_name = metadata_name_or_class.model_json_schema()["title"]
schema = metadata_name_or_class
writer = write_to_single_sheet
metadata_name = metadata_name_or_class.model_json_schema()["title"]
skeleton_object = self.create_metadata_outline(metadata_name_or_class=metadata_name_or_class, debug=False)

if filename is None:
Expand All @@ -195,7 +196,7 @@ def save_metadata_to_excel(
combined_dict = standardize_keys_in_dict(combined_dict)
new_ob = schema(**combined_dict)

writer = self._TYPE_TO_WRITER[metadata_name]
# writer = self._TYPE_TO_WRITER[metadata_name]
writer(filename, new_ob, metadata_name, title)
return filename

Expand Down Expand Up @@ -229,10 +230,11 @@ def _get_metadata_name_from_excel_file(filename: str) -> str:
def read_metadata_from_excel(self, filename: str, metadata_class: Optional[Type[BaseModel]] = None) -> BaseModel:
"""
Read in metadata from an appropriately formatted Excel file as a pydantic object.
If using s standard metadata types (documents, scripts, survey, table, timeseries, timeseries_db, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided.
If using standard metadata types (document, resource, script, survey, table, timeseries, timeseries_db, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided.
Args:
filename (str): The path to the Excel file.
metadata_class (Optional type of BaseModel): A pudantic class type correspondong to the type used to write the Excel file
metadata_class (Optional type of BaseModel): A pydantic class type corresponding to the type used to write the Excel file
Returns:
BaseModel: a pydantic object containing the metadata from the file
Expand All @@ -242,11 +244,11 @@ def read_metadata_from_excel(self, filename: str, metadata_class: Optional[Type[
metadata_name = self.standardize_metadata_name(metadata_name)
schema = self._TYPE_TO_SCHEMA[metadata_name]
reader = self._TYPE_TO_READER[metadata_name]
except ValueError:
except ValueError as e:
if metadata_class is None:
raise ValueError(
f"'{metadata_name}' not supported. Must be: {list(self._TYPE_TO_SCHEMA.keys())} or try passing in the metadata_class"
)
) from e
schema = metadata_class
reader = excel_single_sheet_to_pydantic
read_object = reader(filename, schema)
Expand Down
25 changes: 0 additions & 25 deletions pydantic_schemas/tests/test_excel_interface.py

This file was deleted.

90 changes: 90 additions & 0 deletions pydantic_schemas/tests/test_metadata_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import pytest

from pydantic_schemas.metadata_manager import MetadataManager


@pytest.mark.parametrize(
    "metadata_name",
    ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"],
)
def test_metadata_by_name(tmpdir, metadata_name):
    """Round-trip each standard metadata type through Excel, addressed by name.

    For every parametrized type name this checks that the name is registered,
    that an outline can be built with and without debug output, and that an
    outline written to Excel can be read back and saved again without raising.
    """
    manager = MetadataManager()
    assert metadata_name in manager.metadata_type_names

    # Outline creation must work in both debug modes.
    for debug in (True, False):
        manager.create_metadata_outline(metadata_name_or_class=metadata_name, debug=debug)

    # Write an empty outline workbook for this type.
    outline_path = tmpdir.join(f"test_{metadata_name}.xlsx")
    filename = manager.write_metadata_outline_to_excel(
        metadata_name_or_class=metadata_name,
        filename=outline_path,
        title=metadata_name,
    )

    # Read the workbook back; the type is discovered from the file itself.
    loaded = manager.read_metadata_from_excel(filename=filename)

    # Persist what was read into a second workbook.
    filename2 = tmpdir.join(f"test_{metadata_name}_2.xlsx")
    manager.save_metadata_to_excel(
        metadata_name_or_class=metadata_name,
        object=loaded,
        filename=filename2,
        title=metadata_name,
    )

    # Finally, build an outline object using the default debug setting.
    manager.create_metadata_outline(metadata_name_or_class=metadata_name)


@pytest.mark.parametrize(
    "metadata_name",
    ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"],
)
def test_metadata_by_class(tmpdir, metadata_name):
    """Exercise the outline/write/read path when a schema class is passed.

    The class for each parametrized type name is looked up first, then used in
    place of the name for outline creation, Excel writing and Excel reading.
    The test passes if none of these calls raise.
    """
    manager = MetadataManager()
    schema_class = manager.metadata_class_from_name(metadata_name=metadata_name)

    # Build an outline straight from the class.
    manager.create_metadata_outline(metadata_name_or_class=schema_class)

    # Write the outline to Excel, then read it back supplying the same class.
    target = tmpdir.join(f"test_class_{metadata_name}.xlsx")
    filename_class = manager.write_metadata_outline_to_excel(
        metadata_name_or_class=schema_class,
        filename=target,
        title=metadata_name,
    )
    manager.read_metadata_from_excel(filename=filename_class, metadata_class=schema_class)


def test_standardize_metadata_name():
    """Check that metadata names are normalised to their canonical form.

    Covers case folding, hyphen and space separators, the microdata aliases
    for "survey", and the two failure modes: a known-but-unimplemented type
    (NotImplementedError) and an unknown name (ValueError).
    """
    mm = MetadataManager()

    # Each case keeps its input next to its expectation so the two can never
    # drift out of step (parallel lists joined with zip truncate silently).
    cases = [
        ("Document", "document"),
        ("SCRIPT", "script"),
        ("survey", "survey"),
        ("survey-microdata", "survey"),
        ("survey microdata", "survey"),
        ("microdata", "survey"),
        ("table", "table"),
        ("timeseries-db", "timeseries_db"),
        # Previously a second "timeseries-db"; the already-canonical
        # underscore spelling is the variant that was not being covered.
        ("timeseries_db", "timeseries_db"),
        ("TimeSeries", "timeseries"),
        ("VIdeo", "video"),
    ]

    for inp, expected in cases:
        actual = mm.standardize_metadata_name(inp)
        assert actual == expected, f"expected {expected} but got {actual}"

    # A recognised type that has no implementation yet.
    with pytest.raises(NotImplementedError):
        mm.standardize_metadata_name("Image")

    # A name that is not a metadata type at all.
    with pytest.raises(ValueError):
        mm.standardize_metadata_name("Bad-name")
13 changes: 11 additions & 2 deletions pydantic_schemas/utils/excel_to_pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,17 @@ def get_relevant_sub_frame(m: Type[BaseModel], df: pd.DataFrame, name_of_field:

def handle_optional(name, annotation, df, from_within_list: bool = False, debug=False):
args = [a for a in get_args(annotation) if a is not type(None)]
assert len(args) == 1, f"handle_optional encountered {args}"
ret = annotation_switch(name, args[0], df, from_within_list=from_within_list)
# assert len(args) == 1, f"handle_optional encountered {args}"
if len(args) > 1:
if str in args:
arg = str
elif float in args:
arg = float
else:
arg = args[0]
else:
arg = args[0]
ret = annotation_switch(name, arg, df, from_within_list=from_within_list)
if debug:
print(f"optional ret: {ret}")
print(f"isinstance(ret, list): {isinstance(ret, list)}")
Expand Down

0 comments on commit ddce25f

Please sign in to comment.