Skip to content

Commit

Permalink
include versioning and automatic type detection
Browse files Browse the repository at this point in the history
  • Loading branch information
Gordon Blackadder committed Aug 9, 2024
1 parent 833ecd5 commit 4c73e49
Show file tree
Hide file tree
Showing 12 changed files with 55 additions and 11 deletions.
Binary file modified excel_sheets/Document_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Script_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Series_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Survey_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Table_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Timeseries_db_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Timeseries_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Video_metadata.xlsx
Binary file not shown.
39 changes: 32 additions & 7 deletions pydantic_schemas/excel_interface.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Optional

from openpyxl import load_workbook
from pydantic import BaseModel

from . import ( # image_schema,
Expand Down Expand Up @@ -148,7 +149,7 @@ def write_outline_metadata_to_excel(
title = f"{metadata_type.capitalize()} Metadata"
skeleton_object = self.type_to_outline(metadata_type, debug=False)
writer = self._TYPE_TO_WRITER[metadata_type]
writer(filename, skeleton_object, title)
writer(filename, skeleton_object, metadata_type, title)
return filename

def save_metadata_to_excel(
Expand Down Expand Up @@ -193,23 +194,47 @@ def save_metadata_to_excel(
new_ob = schema(**combined_dict)

writer = self._TYPE_TO_WRITER[metadata_type]
writer(filename, new_ob, title)
writer(filename, new_ob, metadata_type, title)
return filename

def read_metadata_excel(self, metadata_type: str, filename: str) -> BaseModel:
@staticmethod
def _get_metadata_type_from_excel_file(filename: str) -> str:
    """
    Detect the metadata type recorded inside an Excel metadata file.

    The type is read from cell C1 of the 'metadata' sheet. That cell is expected to hold
    space-separated text of the form "<metadata_type> type metadata ...", and the first
    word is returned as the metadata type.

    Args:
        filename (str): The path to the Excel file.

    Returns:
        str: the first space-separated word found in cell C1 of the 'metadata' sheet.

    Raises:
        ValueError: if the 'metadata' sheet is missing, if reading the cell fails, if cell C1
            is empty or not a string, or if its second and third words are not
            "type" and "metadata".
    """
    error_message = "Improperly formatted Excel file for metadata"
    workbook = load_workbook(filename)
    try:
        # The type marker lives in cell C1 of the 'metadata' sheet
        sheet = workbook["metadata"]
        type_info = sheet["C1"].value
    except KeyError as e:
        raise ValueError(f"Sheet 'metadata' not found. {error_message}") from e
    except Exception as e:
        # Keep the original exception as the cause so the underlying failure stays visible
        raise ValueError(f"Error reading Excel file: {e}") from e
    finally:
        # Always release the workbook, whether or not the read succeeded
        workbook.close()

    # The messages below name C1 because that is the cell actually read above
    if not type_info or not isinstance(type_info, str):
        raise ValueError(f"Cell C1 is empty or not a string. {error_message}")

    cell_values = type_info.split(" ")

    if len(cell_values) < 3 or cell_values[1] != "type" or cell_values[2] != "metadata":
        raise ValueError(f"Cell C1 is improperly formatted. {error_message}")

    return cell_values[0]

def read_metadata_excel(self, filename: str) -> BaseModel:
"""
Read in metadata_type metadata from an appropriately formatted Excel file as a pydantic object.
Args:
metadata_type (str): the name of a supported metadata type, currently:
document, script, series, survey, table, timeseries, timeseries_db, video
Currently not supported:
geospatial, image
filename (str): The path to the Excel file.
Returns:
BaseModel: a pydantic object containing the metadata from the file
"""
metadata_type = self._get_metadata_type_from_excel_file(filename)
metadata_type = self._process_metadata_type(metadata_type)
self.raise_if_unsupported_metadata_type(metadata_type=metadata_type)
schema = self._TYPE_TO_SCHEMA[metadata_type]
Expand Down
2 changes: 1 addition & 1 deletion pydantic_schemas/tests/test_excel_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_metadata(tmpdir, metadata_type):
)

# Read the metadata back
tmp = ei.read_metadata_excel(metadata_type=metadata_type, filename=filename)
tmp = ei.read_metadata_excel(filename=filename)

# Save the read metadata to a new file
filename2 = tmpdir.join(f"test_{metadata_type}_2.xlsx")
Expand Down
2 changes: 1 addition & 1 deletion pydantic_schemas/tests/test_pydantic_to_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ class WithDict(BaseModel):

wd = WithDict(additional={"s": "sa", "a": "va"}, sub=SubDict(sub_additional={"sub": "subval", "sub2": "subval2"}))
filename = tmpdir.join(f"integration_test_dictionaries_.xlsx")
write_across_many_sheets(filename, wd, title="Dictionaries")
write_across_many_sheets(filename, wd, "test", title="Dictionaries")
parsed_outp = excel_doc_to_pydantic(filename, WithDict)
assert parsed_outp == wd, parsed_outp

Expand Down
23 changes: 21 additions & 2 deletions pydantic_schemas/utils/pydantic_to_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,13 +510,28 @@ def write_nested_simple_pydantic_to_sheet(
return startrow


def write_to_single_sheet(doc_filepath: str, ob: BaseModel, title: Optional[str] = None, debug=False):
def write_metadata_type_and_version(doc_filepath: str, metadata_type: str):
    """
    Stamp the metadata type and template version into cell C1 of the 'metadata' sheet.

    The workbook at doc_filepath is obtained via open_or_create_workbook, cell C1 of its
    'metadata' sheet is set to "<metadata_type> type metadata version 20240809.1" in a
    9pt Consolas font, and the workbook is saved back to the same path.

    Args:
        doc_filepath (str): The path to the Excel file to update.
        metadata_type (str): the metadata type name to record in the cell.
    """
    workbook = open_or_create_workbook(doc_filepath)

    # Fetch the cell once, then set both its value and its font
    version_cell = workbook["metadata"]["C1"]
    version_cell.value = f"{metadata_type} type metadata version 20240809.1"
    version_cell.font = Font(name="Consolas", size=9)

    workbook.save(doc_filepath)


def write_to_single_sheet(
doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, debug=False
):
if title is None:
title = "Metadata"
sheet_name = "metadata"
current_row = create_sheet_and_write_title(
doc_filepath, sheet_name, title, sheet_number=0, protect_title=False, debug=debug
)
write_metadata_type_and_version(doc_filepath=doc_filepath, metadata_type=metadata_type)
current_row = write_nested_simple_pydantic_to_sheet(doc_filepath, sheet_name, ob, current_row + 1)
workbook = open_or_create_workbook(doc_filepath)
correct_column_widths(workbook, sheet_name=sheet_name)
Expand All @@ -525,7 +540,9 @@ def write_to_single_sheet(doc_filepath: str, ob: BaseModel, title: Optional[str]
workbook.save(doc_filepath)


def write_across_many_sheets(doc_filepath: str, ob: BaseModel, title: Optional[str] = None, debug=False):
def write_across_many_sheets(
doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, debug=False
):
children = seperate_simple_from_pydantic(ob)
if debug:
print(f"children: {children}")
Expand All @@ -537,6 +554,8 @@ def write_across_many_sheets(doc_filepath: str, ob: BaseModel, title: Optional[s
current_row = create_sheet_and_write_title(
doc_filepath, sheet_name, title, sheet_number=sheet_number, protect_title=False, debug=debug
)
write_metadata_type_and_version(doc_filepath=doc_filepath, metadata_type=metadata_type)

child_object = subset_pydantic_model(ob, children["simple"])

current_row = write_simple_pydantic_to_sheet(
Expand Down

0 comments on commit 4c73e49

Please sign in to comment.