diff --git a/excel_sheets/Document_metadata.xlsx b/excel_sheets/Document_metadata.xlsx index c48e874..5cb330d 100644 Binary files a/excel_sheets/Document_metadata.xlsx and b/excel_sheets/Document_metadata.xlsx differ diff --git a/excel_sheets/Script_metadata.xlsx b/excel_sheets/Script_metadata.xlsx index bf4034c..971d586 100644 Binary files a/excel_sheets/Script_metadata.xlsx and b/excel_sheets/Script_metadata.xlsx differ diff --git a/excel_sheets/Series_metadata.xlsx b/excel_sheets/Series_metadata.xlsx index 001455c..7b71266 100644 Binary files a/excel_sheets/Series_metadata.xlsx and b/excel_sheets/Series_metadata.xlsx differ diff --git a/excel_sheets/Survey_metadata.xlsx b/excel_sheets/Survey_metadata.xlsx index 265f79a..768a65e 100644 Binary files a/excel_sheets/Survey_metadata.xlsx and b/excel_sheets/Survey_metadata.xlsx differ diff --git a/excel_sheets/Table_metadata.xlsx b/excel_sheets/Table_metadata.xlsx index c7972a0..dec1d9e 100644 Binary files a/excel_sheets/Table_metadata.xlsx and b/excel_sheets/Table_metadata.xlsx differ diff --git a/excel_sheets/Timeseries_db_metadata.xlsx b/excel_sheets/Timeseries_db_metadata.xlsx index 7aae5bd..d0173b7 100644 Binary files a/excel_sheets/Timeseries_db_metadata.xlsx and b/excel_sheets/Timeseries_db_metadata.xlsx differ diff --git a/excel_sheets/Timeseries_metadata.xlsx b/excel_sheets/Timeseries_metadata.xlsx index 77c69db..a3a6ffa 100644 Binary files a/excel_sheets/Timeseries_metadata.xlsx and b/excel_sheets/Timeseries_metadata.xlsx differ diff --git a/excel_sheets/Video_metadata.xlsx b/excel_sheets/Video_metadata.xlsx index f494837..8dfa4ec 100644 Binary files a/excel_sheets/Video_metadata.xlsx and b/excel_sheets/Video_metadata.xlsx differ diff --git a/pydantic_schemas/excel_interface.py b/pydantic_schemas/excel_interface.py index 49c98dc..e4bb5a8 100644 --- a/pydantic_schemas/excel_interface.py +++ b/pydantic_schemas/excel_interface.py @@ -1,5 +1,6 @@ from typing import Optional +from openpyxl import load_workbook 
@staticmethod
def _get_metadata_type_from_excel_file(filename: str) -> str:
    """
    Read the metadata type recorded in cell C1 of the 'metadata' sheet of an Excel file.

    Cell C1 is expected to hold a space-separated string whose first three words are
    "<metadata_type> type metadata". Any further words are ignored.

    Args:
        filename (str): The path to the Excel file.

    Returns:
        str: the first word of cell C1, i.e. the metadata type.

    Raises:
        ValueError: if the 'metadata' sheet is missing, the cell cannot be read, the cell
            is empty or not a string, or its contents do not match the expected layout.
    """
    error_message = "Improperly formatted Excel file for metadata"
    # Opened outside the try block, so a failure to open the file propagates unchanged.
    workbook = load_workbook(filename)
    try:
        sheet = workbook["metadata"]
        type_info = sheet["C1"].value
    except KeyError as err:
        raise ValueError(f"Sheet 'metadata' not found. {error_message}") from err
    except Exception as err:
        raise ValueError(f"Error reading Excel file: {err}") from err
    finally:
        # Always release the file handle, whether or not the read succeeded.
        workbook.close()

    # The messages below name C1 because that is the cell actually read above.
    if not type_info or not isinstance(type_info, str):
        raise ValueError(f"Cell C1 is empty or not a string. {error_message}")

    cell_values = type_info.split(" ")

    if len(cell_values) < 3 or cell_values[1] != "type" or cell_values[2] != "metadata":
        raise ValueError(f"Cell C1 is improperly formatted. {error_message}")

    return cell_values[0]
Args: - metadata_type (str): the name of a supported metadata type, currently: - document, script, series, survey, table, timeseries, timeseries_db, video - Currently not supported: - geospatial, image filename (str): The path to the Excel file. Returns: BaseModel: a pydantic object containing the metadata from the file """ + metadata_type = self._get_metadata_type_from_excel_file(filename) metadata_type = self._process_metadata_type(metadata_type) self.raise_if_unsupported_metadata_type(metadata_type=metadata_type) schema = self._TYPE_TO_SCHEMA[metadata_type] diff --git a/pydantic_schemas/tests/test_excel_interface.py b/pydantic_schemas/tests/test_excel_interface.py index bd43dc8..c7f4807 100644 --- a/pydantic_schemas/tests/test_excel_interface.py +++ b/pydantic_schemas/tests/test_excel_interface.py @@ -15,7 +15,7 @@ def test_metadata(tmpdir, metadata_type): ) # Read the metadata back - tmp = ei.read_metadata_excel(metadata_type=metadata_type, filename=filename) + tmp = ei.read_metadata_excel(filename=filename) # Save the read metadata to a new file filename2 = tmpdir.join(f"test_{metadata_type}_2.xlsx") diff --git a/pydantic_schemas/tests/test_pydantic_to_excel.py b/pydantic_schemas/tests/test_pydantic_to_excel.py index d2895cb..6207851 100644 --- a/pydantic_schemas/tests/test_pydantic_to_excel.py +++ b/pydantic_schemas/tests/test_pydantic_to_excel.py @@ -407,7 +407,7 @@ class WithDict(BaseModel): wd = WithDict(additional={"s": "sa", "a": "va"}, sub=SubDict(sub_additional={"sub": "subval", "sub2": "subval2"})) filename = tmpdir.join(f"integration_test_dictionaries_.xlsx") - write_across_many_sheets(filename, wd, title="Dictionaries") + write_across_many_sheets(filename, wd, "test", title="Dictionaries") parsed_outp = excel_doc_to_pydantic(filename, WithDict) assert parsed_outp == wd, parsed_outp diff --git a/pydantic_schemas/utils/pydantic_to_excel.py b/pydantic_schemas/utils/pydantic_to_excel.py index c4095d7..dc69560 100644 --- 
def write_metadata_type_and_version(doc_filepath: str, metadata_type: str, version: str = "20240809.1"):
    """
    Stamp cell C1 of the 'metadata' sheet with the metadata type and a version tag.

    The text written is "<metadata_type> type metadata version <version>", set in
    9pt Consolas. The workbook is then saved back to the same path.

    Args:
        doc_filepath (str): The path to the Excel file to update.
        metadata_type (str): the name of the metadata type to record in the cell.
        version (str): the version tag appended to the cell text. Defaults to "20240809.1".
    """
    wb = open_or_create_workbook(doc_filepath)
    # NOTE(review): assumes the 'metadata' sheet already exists in the workbook; confirm against callers.
    sheet = wb["metadata"]

    type_cell = sheet["C1"]
    type_cell.value = f"{metadata_type} type metadata version {version}"
    type_cell.font = Font(name="Consolas", size=9)

    wb.save(doc_filepath)
metadata_type=metadata_type) + child_object = subset_pydantic_model(ob, children["simple"]) current_row = write_simple_pydantic_to_sheet(