diff --git a/excel_sheets/Document_metadata.xlsx b/excel_sheets/Document_metadata.xlsx index 5cb330d..8b91933 100644 Binary files a/excel_sheets/Document_metadata.xlsx and b/excel_sheets/Document_metadata.xlsx differ diff --git a/excel_sheets/Script_metadata.xlsx b/excel_sheets/Script_metadata.xlsx index 971d586..66475c7 100644 Binary files a/excel_sheets/Script_metadata.xlsx and b/excel_sheets/Script_metadata.xlsx differ diff --git a/excel_sheets/Series_metadata.xlsx b/excel_sheets/Series_metadata.xlsx index 7b71266..ba893f3 100644 Binary files a/excel_sheets/Series_metadata.xlsx and b/excel_sheets/Series_metadata.xlsx differ diff --git a/excel_sheets/Survey_metadata.xlsx b/excel_sheets/Survey_metadata.xlsx index 768a65e..3be5d6f 100644 Binary files a/excel_sheets/Survey_metadata.xlsx and b/excel_sheets/Survey_metadata.xlsx differ diff --git a/excel_sheets/Table_metadata.xlsx b/excel_sheets/Table_metadata.xlsx index dec1d9e..ce38e33 100644 Binary files a/excel_sheets/Table_metadata.xlsx and b/excel_sheets/Table_metadata.xlsx differ diff --git a/excel_sheets/Timeseries_db_metadata.xlsx b/excel_sheets/Timeseries_db_metadata.xlsx index d0173b7..bab0859 100644 Binary files a/excel_sheets/Timeseries_db_metadata.xlsx and b/excel_sheets/Timeseries_db_metadata.xlsx differ diff --git a/excel_sheets/Timeseries_metadata.xlsx b/excel_sheets/Timeseries_metadata.xlsx index a3a6ffa..0d3fd1a 100644 Binary files a/excel_sheets/Timeseries_metadata.xlsx and b/excel_sheets/Timeseries_metadata.xlsx differ diff --git a/excel_sheets/Video_metadata.xlsx b/excel_sheets/Video_metadata.xlsx index 8dfa4ec..1486a55 100644 Binary files a/excel_sheets/Video_metadata.xlsx and b/excel_sheets/Video_metadata.xlsx differ diff --git a/pydantic_schemas/tests/test_pydantic_to_excel.py b/pydantic_schemas/tests/test_pydantic_to_excel.py index 6207851..ef64653 100644 --- a/pydantic_schemas/tests/test_pydantic_to_excel.py +++ b/pydantic_schemas/tests/test_pydantic_to_excel.py @@ -23,19 +23,19 @@ ) from pydantic_schemas.utils.pydantic_to_excel import ( correct_column_widths, - create_sheet_and_write_title, + create_sheet, open_or_create_workbook, shade_30_rows_and_protect_sheet, shade_locked_cells, write_across_many_sheets, - write_nested_simple_pydantic_to_sheet, + write_pydantic_to_sheet, + write_title_and_version_info, write_to_single_sheet, ) from pydantic_schemas.utils.quick_start import make_skeleton from pydantic_schemas.video_schema import Model as VideoModel -# @pytest.mark.parametrize("index_above", [True, False]) def test_simple_schema(tmpdir, index_above=False): class Simple(BaseModel): idno: str @@ -44,25 +44,13 @@ class Simple(BaseModel): simple_original = Simple(idno="AVal", title="BVal", author="CVal") - filename = tmpdir.join(f"integration_test_simple_schema_{index_above}.xlsx") - sheetname = "Document Metadata" - sheet_title = "Document Metadata" - current_row = create_sheet_and_write_title(filename, sheetname, sheet_title) + filename = tmpdir.join(f"integration_test_simple_schema_.xlsx") + write_to_single_sheet(filename, simple_original, "simple_original", "Simple Metadata") - current_row = write_nested_simple_pydantic_to_sheet( - filename, sheetname, simple_original, current_row + 1, index_above=index_above - ) - worksheet = open_or_create_workbook(filename) - correct_column_widths(worksheet, sheetname) - shade_30_rows_and_protect_sheet(worksheet, sheetname, current_row + 1) - shade_locked_cells(worksheet, sheetname) - worksheet.save(filename) - - parsed_simple = excel_sheet_to_pydantic(filename, sheetname, Simple) + parsed_simple = excel_sheet_to_pydantic(filename, "metadata", Simple) assert parsed_simple == simple_original, parsed_simple -# @pytest.mark.parametrize("index_above", [True, False]) def test_two_layer_simple_schema(tmpdir, index_above=False): class Production(BaseModel): idno: str @@ -82,19 +70,10 @@ class ProductionAndCountries(BaseModel): countries=Country(name="MyCountry", initials="MC"), ) - filename = tmpdir.join(f"integration_test_two_layer_simple_schema_{index_above}.xlsx") - sheetname = "Document Metadata" - sheet_title = "Document Metadata" - current_row = create_sheet_and_write_title(filename, sheetname, sheet_title) + filename = tmpdir.join(f"integration_test_two_layer_simple_schema.xlsx") + write_to_single_sheet(filename, inp, "ProductionAndCountries", "Production and Countries") - current_row = write_nested_simple_pydantic_to_sheet(filename, sheetname, inp, current_row, index_above=index_above) - worksheet = open_or_create_workbook(filename) - correct_column_widths(worksheet, sheetname) - shade_30_rows_and_protect_sheet(worksheet, sheetname, current_row + 1) - shade_locked_cells(worksheet, sheetname) - worksheet.save(filename) - - parsed_outp = excel_sheet_to_pydantic(filename, sheetname, ProductionAndCountries) + parsed_outp = excel_sheet_to_pydantic(filename, "metadata", ProductionAndCountries) assert parsed_outp == inp, parsed_outp @@ -141,18 +120,8 @@ class ProductionAndCountries(BaseModel): ) filename = tmpdir.join(f"integration_test_multilayer_simple_schema_.xlsx") - sheetname = "Document Metadata" - sheet_title = "Document Metadata" - - current_row = create_sheet_and_write_title(filename, sheetname, sheet_title) - current_row = write_nested_simple_pydantic_to_sheet(filename, sheetname, inp, current_row + 1) - worksheet = open_or_create_workbook(filename) - correct_column_widths(worksheet, sheet_name=sheetname) - shade_30_rows_and_protect_sheet(worksheet, sheetname, current_row + 1) - shade_locked_cells(worksheet, sheetname) - worksheet.save(filename) - - parsed_outp = excel_sheet_to_pydantic(filename, sheetname, ProductionAndCountries) + write_to_single_sheet(filename, inp, "ProductionAndCountries", "Production and Countries") + parsed_outp = excel_sheet_to_pydantic(filename, "metadata", ProductionAndCountries) assert parsed_outp == inp, parsed_outp @@ -167,16 +136,7 @@ class Production(BaseModel): original_production = Production(idno="", subtitle=None, author="author", deprecatedFeature="toberemoved") filename = tmpdir.join(f"integration_test_optional_missing_deprecated_new_simple_.xlsx") - sheetname = "Document Metadata" - sheet_title = "Document Metadata" - - current_row = create_sheet_and_write_title(filename, sheetname, sheet_title) - current_row = write_nested_simple_pydantic_to_sheet(filename, sheetname, original_production, current_row + 1) - worksheet = open_or_create_workbook(filename) - correct_column_widths(worksheet, sheet_name=sheetname) - shade_30_rows_and_protect_sheet(worksheet, sheetname, current_row + 1) - shade_locked_cells(worksheet, sheetname) - worksheet.save(filename) + write_to_single_sheet(filename, original_production, "Production", "Production") class Production(BaseModel): idno: Optional[str] = None @@ -185,7 +145,7 @@ class Production(BaseModel): newFeature: Optional[str] = None requiredNewFeature: str - new_production = excel_sheet_to_pydantic(filename=filename, sheetname=sheetname, model_type=Production) + new_production = excel_sheet_to_pydantic(filename=filename, sheetname="metadata", model_type=Production) assert new_production.idno is None assert new_production.title is None assert new_production.author == "author" @@ -214,18 +174,10 @@ class ProductionAndCountries(BaseModel): example_production_and_country = ProductionAndCountries(production=example_production, countries=example_country) filename = tmpdir.join(f"integration_test_optional_missing_deprecated_new_two_level_.xlsx") - sheetname = "Document Metadata" - sheet_title = "Document Metadata" - current_row = create_sheet_and_write_title(filename, sheetname, sheet_title) - current_row = write_nested_simple_pydantic_to_sheet( - filename, sheetname, example_production_and_country, current_row + 1 + write_to_single_sheet( + filename, example_production_and_country, "ProductionAndCountries", "Production and Countries" ) - worksheet = open_or_create_workbook(filename) - correct_column_widths(worksheet, sheet_name=sheetname) - shade_30_rows_and_protect_sheet(worksheet, sheetname, current_row + 1) - shade_locked_cells(worksheet, sheetname) - worksheet.save(filename) class Production(BaseModel): idno: Optional[str] = None @@ -243,7 +195,7 @@ class ProductionAndCountries(BaseModel): countries: Country newTopLevelFeature: Optional[NewTopLevel] = None - new_pandc = excel_sheet_to_pydantic(filename=filename, sheetname=sheetname, model_type=ProductionAndCountries) + new_pandc = excel_sheet_to_pydantic(filename=filename, sheetname="metadata", model_type=ProductionAndCountries) assert new_pandc.production.idno is None assert new_pandc.production.title is None assert new_pandc.production.author == "author" @@ -307,20 +259,12 @@ class ProductionAndCountries(BaseModel): ) filename = tmpdir.join(f"integration_test_lists_.xlsx") - sheetname = "Document Metadata" - sheet_title = "Document Metadata" - - current_row = create_sheet_and_write_title(filename, sheetname, sheet_title) - current_row = write_nested_simple_pydantic_to_sheet( - filename, sheetname, example_production_and_country, current_row + 1 + # filename = "integration_test_lists_.xlsx" + write_to_single_sheet( + filename, example_production_and_country, "ProductionAndCountries", "Production and Countries" ) - worksheet = open_or_create_workbook(filename) - correct_column_widths(worksheet, sheet_name=sheetname) - shade_30_rows_and_protect_sheet(worksheet, sheetname, current_row + 1) - shade_locked_cells(worksheet, sheetname) - worksheet.save(filename) - new_pandc = excel_sheet_to_pydantic(filename=filename, sheetname=sheetname, model_type=ProductionAndCountries) + new_pandc = excel_sheet_to_pydantic(filename=filename, sheetname="metadata", model_type=ProductionAndCountries) assert new_pandc.production.idno is None assert new_pandc.production.title is None assert len(new_pandc.production.authors) == 4 @@ -376,8 +320,10 @@ class ProductionAndCountries(BaseModel): ) filename = tmpdir.join(f"integration_test_optional_missing_deprecated_new_two_level_.xlsx") - title = "Example" - write_across_many_sheets(filename, example_production_and_country, title) + # filename = f"integration_test_optional_missing_deprecated_new_two_level_.xlsx" + write_across_many_sheets( + filename, example_production_and_country, "ProductionAndCountries", "Production and Countries" + ) new_pandc = excel_doc_to_pydantic(filename, ProductionAndCountries) assert new_pandc.production.idno == "myidno" @@ -407,7 +353,8 @@ class WithDict(BaseModel): wd = WithDict(additional={"s": "sa", "a": "va"}, sub=SubDict(sub_additional={"sub": "subval", "sub2": "subval2"})) filename = tmpdir.join(f"integration_test_dictionaries_.xlsx") - write_across_many_sheets(filename, wd, "test", title="Dictionaries") + write_across_many_sheets(filename, wd, "WithDict", "Looking at dictionaries") + parsed_outp = excel_doc_to_pydantic(filename, WithDict) assert parsed_outp == wd, parsed_outp @@ -439,13 +386,12 @@ def test_write_real_skeleton(tmpdir, name, type_writer_reader): os.remove(filename) ob = make_skeleton(type) - writer(filename, ob, name) + writer(filename, ob, name, f"{name} Metadata") reader(filename, type) def test_demo(): filename = "demo_output.xlsx" - sheetname = "metadata" sheet_title = "Formatting metadata examples" class SingleLevelData(BaseModel): @@ -498,10 +444,12 @@ class MetaDataOfVariousHierarchies(BaseModel): if os.path.exists(filename): os.remove(filename) - current_row = create_sheet_and_write_title(filename, sheetname, sheet_title) - current_row = write_nested_simple_pydantic_to_sheet(filename, sheetname, example, current_row + 1) - worksheet = open_or_create_workbook(filename) - correct_column_widths(worksheet, sheet_name=sheetname) - shade_30_rows_and_protect_sheet(worksheet, sheetname, current_row + 1) - shade_locked_cells(worksheet, sheetname) - worksheet.save(filename) + write_to_single_sheet(filename, example, "MetaDataOfVariousHierarchies", sheet_title) + + # current_row = create_sheet_and_write_title(filename, sheetname, sheet_title) + # current_row = write_nested_simple_pydantic_to_sheet(filename, sheetname, example, current_row + 1) + # worksheet = open_or_create_workbook(filename) + # correct_column_widths(worksheet, sheet_name=sheetname) + # shade_30_rows_and_protect_sheet(worksheet, sheetname, current_row + 1) + # shade_locked_cells(worksheet, sheetname) + # worksheet.save(filename) diff --git a/pydantic_schemas/utils/excel_to_pydantic.py b/pydantic_schemas/utils/excel_to_pydantic.py index 5902e08..a8dc618 100644 --- a/pydantic_schemas/utils/excel_to_pydantic.py +++ b/pydantic_schemas/utils/excel_to_pydantic.py @@ -237,10 +237,11 @@ def instantiate_pydantic_object(model_type: Type[BaseModel], df: pd.DataFrame, f def excel_sheet_to_pydantic(filename: str, sheetname: str, model_type: Union[Type[BaseModel], Type[List[BaseModel]]]): df = pd.read_excel(filename, sheet_name=sheetname, header=None) df = df.where(df.notnull(), None) - try: - df = get_relevant_sub_frame(model_type, df) - except (KeyError, IndexError): - pass + if sheetname != "metadata": + try: + df = get_relevant_sub_frame(model_type, df) + except (KeyError, IndexError): + pass if is_optional_annotation(model_type): return handle_optional(df.iloc[0, 0], model_type, df) diff --git a/pydantic_schemas/utils/pydantic_to_excel.py b/pydantic_schemas/utils/pydantic_to_excel.py index dc69560..27567bd 100644 --- a/pydantic_schemas/utils/pydantic_to_excel.py +++ b/pydantic_schemas/utils/pydantic_to_excel.py @@ -1,17 +1,22 @@ +import copy +import json import os -import typing +from enum import Enum from typing import Dict, List, Optional, Tuple, Union import pandas as pd from openpyxl import Workbook, load_workbook -from openpyxl.styles import Alignment, Font, PatternFill, Protection +from openpyxl.styles import Alignment, Border, Font, PatternFill, Protection, Side +from openpyxl.utils.dataframe import dataframe_to_rows from openpyxl.worksheet.protection import SheetProtection +from openpyxl.worksheet.worksheet import Worksheet from pydantic import BaseModel -from .utils import ( +from pydantic_schemas.utils.utils import ( annotation_contains_dict, annotation_contains_list, assert_dict_annotation_is_strings_or_any, + get_subtype_of_optional_or_list, seperate_simple_from_pydantic, subset_pydantic_model, ) @@ -19,72 +24,61 @@ MAXCOL = 30 -def protect_and_shade_given_cell(sheet, row: int, col: int): - grey_fill = PatternFill(start_color="DDDDDD", end_color="DDDDDD", fill_type="solid") - sheet.cell(row=row, column=col).fill = grey_fill - sheet.cell(row=row, column=col).protection = Protection(locked=True) +def unprotect_cell(sheet, row, column): + sheet.cell(row=row, column=column).protection = Protection(locked=False) -def protect_and_shade_row(sheet, row: int, colmin: int = 1, colmax: Optional[int] = None): +def unprotect_row(sheet, row, colmin: int, colmax: Optional[int] = None): if colmax is None: colmax = max(colmin, MAXCOL, sheet.max_column) - for col in range(colmin, colmax): - protect_and_shade_given_cell(sheet, row, col) + for col in range(colmin, colmax + 1): + unprotect_cell(sheet, row, col) -def protect_and_shade_col(sheet, col: int, rowmin: int, rowmax: int): +def unprotect_given_col(sheet, col: int, rowmin: int, rowmax: int): for row in range(rowmin, rowmax): - protect_and_shade_given_cell(sheet, row, col) + unprotect_cell(sheet, row, col) -def unprotect_cell(sheet, row, column): - sheet.cell(row=row, column=column).protection = Protection(locked=False) +def protect_and_shade_given_cell(sheet, row: int, col: int): + grey_fill = PatternFill(start_color="DDDDDD", end_color="DDDDDD", fill_type="solid") + sheet.cell(row=row, column=col).fill = grey_fill + sheet.cell(row=row, column=col).protection = Protection(locked=True) -def unprotect_row(sheet, row, colmin: int, colmax: Optional[int] = None): +def protect_and_shade_row(sheet, row: int, colmin: int = 1, colmax: Optional[int] = None): if colmax is None: colmax = max(colmin, MAXCOL, sheet.max_column) - for col in range(colmin, colmax + 1): - unprotect_cell(sheet, row, col) + for col in range(colmin, colmax): + protect_and_shade_given_cell(sheet, row, col) -def unprotect_given_col(sheet, col: int, rowmin: int, rowmax: int): +def protect_and_shade_col(sheet, col: int, rowmin: int, rowmax: int): for row in range(rowmin, rowmax): - unprotect_cell(sheet, row, col) + protect_and_shade_given_cell(sheet, row, col) -def open_or_create_workbook(doc_filepath): - if os.path.exists(doc_filepath): - workbook = load_workbook(doc_filepath) - else: - workbook = Workbook() - # Remove the default sheet created by Workbook() - if len(workbook.sheetnames) == 1 and workbook.sheetnames[0] == "Sheet": - del workbook["Sheet"] - return workbook +def shade_locked_cells(worksheet: Worksheet): + """ + Shades every cell grey if it is locked and leaves it unshaded if it is not locked. + Args: + worksheet (Worksheet): The openPyxl Worksheet from an Excel file. + sheet_name (str): The name of the sheet to apply the shading. + """ + # Define the grey fill + grey_fill = PatternFill(start_color="DDDDDD", end_color="DDDDDD", fill_type="solid") -def shade_30_rows_and_protect_sheet(workbook: Workbook, sheet_name: str, startrow: int): - """For use after all data is written so there is a clear border around the data""" - ws = workbook[sheet_name] - for r in range(startrow, startrow + 30): - protect_and_shade_row(ws, r) - ws.protection = SheetProtection( - sheet=True, - formatCells=False, - formatColumns=False, - formatRows=False, - insertColumns=False, - insertRows=True, - insertHyperlinks=False, - deleteColumns=False, - deleteRows=True, - selectLockedCells=False, - selectUnlockedCells=False, - ) + # Iterate through each cell in the worksheet + for row in worksheet.iter_rows(): + for cell in row: + if cell.protection.locked: + cell.fill = grey_fill + else: + cell.fill = PatternFill() # Remove any fill (reset to default) -def correct_column_widths(workbook: Workbook, sheet_name: str): +def correct_column_widths(worksheet: Worksheet): """ Adjusts the column widths of an Excel sheet based on the maximum length of the content in each column. If a column has no filled values, its width remains unchanged. @@ -94,9 +88,8 @@ def correct_column_widths(workbook: Workbook, sheet_name: str): sheet_name (str): The name of the sheet to adjust column widths for. """ # Load the existing workbook - ws = workbook[sheet_name] # Adjust column widths based on the maximum length of the content in each column - for col in ws.columns: + for col in worksheet.columns: max_length = 0 column = col[0].column_letter # Get the column letter for cell in col: @@ -109,136 +102,26 @@ def correct_column_widths(workbook: Workbook, sheet_name: str): if max_length > 0: # Only adjust if there are filled values in the column max_length = max(min(max_length, 28), 11) adjusted_width = max_length + 2 - ws.column_dimensions[column].width = adjusted_width - - -def shade_locked_cells(workbook: Workbook, sheet_name: str): - """ - Shades every cell grey if it is locked and leaves it unshaded if it is not locked. - - Args: - workbook (Workbook): The openPyxl Workbook of an Excel file. - sheet_name (str): The name of the sheet to apply the shading. - """ - # Load the existing workbook - ws = workbook[sheet_name] - - # Define the grey fill - grey_fill = PatternFill(start_color="DDDDDD", end_color="DDDDDD", fill_type="solid") - - # Iterate through each cell in the worksheet - for row in ws.iter_rows(): - for cell in row: - if cell.protection.locked: - cell.fill = grey_fill - else: - cell.fill = PatternFill() # Remove any fill (reset to default) - - -def write_to_cell( - filename: str, sheet_name: str, row: int, col: int, text: str, isBold=False, size=14, debug: bool = False -): - """ - Writes text to a specified cell in the Excel file. - - Args: - filename (str): The path to the Excel file. - sheet_name (str): The name of the sheet. - row_num (int): The row number (1-based index). - col_num (int): The column number (1-based index). - text (str): The text to write to the cell. - """ - # Load the existing workbook or create a new one if it doesn't exist - try: - wb = load_workbook(filename) - except FileNotFoundError: - wb = Workbook() - - # Select the worksheet by name - ws = wb[sheet_name] - - # Write text to the specified cell - cell = ws.cell(row=row, column=col, value=text) - cell.font = Font(bold=isBold, size=size) - - protect_and_shade_row(ws, row=row, colmin=col) - - # Save the workbook - wb.save(filename) - - return row + 1 - - -def create_sheet_and_write_title( - doc_filepath: str, - sheet_name: str, - sheet_title: Optional[str] = None, - sheet_number: int = 0, - protect_title: bool = True, - debug=False, -): - """ - In the given excel document, creates a new sheet called sheet_name and in the top left cell - writes in sheet_title in bold. - - It will create the excel document at doc_filepath if it does not already exist. - - The new sheet will be inserted at the specified sheet_number position. If sheet_number is - greater than the total number of sheets, the new sheet will be added at the end. - - Args: - doc_filepath (str): The path to the Excel document. - sheet_name (str): The name of the new sheet to create. - sheet_title (str): The title to write in the top left cell of the new sheet, in bold. - sheet_number (int): The position to insert the new sheet (0-indexed). If greater than the - total number of sheets, the new sheet will be added at the end. - - Returns: - int: index of next row below the final written row - - Raises: - ValueError: if a sheet called sheet_name already exists in the document to prevent overwriting. - """ - # Check if the file exists - workbook = open_or_create_workbook(doc_filepath) - - # Check if the sheet already exists - if sheet_name in workbook.sheetnames: - raise ValueError(f"A sheet called '{sheet_name}' already exists in the document.") + worksheet.column_dimensions[column].width = adjusted_width - # Create a new sheet - new_sheet = workbook.create_sheet(title=sheet_name) - - if sheet_title is not None: - # Write the title in bold in the top left cell (A1) - bold_font = Font(bold=True, size=14) - new_sheet["A1"] = sheet_title.replace("_", " ") - new_sheet["A1"].font = bold_font - - # Shade the background of the cells in the first 2 rows grey and lock them - # for row in range(1, 3): - # protect_and_shade_row(new_sheet, row) - if protect_title: - protect_and_shade_given_cell(new_sheet, 1, 1) - else: - unprotect_cell(new_sheet, 1, 1) - protect_and_shade_row(new_sheet, 1, 2) - protect_and_shade_row(new_sheet, 2) - # Determine the position to insert the new sheet - total_sheets = len(workbook.sheetnames) - insert_position = min(sheet_number, total_sheets) - - # Move the new sheet to the specified position - workbook._sheets.insert(insert_position, workbook._sheets.pop()) - - # Save the workbook - workbook.save(doc_filepath) - - if sheet_title is not None: - return 3 - else: - return 0 +def shade_30_rows_and_protect_sheet(worksheet: Worksheet, startrow: int): + """For use after all data is written so there is a clear border around the data""" + for r in range(startrow, startrow + 30): + protect_and_shade_row(worksheet, r) + worksheet.protection = SheetProtection( + sheet=True, + formatCells=False, + formatColumns=False, + formatRows=False, + insertColumns=False, + insertRows=True, + insertHyperlinks=False, + deleteColumns=False, + deleteRows=True, + selectLockedCells=False, + selectUnlockedCells=False, + ) def replace_row_with_multiple_rows(original_df, new_df, row_to_replace): @@ -264,8 +147,7 @@ def replace_row_with_multiple_rows(original_df, new_df, row_to_replace): def pydantic_to_dataframe( - ob: Union[BaseModel, Dict, List[Dict]], - annotations: Optional[Dict[str, typing._UnionGenericAlias]] = None, + ob: Union[BaseModel, List[BaseModel]], debug: bool = False, ) -> Tuple[pd.DataFrame, List[int]]: """ @@ -275,323 +157,316 @@ def pydantic_to_dataframe( Returns the dataframe and also a list of the indexs (denoted by zero-based numbers) that are of list types. The list of indexs is intended to be used for appropriately shading the excel sheet. """ - if isinstance(ob, BaseModel): - ob_dict = ob.model_dump(mode="json") + if isinstance(ob, list): + ob_dict = [elem.model_dump() for elem in ob] + annotations = {k: v.annotation for k, v in ob[0].model_fields.items()} + is_list_of_objects = True else: - ob_dict = ob - try: - df = pd.json_normalize(ob_dict).T - except NotImplementedError: - raise NotImplementedError(ob) + ob_dict = ob.model_dump() + annotations = {k: v.annotation for k, v in ob.model_fields.items()} + is_list_of_objects = False + df = pd.json_normalize(ob_dict).T if debug: print("pydantic_to_dataframe") print(df) + + # handle dictionaries + # for idx, field in ob_dict.items(): + # if annotation_contains_dict(annotations[idx]): + for fieldname, anno in annotations.items(): + if annotation_contains_dict(anno): + if debug: + print("Found a dictionary") + if is_list_of_objects: + continue + assert_dict_annotation_is_strings_or_any(anno) + field = ob_dict[fieldname] + if field is None or len(field) == 0: + dict_df = pd.DataFrame(["", ""], index=["key", "value"]) + else: + dict_df = pd.DataFrame([field.keys(), field.values()], index=["key", "value"]) + dict_df.index = dict_df.index.map(lambda x: f"{fieldname}.{x}") + df = df[~df.index.str.startswith(f"{fieldname}.")] + df = df[df.index != fieldname] + df = pd.concat([df, dict_df]) + + i = 0 list_indices = [] - if isinstance(ob, list): - list_indices = list(range(len(df))) - else: - for idx, _ in ob_dict.items(): - if annotations is not None and annotation_contains_dict(annotations[idx]): - if debug: - print("Found a dictionary") - assert_dict_annotation_is_strings_or_any(annotations[idx]) - field = ob_dict[idx] - if field is None or len(field) == 0: - dict_df = pd.DataFrame(["", ""], index=["key", "value"]) - else: - dict_df = pd.DataFrame([field.keys(), field.values()], index=["key", "value"]) - dict_df.index = dict_df.index.map(lambda x: f"{idx}.{x}") - df = df[~df.index.str.startswith(f"{idx}.")] - df = df[df.index != idx] - df = pd.concat([df, dict_df]) - i = 0 - for idx in df.index: + enums = {} + for idx in df.index: + if debug: + print(f"pydantic_to_dataframe::172 idx = {idx}, df = {df}") + vals = df.loc[idx] # [0] + if debug: + print(f"vals: {vals}") + print(f'idx.split(".")[0]: {idx.split(".")[0]}') + print(f'annotations[idx.split(".")[0]]: {annotations[idx.split(".")[0]]}') + # field = ob_dict[idx.split(".")[0]] + + if annotation_contains_list(annotations[idx.split(".")[0]]) or annotation_contains_dict( + annotations[idx.split(".")[0]] + ): + if annotation_contains_list(annotations[idx.split(".")[0]]): + subtype = get_subtype_of_optional_or_list(annotations[idx.split(".")[0]]) + else: + subtype = dict if debug: - print(f"pydantic_to_dataframe::283 idx = {idx}, df = {df}") - vals = df.loc[idx][0] - field = ob_dict[idx.split(".")[0]] - - if ( - isinstance(vals, list) - or (annotations is not None and annotation_contains_list(annotations[idx.split(".")[0]])) - or (annotations is not None and annotation_contains_dict(annotations[idx.split(".")[0]])) - ): # (hasattr(ob, "annotation") and annotation_contains_list(ob.annotation)): - if vals is not None and len(vals) > 0 and (isinstance(vals[0], BaseModel) or isinstance(vals[0], Dict)): - if debug: - print("list of base models", vals[0]) - sub = pd.json_normalize(df.loc[idx].values[0]).reset_index(drop=True).T - sub.index = sub.index.map(lambda x: f"{idx}." + x) - df = replace_row_with_multiple_rows(df, sub, idx) - list_indices += list(range(i, i + len(sub))) - i += len(sub) - else: - if debug: - print("list of builtins or else empty") - df = replace_row_with_multiple_rows( - df, df.loc[idx].explode().to_frame().reset_index(drop=True).T, idx - ) - list_indices.append(i) - i += 1 + print("subtype = ", subtype) + print("isinstance(subtype, BaseModel)", isinstance(subtype, type(BaseModel))) + print("isinstance(subtype, dict)", isinstance(subtype, dict)) + if is_list_of_objects: + if debug: + print("list of lists") + list_indices.append(i) + i += 1 + elif isinstance(subtype, type(BaseModel)) or isinstance(subtype, dict): + if debug: + print("list of base models", vals) + sub = pd.json_normalize(df.loc[idx].values[0]).reset_index(drop=True).T + sub.index = sub.index.map(lambda x: f"{idx}." + x) + df = replace_row_with_multiple_rows(df, sub, idx) + list_indices += list(range(i, i + len(sub))) + i += len(sub) else: + if debug: + print("list of builtins or else empty") + df = replace_row_with_multiple_rows(df, df.loc[idx].explode().to_frame().reset_index(drop=True).T, idx) + list_indices.append(i) i += 1 + else: + if isinstance(annotations[idx.split(".")[0]], type(Enum)): + dropdown_options = [e.value for e in annotations[idx.split(".")[0]]] + dropdown = DataValidation( + type="list", + formula1=f'"{",".join(dropdown_options)}"', + showDropDown=False, + allow_blank=True, + showErrorMessage=True, + ) + enums[i] = dropdown + i += 1 if debug: print(df) if len(df): df.index = df.index.str.split(".", expand=True) - return df, list_indices - - -def write_simple_pydantic_to_sheet( - doc_filepath: str, - sheet_name: str, - ob: BaseModel, - startrow: int, - index_above=False, - write_title=True, - title: Optional[str] = None, - annotations=None, - debug: bool = False, -): - """ - Assumes a pydantic object made up of built in types or pydantic objects utimately made of built in types or Lists. - Do not use if the object or it's children contain Dicts or enums. - - Starting from startrow, it writes the name of the pydantic object in the first column. It then writes the data - starting in the row below and from the second column. - - If index_above = False then the data is printed with indexs down the second column and values down the third column - If index_above = True then the data is printed with indexs along the second row and values along the third row - - Example: - - class Simple(BaseModel): - a: str - b: str - - example = Simple(a="value_a", b="value_b") - # with index_above=True - write_simple_pydantic_to_sheet("filename", "sheetname", example, startrow=1, index_above=True) - - gives: - - Simple - a b - value_a value_b + if is_list_of_objects: + list_indices = list(range(len(df))) + return df, list_indices, enums - # with index_above=False - write_simple_pydantic_to_sheet("filename", "sheetname", example, startrow=1, index_above=False) - gives: +def stringify_enum(elem): + if isinstance(elem, Enum): + return str(elem.value) + else: + raise TypeError(f"{elem} is not an enum") - Simple - a value_a - b value_b +def stringify_cell_element(elem): + if isinstance(elem, list): + return json.dumps(elem, default=stringify_enum) + elif isinstance(elem, Enum): + return str(elem.value) + elif isinstance(elem, dict): + return json.dumps(elem, default=stringify_enum) + else: + return elem - Args: - doc_filepath (str): The path to the Excel document. - sheet_name (str): The name of the new sheet to create. - ob (BaseModel): a pydantic class - startrow (int): the row from which to start writing the data - index_above (bool): if True then the index is written along a row with the data below, if False then the data is - written in a column with the data to the right. Default is False - Returns: - int: index of next row below the final written row - """ - if write_title: - if title is None: - title = ob.model_json_schema()["title"] - if startrow == 1: - title = title.replace("_", " ") - size = 14 - else: - size = 12 - startrow = write_to_cell(doc_filepath, sheet_name, startrow, 1, title, isBold=True, size=size, debug=debug) - startcol = 2 - - df, list_rows = pydantic_to_dataframe(ob=ob, annotations=annotations, debug=debug) - index_levels = df.index.nlevels - # if index_above and index_levels > 1: - # warnings.warn( - # "Setting index_above=True is incompatible with a hierarchical index. Setting index_above to False.", - # UserWarning, - # ) - # index_above = False - - # if index_above: - # df = df.T - - # Annoyingly, openpyxl uses 1 based indexing but - # But pandas uses 0 based indexing. - - with pd.ExcelWriter(doc_filepath, mode="a", if_sheet_exists="overlay") as writer: - df.to_excel( - writer, - sheet_name=sheet_name, - header=index_above, - index=not index_above, - startrow=startrow - 1, - startcol=startcol - 1, - merge_cells=True, - ) +def write_pydantic_to_excel(ws, ob, row_number, debug=False): + df, list_rows, enums = pydantic_to_dataframe(ob, debug=debug) + list_rows_tracker = {} + list_of_enums_tracker = {} + for i, r in enumerate(dataframe_to_rows(df, index=True, header=False)): + if debug: + print(r) + if all(map(lambda x: x is None, r)): + continue + r = [stringify_cell_element(val) for val in r] + # r = [str(val) if isinstance(val, list) else str(val.value) if isinstance(val, Enum) else val for val in r ] + r = [""] + r + if debug: + print("about to append", r) + ws.append(r) + for col in range(2, df.index.nlevels + 2): + cell = ws.cell(row=row_number, column=col) + cell.font = Font(bold=True) + cell.border = Border( + top=Side(border_style=None), + left=Side(border_style="thin"), + right=Side(border_style="thin"), + bottom=Side(border_style=None), + ) + if cell.value is not None and cell.value != "": + if debug: + print("turning on some borders") + border_copy = copy.copy(cell.border) + border_copy.top = Side(border_style="thin") + cell.border = border_copy + min_unprotected_cell = df.index.nlevels + 2 + max_unprotected_cell = None if i - 1 in list_rows else min_unprotected_cell + unprotect_row(ws, row_number, colmin=min_unprotected_cell, colmax=max_unprotected_cell) + if i - 1 in enums: + dropdown = enums[i - 1] + ws.add_data_validation(dropdown) + for j in range( + min_unprotected_cell, ws.max_column if max_unprotected_cell is None else min_unprotected_cell + 1 + ): + dropdown.add(ws.cell(row_number, j)) + if max_unprotected_cell is None: + list_rows_tracker[row_number] = ws.max_column + if i - 1 in enums: + list_of_enums_tracker[row_number] = dropdown + row_number += 1 + + for col in range(2, df.index.nlevels + 2): + cell = ws.cell(row=row_number, column=col) + border_copy = copy.copy(cell.border) + border_copy.top = Side(border_style="thin") + cell.border = border_copy + + return row_number + 1, list_rows_tracker, list_of_enums_tracker + + +def write_title_and_version_info( + ws: Worksheet, sheet_title: Optional[str], version: Optional[str], protect_title=True +) -> int: + if sheet_title is None: + return 1 + if sheet_title is not None: + sheet_title = sheet_title.replace("_", " ") + ws.append([sheet_title, None, version]) - # Open the Excel file with openpyxl - workbook = load_workbook(doc_filepath) - sheet = workbook[sheet_name] - - # Get the DataFrame dimensions - rows, cols = df.shape - - # if index_above: - # protect_and_shade_row(sheet, startrow) - # for c in range(startcol, cols + startcol): - # cell = sheet.cell(startrow, c) - # cell.font = Font(bold=False) - # for r in range(startrow + 1, startrow + rows + 1): - # unprotect_row(sheet, r, startcol, colmax=startcol + cols) - # protect_and_shade_row(sheet, r, colmin=startcol + cols) - # next_row = startrow + rows + 2 - # else: - for col in range(startcol, startcol + index_levels): - protect_and_shade_col(sheet, col, startrow, startrow + rows) - for r in range(startrow, startrow + rows): - cell = sheet.cell(r, col) - cell.font = Font(bold=False) - firstdatacol = startcol + index_levels - for i, r in enumerate(range(startrow, startrow + rows)): - if i in list_rows: - unprotect_row(sheet, r, firstdatacol) - else: - unprotect_row(sheet, r, firstdatacol, colmax=firstdatacol + 1) - protect_and_shade_row(sheet, r, colmin=firstdatacol + 1) - next_row = startrow + rows + if sheet_title is not None: + bold_font = Font(bold=True, size=14) + ws["A1"].font = bold_font + if protect_title == False: + unprotect_row(ws, 1, colmin=1, colmax=1) - sheet.protection.enable() - # Save the workbook - workbook.save(doc_filepath) + if version is not None: + version_font = Font(name="Consolas", size=9) + ws["C1"].font = version_font - return next_row + ws.append([]) + return 3 -def write_nested_simple_pydantic_to_sheet( - doc_filepath: str, sheet_name: str, ob: BaseModel, startrow: int, index_above=False, debug=False -): - """ - Assumes the pydantic object is made up only of other pydantic objects that are themselves made up only of built in types - """ - if debug: - print(ob) +def write_pydantic_to_sheet(worksheet: Worksheet, ob: BaseModel, current_row: int, debug: bool = False) -> int: children = seperate_simple_from_pydantic(ob) if debug: - print(children["simple"]) + print("Children:") + print(children) + list_rows = {} + enum_list_rows = {} + if len(children["simple"]): child_object = subset_pydantic_model(ob, children["simple"]) - startrow = write_simple_pydantic_to_sheet( - doc_filepath, - sheet_name, - child_object, - startrow, - index_above=False, - write_title=False, - annotations={k: v.annotation for k, v in child_object.model_fields.items()}, - debug=debug, + current_row, sub_list_rows, sub_list_enums = write_pydantic_to_excel( + ws=worksheet, ob=child_object, row_number=current_row ) - if debug: - print("Done with simple children, now nesting pydantic objects") + list_rows.update(sub_list_rows) + enum_list_rows.update(sub_list_enums) + for mfield in children["pydantic"]: - field = ob.model_dump(mode="json")[mfield] - if debug: - print(f"write_nested_simple_pydantic_to_sheet::428, field={field}") - startrow = write_simple_pydantic_to_sheet( - doc_filepath, sheet_name, field, startrow, index_above=index_above, title=mfield, debug=debug + worksheet.append([mfield]) + worksheet.cell(row=current_row, column=1).font = Font(bold=True, size=12) + current_row += 1 + child_object = getattr(ob, mfield) + current_row, sub_list_rows, sub_list_enums = write_pydantic_to_excel( + ws=worksheet, ob=child_object, row_number=current_row ) + list_rows.update(sub_list_rows) + enum_list_rows.update(sub_list_enums) - return startrow + for row, col in list_rows.items(): + unprotect_row(worksheet, row, colmin=col, colmax=None) + if row in enum_list_rows: + dropdown = enum_list_rows[row] + for j in range(col, worksheet.max_column): + dropdown.add(worksheet.cell(row, j)) + return current_row -def write_metadata_type_and_version(doc_filepath: str, metadata_type: str): - wb = open_or_create_workbook(doc_filepath) - sheet = wb["metadata"] +def open_or_create_workbook(doc_filepath): + if os.path.exists(doc_filepath): + workbook = load_workbook(doc_filepath) + else: + workbook = Workbook() + # Remove the default sheet created by Workbook() + if len(workbook.sheetnames) == 1 and workbook.sheetnames[0] == "Sheet": + del workbook["Sheet"] + return workbook - sheet["C1"] = f"{metadata_type} type metadata version 20240809.1" - version_font = Font(name="Consolas", size=9) - sheet["C1"].font = version_font +def create_sheet(workbook, sheetname, sheet_number): + # Check if the sheet already exists + if sheetname in workbook.sheetnames: + raise ValueError(f"A sheet called '{sheetname}' already exists in the document.") - wb.save(doc_filepath) + # Create a new sheet + new_sheet = workbook.create_sheet(title=sheetname) + + # Determine the position to insert the new sheet + total_sheets = len(workbook.sheetnames) + insert_position = min(sheet_number, total_sheets) + + # Move the new sheet to the specified position + workbook._sheets.insert(insert_position, workbook._sheets.pop()) + return new_sheet def write_to_single_sheet( - doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, debug=False + doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, verbose=False ): - if title is None: - title = "Metadata" - sheet_name = "metadata" - current_row = create_sheet_and_write_title( - doc_filepath, sheet_name, title, sheet_number=0, protect_title=False, debug=debug - ) - write_metadata_type_and_version(doc_filepath=doc_filepath, metadata_type=metadata_type) - current_row = write_nested_simple_pydantic_to_sheet(doc_filepath, sheet_name, ob, current_row + 1) - workbook = open_or_create_workbook(doc_filepath) - correct_column_widths(workbook, sheet_name=sheet_name) - shade_30_rows_and_protect_sheet(workbook, sheet_name, current_row + 1) - shade_locked_cells(workbook, sheet_name) - workbook.save(doc_filepath) + model_default_name = ob.model_json_schema()["title"] + wb = open_or_create_workbook(doc_filepath) + ws = create_sheet(wb, "metadata", sheet_number=0) + version = f"{metadata_type} type metadata version 20240812.1" + current_row = write_title_and_version_info(ws, title, version, protect_title=False) + current_row = write_pydantic_to_sheet(ws, ob, current_row, debug=verbose) + correct_column_widths(worksheet=ws) + shade_30_rows_and_protect_sheet(worksheet=ws, startrow=current_row) + shade_locked_cells(worksheet=ws) + wb.save(doc_filepath) def write_across_many_sheets( - doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, debug=False + doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, verbose=False ): + wb = open_or_create_workbook(doc_filepath) + ws = create_sheet(wb, "metadata", sheet_number=0) + version = f"{metadata_type} type metadata version 20240812.1" + current_row = write_title_and_version_info(ws, title, version, protect_title=False) + children = seperate_simple_from_pydantic(ob) - if debug: + if verbose: print(f"children: {children}") sheet_number = 0 - if len(children["simple"]): - if title is None: - title = "Metadata" - sheet_name = "metadata" - current_row = create_sheet_and_write_title( - doc_filepath, sheet_name, title, sheet_number=sheet_number, protect_title=False, debug=debug - ) - write_metadata_type_and_version(doc_filepath=doc_filepath, metadata_type=metadata_type) + if len(children["simple"]): child_object = subset_pydantic_model(ob, children["simple"]) - current_row = write_simple_pydantic_to_sheet( - doc_filepath, - sheet_name, - child_object, - current_row + 1, - index_above=False, - write_title=False, - annotations={k: v.annotation for k, v in child_object.model_fields.items()}, - debug=debug, - ) - workbook = open_or_create_workbook(doc_filepath) - correct_column_widths(workbook, sheet_name=sheet_name) - shade_30_rows_and_protect_sheet(workbook, sheet_name, current_row + 1) - shade_locked_cells(workbook, sheet_name) - workbook.save(doc_filepath) - sheet_number += 1 + current_row = write_pydantic_to_sheet(ws, child_object, current_row, debug=verbose) + correct_column_widths(worksheet=ws) + shade_30_rows_and_protect_sheet(worksheet=ws, startrow=current_row) + shade_locked_cells(worksheet=ws) + sheet_number += 1 for fieldname in children["pydantic"]: - if debug: + if verbose: print(f"\n\n{fieldname}\n") - field = getattr(ob, fieldname) - if not isinstance(field, BaseModel): - field = subset_pydantic_model(ob, [fieldname], name=fieldname) + child_object = getattr(ob, fieldname) + if verbose: + print(child_object) + ws = create_sheet(wb, fieldname, sheet_number=sheet_number) + if not isinstance(child_object, BaseModel): + child_object = subset_pydantic_model(ob, [fieldname], name=fieldname) sheet_title = None else: sheet_title = fieldname - current_row = create_sheet_and_write_title( - doc_filepath, fieldname, sheet_title, sheet_number=sheet_number, protect_title=True, debug=debug - ) - - current_row = write_nested_simple_pydantic_to_sheet(doc_filepath, fieldname, field, current_row + 1) - workbook = open_or_create_workbook(doc_filepath) - correct_column_widths(workbook, sheet_name=fieldname) - shade_30_rows_and_protect_sheet(workbook, fieldname, current_row + 1) - shade_locked_cells(workbook, fieldname) - workbook.save(doc_filepath) + current_row = write_title_and_version_info(ws, sheet_title, None, protect_title=True) + current_row = write_pydantic_to_sheet(ws, child_object, current_row, debug=verbose) + correct_column_widths(worksheet=ws) + shade_30_rows_and_protect_sheet(worksheet=ws, startrow=current_row) + shade_locked_cells(worksheet=ws) sheet_number += 1 + wb.save(doc_filepath)