diff --git a/README.md b/README.md index dc40ff6..772a715 100644 --- a/README.md +++ b/README.md @@ -86,12 +86,34 @@ survey_metadata.study_desc.title_statement.idno = "project_idno" ``` -## Updating Pydantic definitions and Excel sheets +## Updating Schemas -To update the pydantic schemas so that they match the latest json schemas run +First create a branch from the main branch. + +Then make the change you want to the json schema in the schemas folder. + +Then in pyproject.toml update the version number, changing either the major, minor or patch number as appropriate. + +Next update the pydantic schemas so that they match the latest json schemas by running `python pydantic_schemas/generators/generate_pydantic_schemas.py` -Then to update the Excel sheets run +Finally update the Excel sheets by running + + `python pydantic_schemas/generators/generate_excel_files.py` + +## Versioning conventions for schemas + +### Major Changes + +- field type changes that break convention and cannot be coerced such as a field moving from string to an array +- a mandatory field added or optional field changed to mandatory + +### Minor Changes + +- field removed +- optional field added + +### Patch Changes - `python pydantic_schemas/generators/generate_excel_files.py` \ No newline at end of file +- field type changes that can be coerced such as int to string \ No newline at end of file diff --git a/excel_sheets/Document_metadata.xlsx b/excel_sheets/Document_metadata.xlsx index d5237aa..f9ec93d 100644 Binary files a/excel_sheets/Document_metadata.xlsx and b/excel_sheets/Document_metadata.xlsx differ diff --git a/excel_sheets/Indicator_metadata.xlsx b/excel_sheets/Indicator_metadata.xlsx index 3fcbb68..1e8011d 100644 Binary files a/excel_sheets/Indicator_metadata.xlsx and b/excel_sheets/Indicator_metadata.xlsx differ diff --git a/excel_sheets/Indicators_db_metadata.xlsx b/excel_sheets/Indicators_db_metadata.xlsx index 9f2d6b8..83e6331 100644 Binary files 
a/excel_sheets/Indicators_db_metadata.xlsx and b/excel_sheets/Indicators_db_metadata.xlsx differ diff --git a/excel_sheets/Microdata_metadata.xlsx b/excel_sheets/Microdata_metadata.xlsx index 173a2fd..6f1691e 100644 Binary files a/excel_sheets/Microdata_metadata.xlsx and b/excel_sheets/Microdata_metadata.xlsx differ diff --git a/excel_sheets/Resource_metadata.xlsx b/excel_sheets/Resource_metadata.xlsx index 0aa789a..1524740 100644 Binary files a/excel_sheets/Resource_metadata.xlsx and b/excel_sheets/Resource_metadata.xlsx differ diff --git a/excel_sheets/Script_metadata.xlsx b/excel_sheets/Script_metadata.xlsx index 6029541..9038bdc 100644 Binary files a/excel_sheets/Script_metadata.xlsx and b/excel_sheets/Script_metadata.xlsx differ diff --git a/excel_sheets/Table_metadata.xlsx b/excel_sheets/Table_metadata.xlsx index 04da647..422a2cc 100644 Binary files a/excel_sheets/Table_metadata.xlsx and b/excel_sheets/Table_metadata.xlsx differ diff --git a/excel_sheets/Video_metadata.xlsx b/excel_sheets/Video_metadata.xlsx index 6a85ec7..6fc08f9 100644 Binary files a/excel_sheets/Video_metadata.xlsx and b/excel_sheets/Video_metadata.xlsx differ diff --git a/pydantic_schemas/document_schema.py b/pydantic_schemas/document_schema.py index ba3c89f..e15b6ea 100644 --- a/pydantic_schemas/document_schema.py +++ b/pydantic_schemas/document_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: document-schema.json -# timestamp: 2024-09-13T19:00:20+00:00 from __future__ import annotations diff --git a/pydantic_schemas/generators/__init__.py b/pydantic_schemas/generators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pydantic_schemas/generators/generate_excel_files.py b/pydantic_schemas/generators/generate_excel_files.py index 00da0dd..d1acbfb 100644 --- a/pydantic_schemas/generators/generate_excel_files.py +++ b/pydantic_schemas/generators/generate_excel_files.py @@ -1,7 +1,75 @@ import os +import openpyxl + from 
def _cell_differences(cell1, cell2):
    """Return human-readable descriptions of how two cells differ.

    Compares the cell value plus a fixed subset of formatting attributes:
    font (name, size, bold, italic), fill (start/end colour index), border
    (left/right/top/bottom style) and alignment (horizontal/vertical).
    An empty list means the cells agree on every compared attribute.
    """
    differences = []

    if cell1.value != cell2.value:
        differences.append(f"Value: {cell1.value} != {cell2.value}")

    font1, font2 = cell1.font, cell2.font
    if (font1.name, font1.size, font1.bold, font1.italic) != (
        font2.name,
        font2.size,
        font2.bold,
        font2.italic,
    ):
        differences.append(f"Font: {font1} != {font2}")

    fill1, fill2 = cell1.fill, cell2.fill
    if (fill1.start_color.index, fill1.end_color.index) != (
        fill2.start_color.index,
        fill2.end_color.index,
    ):
        differences.append(f"Fill: {fill1} != {fill2}")

    border1, border2 = cell1.border, cell2.border
    if (
        border1.left.style,
        border1.right.style,
        border1.top.style,
        border1.bottom.style,
    ) != (
        border2.left.style,
        border2.right.style,
        border2.top.style,
        border2.bottom.style,
    ):
        differences.append(f"Border: {border1} != {border2}")

    align1, align2 = cell1.alignment, cell2.alignment
    if (align1.horizontal, align1.vertical) != (align2.horizontal, align2.vertical):
        differences.append(f"Alignment: {align1} != {align2}")

    return differences


def compare_excel_files(file1, file2) -> bool:
    """Report whether two Excel workbooks have matching content and formatting.

    Both workbooks are loaded with ``openpyxl.load_workbook``. They must have
    the same sheet names in the same order; every cell in every sheet is then
    compared on value, font, fill, border and alignment. Cell C1 of the
    'metadata' sheet is ignored because it only contains the versioning
    number.

    The first mismatch found is printed and ends the comparison.

    Args:
        file1: First workbook, passed straight to ``openpyxl.load_workbook``.
        file2: Second workbook, passed straight to ``openpyxl.load_workbook``.

    Returns:
        True if no difference was found, False otherwise.
    """
    wb1 = openpyxl.load_workbook(file1)
    wb2 = openpyxl.load_workbook(file2)

    sheets1 = wb1.sheetnames
    sheets2 = wb2.sheetnames

    # Sheet lists must agree (including order) before cells can be paired up.
    if sheets1 != sheets2:
        print("Sheet names do not match")
        print(f"File1 sheets: {sheets1}")
        print(f"File2 sheets: {sheets2}")
        return False

    for sheet_name in sheets1:
        ws1 = wb1[sheet_name]
        ws2 = wb2[sheet_name]

        # Walk the union of both sheets' extents. Iterating only over the
        # first sheet's rows would miss rows/columns that exist solely in the
        # second sheet and wrongly report the files as identical.
        bounds = {
            "min_row": 1,
            "min_col": 1,
            "max_row": max(ws1.max_row, ws2.max_row),
            "max_col": max(ws1.max_column, ws2.max_column),
        }

        for row1, row2 in zip(ws1.iter_rows(**bounds), ws2.iter_rows(**bounds)):
            for cell1, cell2 in zip(row1, row2):
                cell_address = cell1.coordinate
                if sheet_name == "metadata" and cell_address == "C1":
                    # C1 in 'metadata' only contains the versioning number.
                    continue

                differences = _cell_differences(cell1, cell2)
                if differences:
                    print(f"Differences found at {sheet_name} {cell_address}:")
                    for difference in differences:
                        print(f"  - {difference}")
                    return False

    return True
"microdata-schema.json", -# "resource-schema.json", -# "script-schema.json", -# "table-schema.json", -# "timeseries-db-schema.json", -# "timeseries-schema.json", -# "video-schema.json", -# ] INPUTS_TO_OUTPUTS = { "document-schema.json": "document_schema.py", @@ -38,7 +26,6 @@ for input_file, output_file in INPUTS_TO_OUTPUTS.items(): print(f"Generating pydantic schema for {input_file}") input_path = os.path.join(SCHEMA_DIR, input_file) - # output_file = os.path.splitext(input_file)[0] + ".py" output_path = os.path.join(OUTPUT_DIR, output_file).replace("-", "_") run( [ @@ -54,6 +41,7 @@ "--use-double-quotes", "--wrap-string-literal", "--collapse-root-models", + "--disable-timestamp", "--base-class", BASE_CLASS, "--output", diff --git a/pydantic_schemas/geospatial_schema.py b/pydantic_schemas/geospatial_schema.py index dbe46aa..3678503 100644 --- a/pydantic_schemas/geospatial_schema.py +++ b/pydantic_schemas/geospatial_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: geospatial-schema.json -# timestamp: 2024-09-13T19:00:22+00:00 from __future__ import annotations diff --git a/pydantic_schemas/image_schema.py b/pydantic_schemas/image_schema.py index 24c7168..a586e22 100644 --- a/pydantic_schemas/image_schema.py +++ b/pydantic_schemas/image_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: image-schema.json -# timestamp: 2024-09-13T19:00:23+00:00 from __future__ import annotations diff --git a/pydantic_schemas/indicator_schema.py b/pydantic_schemas/indicator_schema.py index 7ce366b..8a0b79f 100644 --- a/pydantic_schemas/indicator_schema.py +++ b/pydantic_schemas/indicator_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: timeseries-schema.json -# timestamp: 2024-09-13T19:00:32+00:00 from __future__ import annotations diff --git a/pydantic_schemas/indicators_db_schema.py b/pydantic_schemas/indicators_db_schema.py index 67b860b..c796872 100644 --- a/pydantic_schemas/indicators_db_schema.py +++ 
b/pydantic_schemas/indicators_db_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: timeseries-db-schema.json -# timestamp: 2024-09-13T19:00:30+00:00 from __future__ import annotations diff --git a/pydantic_schemas/microdata_schema.py b/pydantic_schemas/microdata_schema.py index 2c94423..036a238 100644 --- a/pydantic_schemas/microdata_schema.py +++ b/pydantic_schemas/microdata_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: microdata-schema.json -# timestamp: 2024-09-13T19:00:25+00:00 from __future__ import annotations diff --git a/pydantic_schemas/resource_schema.py b/pydantic_schemas/resource_schema.py index d72de0d..beaf096 100644 --- a/pydantic_schemas/resource_schema.py +++ b/pydantic_schemas/resource_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: resource-schema.json -# timestamp: 2024-09-13T19:00:26+00:00 from __future__ import annotations diff --git a/pydantic_schemas/script_schema.py b/pydantic_schemas/script_schema.py index f785ffe..6af04b5 100644 --- a/pydantic_schemas/script_schema.py +++ b/pydantic_schemas/script_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: script-schema.json -# timestamp: 2024-09-13T19:00:27+00:00 from __future__ import annotations diff --git a/pydantic_schemas/table_schema.py b/pydantic_schemas/table_schema.py index 0795030..4ea8836 100644 --- a/pydantic_schemas/table_schema.py +++ b/pydantic_schemas/table_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: table-schema.json -# timestamp: 2024-09-13T19:00:29+00:00 from __future__ import annotations diff --git a/pydantic_schemas/utils/pydantic_to_excel.py b/pydantic_schemas/utils/pydantic_to_excel.py index f10e0c6..5f621be 100644 --- a/pydantic_schemas/utils/pydantic_to_excel.py +++ b/pydantic_schemas/utils/pydantic_to_excel.py @@ -1,9 +1,12 @@ import copy +import importlib.metadata import json import os from enum import Enum from typing import List, Optional, Tuple, 
Union +__version__ = importlib.metadata.version("metadataschemas") + import pandas as pd from openpyxl import Workbook, load_workbook from openpyxl.styles import Alignment, Border, Font, PatternFill, Protection, Side @@ -423,7 +426,7 @@ def write_to_single_sheet( title = model_default_name wb = open_or_create_workbook(doc_filepath) ws = create_sheet(wb, "metadata", sheet_number=0) - version = f"{metadata_type} type metadata version 20240812.1" + version = f"{metadata_type} type metadata version {__version__}" current_row = write_title_and_version_info(ws, title, version, protect_title=False) current_row = write_pydantic_to_sheet(ws, ob, current_row, debug=verbose) correct_column_widths(worksheet=ws) @@ -437,7 +440,7 @@ def write_across_many_sheets( ): wb = open_or_create_workbook(doc_filepath) ws = create_sheet(wb, "metadata", sheet_number=0) - version = f"{metadata_type} type metadata version 20240905.1" + version = f"{metadata_type} type metadata version {__version__}" current_row = write_title_and_version_info(ws, title, version, protect_title=False) children = seperate_simple_from_pydantic(ob) diff --git a/pydantic_schemas/video_schema.py b/pydantic_schemas/video_schema.py index 3b376ab..fc23af7 100644 --- a/pydantic_schemas/video_schema.py +++ b/pydantic_schemas/video_schema.py @@ -1,6 +1,5 @@ # generated by datamodel-codegen: # filename: video-schema.json -# timestamp: 2024-09-13T19:00:33+00:00 from __future__ import annotations