diff --git a/excel_sheets/Document_metadata.xlsx b/excel_sheets/Document_metadata.xlsx index 02e0417..4a357c7 100644 Binary files a/excel_sheets/Document_metadata.xlsx and b/excel_sheets/Document_metadata.xlsx differ diff --git a/excel_sheets/Timeseries_metadata.xlsx b/excel_sheets/Indicator_metadata.xlsx similarity index 88% rename from excel_sheets/Timeseries_metadata.xlsx rename to excel_sheets/Indicator_metadata.xlsx index c955f65..3131f10 100644 Binary files a/excel_sheets/Timeseries_metadata.xlsx and b/excel_sheets/Indicator_metadata.xlsx differ diff --git a/excel_sheets/Indicators_db_metadata.xlsx b/excel_sheets/Indicators_db_metadata.xlsx new file mode 100644 index 0000000..91d092f Binary files /dev/null and b/excel_sheets/Indicators_db_metadata.xlsx differ diff --git a/excel_sheets/Survey_metadata.xlsx b/excel_sheets/Microdata_metadata.xlsx similarity index 88% rename from excel_sheets/Survey_metadata.xlsx rename to excel_sheets/Microdata_metadata.xlsx index 0d0eae1..422ac66 100644 Binary files a/excel_sheets/Survey_metadata.xlsx and b/excel_sheets/Microdata_metadata.xlsx differ diff --git a/excel_sheets/Resource_metadata.xlsx b/excel_sheets/Resource_metadata.xlsx index 835494a..65bd3f3 100644 Binary files a/excel_sheets/Resource_metadata.xlsx and b/excel_sheets/Resource_metadata.xlsx differ diff --git a/excel_sheets/Script_metadata.xlsx b/excel_sheets/Script_metadata.xlsx index cff1cbd..a08a0c7 100644 Binary files a/excel_sheets/Script_metadata.xlsx and b/excel_sheets/Script_metadata.xlsx differ diff --git a/excel_sheets/Table_metadata.xlsx b/excel_sheets/Table_metadata.xlsx index 6b2a61c..d0e32ec 100644 Binary files a/excel_sheets/Table_metadata.xlsx and b/excel_sheets/Table_metadata.xlsx differ diff --git a/excel_sheets/Timeseries_db_metadata.xlsx b/excel_sheets/Timeseries_db_metadata.xlsx deleted file mode 100644 index e8e6eb9..0000000 Binary files a/excel_sheets/Timeseries_db_metadata.xlsx and /dev/null differ diff --git a/excel_sheets/Video_metadata.xlsx b/excel_sheets/Video_metadata.xlsx index 741d225..ebfc6a5 100644 Binary files a/excel_sheets/Video_metadata.xlsx and b/excel_sheets/Video_metadata.xlsx differ diff --git a/pydantic_schemas/document_schema.py b/pydantic_schemas/document_schema.py index 514d337..4a4dad3 100644 --- a/pydantic_schemas/document_schema.py +++ b/pydantic_schemas/document_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: document-schema.json -# timestamp: 2024-09-05T20:33:52+00:00 +# timestamp: 2024-09-13T18:34:42+00:00 from __future__ import annotations diff --git a/pydantic_schemas/generators/generate_pydantic_schemas.py b/pydantic_schemas/generators/generate_pydantic_schemas.py index cd58c5b..6344806 100644 --- a/pydantic_schemas/generators/generate_pydantic_schemas.py +++ b/pydantic_schemas/generators/generate_pydantic_schemas.py @@ -5,27 +5,40 @@ OUTPUT_DIR = os.path.join("pydantic_schemas") PYTHON_VERSION = "3.11" BASE_CLASS = ".utils.schema_base_model.SchemaBaseModel" -INPUTS = [ - "document-schema.json", - "geospatial-schema.json", - "image-schema.json", - "microdata-schema.json", - "resource-schema.json", - "script-schema.json", - "table-schema.json", - "timeseries-db-schema.json", - "timeseries-schema.json", - "video-schema.json", -] +# INPUTS = [ +# "document-schema.json", +# "geospatial-schema.json", +# "image-schema.json", +# "microdata-schema.json", +# "resource-schema.json", +# "script-schema.json", +# "table-schema.json", +# "timeseries-db-schema.json", +# "timeseries-schema.json", +# "video-schema.json", +# ] + +INPUTS_TO_OUTPUTS = { + "document-schema.json": "document_schema.py", + "geospatial-schema.json": "geospatial_schema.py", + "image-schema.json": "image_schema.py", + "microdata-schema.json": "microdata_schema.py", + "resource-schema.json": "resource_schema.py", + "script-schema.json": "script_schema.py", + "table-schema.json": "table_schema.py", + "timeseries-db-schema.json": "indicators_db_schema.py", + "timeseries-schema.json": "indicator_schema.py", + "video-schema.json": "video_schema.py", +} if not os.path.exists(OUTPUT_DIR): os.makedirs(OUTPUT_DIR) -for input_file in INPUTS: +for input_file, output_file in INPUTS_TO_OUTPUTS.items(): print(f"Generating pydantic schema for {input_file}") input_path = os.path.join(SCHEMA_DIR, input_file) - output_file = os.path.splitext(input_file)[0] + ".py" + # output_file = os.path.splitext(input_file)[0] + ".py" output_path = os.path.join(OUTPUT_DIR, output_file).replace("-", "_") run( [ diff --git a/pydantic_schemas/geospatial_schema.py b/pydantic_schemas/geospatial_schema.py index 714f2bc..4aef392 100644 --- a/pydantic_schemas/geospatial_schema.py +++ b/pydantic_schemas/geospatial_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: geospatial-schema.json -# timestamp: 2024-09-05T20:33:54+00:00 +# timestamp: 2024-09-13T18:34:45+00:00 from __future__ import annotations diff --git a/pydantic_schemas/image_schema.py b/pydantic_schemas/image_schema.py index 53db665..84362e4 100644 --- a/pydantic_schemas/image_schema.py +++ b/pydantic_schemas/image_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: image-schema.json -# timestamp: 2024-09-05T20:33:56+00:00 +# timestamp: 2024-09-13T18:34:48+00:00 from __future__ import annotations diff --git a/pydantic_schemas/timeseries_schema.py b/pydantic_schemas/indicator_schema.py similarity index 99% rename from pydantic_schemas/timeseries_schema.py rename to pydantic_schemas/indicator_schema.py index bb7e452..247d78f 100644 --- a/pydantic_schemas/timeseries_schema.py +++ b/pydantic_schemas/indicator_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: timeseries-schema.json -# timestamp: 2024-09-05T20:34:07+00:00 +# timestamp: 2024-09-13T18:35:02+00:00 from __future__ import annotations diff --git a/pydantic_schemas/timeseries_db_schema.py b/pydantic_schemas/indicators_db_schema.py similarity index 99% rename from pydantic_schemas/timeseries_db_schema.py rename to pydantic_schemas/indicators_db_schema.py index 9d2ad49..a77eac9 100644 --- a/pydantic_schemas/timeseries_db_schema.py +++ b/pydantic_schemas/indicators_db_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: timeseries-db-schema.json -# timestamp: 2024-09-05T20:34:05+00:00 +# timestamp: 2024-09-13T18:35:00+00:00 from __future__ import annotations diff --git a/pydantic_schemas/metadata_manager.py b/pydantic_schemas/metadata_manager.py index 0cfe462..70f9c69 100644 --- a/pydantic_schemas/metadata_manager.py +++ b/pydantic_schemas/metadata_manager.py @@ -7,12 +7,12 @@ from . import ( # image_schema, document_schema, geospatial_schema, + indicator_schema, + indicators_db_schema, microdata_schema, resource_schema, script_schema, table_schema, - timeseries_db_schema, - timeseries_schema, video_schema, ) from .utils.excel_to_pydantic import excel_doc_to_pydantic, excel_single_sheet_to_pydantic @@ -24,7 +24,7 @@ class MetadataManager: """ Interface with Excel for creating, saving and updating metadata for various types: - documents, scripts, survey, table, timeseries, timeseries_db, video + document, indicator, indicators_db, microdata, resource, script, table, video Retrieve pydantic model definitions for each metadata type """ @@ -35,10 +35,10 @@ class MetadataManager: # "image":image_schema.ImageDataTypeSchema, "resource": resource_schema.Model, "script": script_schema.ResearchProjectSchemaDraft, - "survey": microdata_schema.MicrodataSchema, + "microdata": microdata_schema.MicrodataSchema, "table": table_schema.Model, - "timeseries": timeseries_schema.TimeseriesSchema, - "timeseries_db": timeseries_db_schema.TimeseriesDatabaseSchema, + "indicator": indicator_schema.TimeseriesSchema, + "indicators_db": indicators_db_schema.TimeseriesDatabaseSchema, "video": video_schema.Model, } @@ -48,10 +48,10 @@ class MetadataManager: # "image":, "resource": write_to_single_sheet, "script": write_across_many_sheets, - "survey": write_across_many_sheets, + "microdata": write_across_many_sheets, "table": write_across_many_sheets, - "timeseries": write_across_many_sheets, - "timeseries_db": write_to_single_sheet, # one sheet + "indicator": write_across_many_sheets, + "indicators_db": write_to_single_sheet, # one sheet "video": write_to_single_sheet, # one sheet } @@ -61,10 +61,10 @@ class MetadataManager: # "image":, "resource": excel_single_sheet_to_pydantic, "script": excel_doc_to_pydantic, - "survey": excel_doc_to_pydantic, + "microdata": excel_doc_to_pydantic, "table": excel_doc_to_pydantic, - "timeseries": excel_doc_to_pydantic, - "timeseries_db": excel_single_sheet_to_pydantic, # one sheet + "indicator": excel_doc_to_pydantic, + "indicators_db": excel_single_sheet_to_pydantic, # one sheet "video": excel_single_sheet_to_pydantic, # one sheet } @@ -80,8 +80,12 @@ def metadata_type_names(self) -> List[str]: def standardize_metadata_name(self, metadata_name: str) -> str: metadata_name = metadata_name.lower() metadata_name = metadata_name.replace("-", "_").replace(" ", "_") - if metadata_name == "microdata" or metadata_name == "survey_microdata": - metadata_name = "survey" + if metadata_name == "survey" or metadata_name == "survey_microdata": + metadata_name = "microdata" + elif metadata_name == "timeseries": + metadata_name = "indicator" + elif metadata_name == "timeseries_db": + metadata_name = "indicators_db" self._raise_if_unsupported_metadata_name(metadata_name=metadata_name) return metadata_name @@ -106,7 +110,7 @@ def write_metadata_outline_to_excel( Args: metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently: - document, script, series, survey, table, timeseries, timeseries_DB, video + document, indicator, indicators_db, microdata, resource, script, table, video Currently not supported: geospatial, image If passed as a BaseModel type, for instance this is what you would do with a template, then the writer @@ -156,7 +160,7 @@ def save_metadata_to_excel( Args: metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently: - document, script, series, survey, table, timeseries, timeseries_DB, video + document, indicator, indicators_db, microdata, resource, script, table, video Currently not supported: geospatial, image If passed as a BaseModel type, for instance this is what you would do with a template, then the writer defaults to a single page. @@ -230,7 +234,7 @@ def _get_metadata_name_from_excel_file(filename: str) -> str: def read_metadata_from_excel(self, filename: str, metadata_class: Optional[Type[BaseModel]] = None) -> BaseModel: """ Read in metadata from an appropriately formatted Excel file as a pydantic object. - If using standard metadata types (documents, resource, script, survey, table, timeseries, timeseries_db, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided. + If using standard metadata types (document, indicator, indicators_db, microdata, resource, script, table, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided. Args: filename (str): The path to the Excel file. diff --git a/pydantic_schemas/microdata_schema.py b/pydantic_schemas/microdata_schema.py index 0829736..89469e9 100644 --- a/pydantic_schemas/microdata_schema.py +++ b/pydantic_schemas/microdata_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: microdata-schema.json -# timestamp: 2024-09-05T20:33:58+00:00 +# timestamp: 2024-09-13T18:34:51+00:00 from __future__ import annotations diff --git a/pydantic_schemas/resource_schema.py b/pydantic_schemas/resource_schema.py index 64fd55e..0201c74 100644 --- a/pydantic_schemas/resource_schema.py +++ b/pydantic_schemas/resource_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: resource-schema.json -# timestamp: 2024-09-05T20:34:00+00:00 +# timestamp: 2024-09-13T18:34:53+00:00 from __future__ import annotations diff --git a/pydantic_schemas/script_schema.py b/pydantic_schemas/script_schema.py index 73db35c..c226999 100644 --- a/pydantic_schemas/script_schema.py +++ b/pydantic_schemas/script_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: script-schema.json -# timestamp: 2024-09-05T20:34:02+00:00 +# timestamp: 2024-09-13T18:34:55+00:00 from __future__ import annotations diff --git a/pydantic_schemas/table_schema.py b/pydantic_schemas/table_schema.py index 58d3859..55a5729 100644 --- a/pydantic_schemas/table_schema.py +++ b/pydantic_schemas/table_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: table-schema.json -# timestamp: 2024-09-05T20:34:03+00:00 +# timestamp: 2024-09-13T18:34:58+00:00 from __future__ import annotations diff --git a/pydantic_schemas/tests/test_metadata_manager.py b/pydantic_schemas/tests/test_metadata_manager.py index 5df4fef..426ab84 100644 --- a/pydantic_schemas/tests/test_metadata_manager.py +++ b/pydantic_schemas/tests/test_metadata_manager.py @@ -4,7 +4,7 @@ @pytest.mark.parametrize( - "metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"] + "metadata_name", ["document", "script", "microdata", "table", "indicators_db", "indicator", "video"] ) def test_metadata_by_name(tmpdir, metadata_name): mm = MetadataManager() @@ -30,7 +30,7 @@ def test_metadata_by_name(tmpdir, metadata_name): @pytest.mark.parametrize( - "metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"] + "metadata_name", ["document", "script", "microdata", "table", "timeseries_db", "indicator", "video"] ) def test_metadata_by_class(tmpdir, metadata_name): mm = MetadataManager() @@ -59,23 +59,25 @@ def test_standardize_metadata_name(): "survey microdata", "microdata", "table", + "indicators-db", "timeseries-db", - "timeseries-db", - "TimeSeries", + "INdicator", + "timeseries", "VIdeo", ] expecteds = [ "document", "script", - "survey", - "survey", - "survey", - "survey", + "microdata", + "microdata", + "microdata", + "microdata", "table", - "timeseries_db", - "timeseries_db", - "timeseries", + "indicators_db", + "indicators_db", + "indicator", + "indicator", "video", ] diff --git a/pydantic_schemas/tests/test_pydantic_to_excel.py b/pydantic_schemas/tests/test_pydantic_to_excel.py index 6090fd1..e5884bc 100644 --- a/pydantic_schemas/tests/test_pydantic_to_excel.py +++ b/pydantic_schemas/tests/test_pydantic_to_excel.py @@ -7,14 +7,14 @@ from pydantic import BaseModel, Field from pydantic_schemas.document_schema import ScriptSchemaDraft +from pydantic_schemas.indicator_schema import TimeseriesSchema +from pydantic_schemas.indicators_db_schema import TimeseriesDatabaseSchema # from pydantic_schemas.definitions.geospatial_schema import GeospatialSchema # from pydantic_schemas.definitions.image_schema import ImageDataTypeSchema from pydantic_schemas.microdata_schema import MicrodataSchema from pydantic_schemas.script_schema import ResearchProjectSchemaDraft from pydantic_schemas.table_schema import Model as TableModel -from pydantic_schemas.timeseries_db_schema import TimeseriesDatabaseSchema -from pydantic_schemas.timeseries_schema import TimeseriesSchema from pydantic_schemas.utils.excel_to_pydantic import ( excel_doc_to_pydantic, excel_sheet_to_pydantic, diff --git a/pydantic_schemas/tests/test_quick_start.py b/pydantic_schemas/tests/test_quick_start.py index 3e506e1..9d53dc2 100644 --- a/pydantic_schemas/tests/test_quick_start.py +++ b/pydantic_schemas/tests/test_quick_start.py @@ -4,12 +4,8 @@ import pytest from pydantic import AnyUrl, BaseModel, Field, confloat -from pydantic_schemas.utils.quick_start import ( - DEFAULT_URL, - METADATA_TYPES_FILE_MAP, - create_empty_schema_from_path, - make_skeleton, -) +from pydantic_schemas.metadata_manager import MetadataManager +from pydantic_schemas.utils.quick_start import DEFAULT_URL, make_skeleton # create_empty_schema_from_path, def test_simple_strings(): @@ -239,7 +235,9 @@ class BadFieldNames(BaseModel): assert actual == expected, actual -@pytest.mark.parametrize("k, v", [(k, v) for k, v in METADATA_TYPES_FILE_MAP.items()]) -def test_actual_schemas(k, v): - base = "pydantic_schemas.{}" - create_empty_schema_from_path(base.format(k), v, debug=True) +@pytest.mark.parametrize("n", [n for n in MetadataManager().metadata_type_names]) +def test_actual_schemas(n): + if n == "geospatial": + return + klass = MetadataManager().metadata_class_from_name(n) + make_skeleton(klass) diff --git a/pydantic_schemas/utils/quick_start.py b/pydantic_schemas/utils/quick_start.py index de09833..13d11bd 100644 --- a/pydantic_schemas/utils/quick_start.py +++ b/pydantic_schemas/utils/quick_start.py @@ -8,18 +8,6 @@ from .utils import standardize_keys_in_dict -METADATA_TYPES_FILE_MAP = { - "document_schema": "ScriptSchemaDraft", - # "geospatial_schema": "GeospatialSchema", - # "image_schema": "ImageDataTypeSchema", - "microdata_schema": "MicrodataSchema", - "script_schema": "ResearchProjectSchemaDraft", - "table_schema": "Model", - "timeseries_db_schema": "TimeseriesDatabaseSchema", - "timeseries_schema": "TimeseriesSchema", - "video_schema": "Model", -} - DEFAULT_URL = "http://www.example.com" @@ -242,6 +230,6 @@ def make_skeleton(cl: Type[BaseModel], debug=False, indentation=""): return cl(**param_values) -def create_empty_schema_from_path(module_name, class_name, debug=False): - MyClass = getattr(importlib.import_module(module_name), class_name) - return make_skeleton(MyClass, debug=debug) +# def create_empty_schema_from_path(module_name, class_name, debug=False): +# MyClass = getattr(importlib.import_module(module_name), class_name) +# return make_skeleton(MyClass, debug=debug) diff --git a/pydantic_schemas/video_schema.py b/pydantic_schemas/video_schema.py index 64b2904..0d6485b 100644 --- a/pydantic_schemas/video_schema.py +++ b/pydantic_schemas/video_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: video-schema.json -# timestamp: 2024-09-05T20:34:08+00:00 +# timestamp: 2024-09-13T18:35:04+00:00 from __future__ import annotations