Skip to content

Commit

Permalink
rename surveys, timeseries and timeseries_db
Browse files: browse the repository at this point in the history
  • Loading branch information
Gordon Blackadder committed Sep 13, 2024
1 parent ddce25f commit 742c114
Show file tree
Hide file tree
Showing 25 changed files with 84 additions and 79 deletions.
Binary file modified excel_sheets/Document_metadata.xlsx
Binary file not shown.
Binary file not shown.
Binary file added excel_sheets/Indicators_db_metadata.xlsx
Binary file not shown.
Binary file not shown.
Binary file modified excel_sheets/Resource_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Script_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Table_metadata.xlsx
Binary file not shown.
Binary file removed excel_sheets/Timeseries_db_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Video_metadata.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion pydantic_schemas/document_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: document-schema.json
# timestamp: 2024-09-05T20:33:52+00:00
# timestamp: 2024-09-13T18:34:42+00:00

from __future__ import annotations

Expand Down
41 changes: 27 additions & 14 deletions pydantic_schemas/generators/generate_pydantic_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,40 @@
OUTPUT_DIR = os.path.join("pydantic_schemas")
PYTHON_VERSION = "3.11"
BASE_CLASS = ".utils.schema_base_model.SchemaBaseModel"
INPUTS = [
"document-schema.json",
"geospatial-schema.json",
"image-schema.json",
"microdata-schema.json",
"resource-schema.json",
"script-schema.json",
"table-schema.json",
"timeseries-db-schema.json",
"timeseries-schema.json",
"video-schema.json",
]
# INPUTS = [
# "document-schema.json",
# "geospatial-schema.json",
# "image-schema.json",
# "microdata-schema.json",
# "resource-schema.json",
# "script-schema.json",
# "table-schema.json",
# "timeseries-db-schema.json",
# "timeseries-schema.json",
# "video-schema.json",
# ]

# Maps each JSON schema input file name to the file name of the Python module
# generated from it. The generation loop below joins the key onto SCHEMA_DIR and
# the value onto OUTPUT_DIR.
# The two timeseries schemas keep their original JSON file names but are written
# to indicator-named modules.
INPUTS_TO_OUTPUTS = {
"document-schema.json": "document_schema.py",
"geospatial-schema.json": "geospatial_schema.py",
"image-schema.json": "image_schema.py",
"microdata-schema.json": "microdata_schema.py",
"resource-schema.json": "resource_schema.py",
"script-schema.json": "script_schema.py",
"table-schema.json": "table_schema.py",
"timeseries-db-schema.json": "indicators_db_schema.py",  # timeseries_db -> indicators_db
"timeseries-schema.json": "indicator_schema.py",  # timeseries -> indicator
"video-schema.json": "video_schema.py",
}


if not os.path.exists(OUTPUT_DIR):
os.makedirs(OUTPUT_DIR)

for input_file in INPUTS:
for input_file, output_file in INPUTS_TO_OUTPUTS.items():
print(f"Generating pydantic schema for {input_file}")
input_path = os.path.join(SCHEMA_DIR, input_file)
output_file = os.path.splitext(input_file)[0] + ".py"
# output_file = os.path.splitext(input_file)[0] + ".py"
output_path = os.path.join(OUTPUT_DIR, output_file).replace("-", "_")
run(
[
Expand Down
2 changes: 1 addition & 1 deletion pydantic_schemas/geospatial_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: geospatial-schema.json
# timestamp: 2024-09-05T20:33:54+00:00
# timestamp: 2024-09-13T18:34:45+00:00

from __future__ import annotations

Expand Down
2 changes: 1 addition & 1 deletion pydantic_schemas/image_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: image-schema.json
# timestamp: 2024-09-05T20:33:56+00:00
# timestamp: 2024-09-13T18:34:48+00:00

from __future__ import annotations

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: timeseries-schema.json
# timestamp: 2024-09-05T20:34:07+00:00
# timestamp: 2024-09-13T18:35:02+00:00

from __future__ import annotations

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: timeseries-db-schema.json
# timestamp: 2024-09-05T20:34:05+00:00
# timestamp: 2024-09-13T18:35:00+00:00

from __future__ import annotations

Expand Down
38 changes: 21 additions & 17 deletions pydantic_schemas/metadata_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from . import ( # image_schema,
document_schema,
geospatial_schema,
indicator_schema,
indicators_db_schema,
microdata_schema,
resource_schema,
script_schema,
table_schema,
timeseries_db_schema,
timeseries_schema,
video_schema,
)
from .utils.excel_to_pydantic import excel_doc_to_pydantic, excel_single_sheet_to_pydantic
Expand All @@ -24,7 +24,7 @@
class MetadataManager:
"""
Interface with Excel for creating, saving and updating metadata for various types:
documents, scripts, survey, table, timeseries, timeseries_db, video
document, indicator, indicators_db, microdata, resource, script, table, video
Retrieve pydantic model definitions for each metadata type
"""
Expand All @@ -35,10 +35,10 @@ class MetadataManager:
# "image":image_schema.ImageDataTypeSchema,
"resource": resource_schema.Model,
"script": script_schema.ResearchProjectSchemaDraft,
"survey": microdata_schema.MicrodataSchema,
"microdata": microdata_schema.MicrodataSchema,
"table": table_schema.Model,
"timeseries": timeseries_schema.TimeseriesSchema,
"timeseries_db": timeseries_db_schema.TimeseriesDatabaseSchema,
"indicator": indicator_schema.TimeseriesSchema,
"indicators_db": indicators_db_schema.TimeseriesDatabaseSchema,
"video": video_schema.Model,
}

Expand All @@ -48,10 +48,10 @@ class MetadataManager:
# "image":,
"resource": write_to_single_sheet,
"script": write_across_many_sheets,
"survey": write_across_many_sheets,
"microdata": write_across_many_sheets,
"table": write_across_many_sheets,
"timeseries": write_across_many_sheets,
"timeseries_db": write_to_single_sheet, # one sheet
"indicator": write_across_many_sheets,
"indicators_db": write_to_single_sheet, # one sheet
"video": write_to_single_sheet, # one sheet
}

Expand All @@ -61,10 +61,10 @@ class MetadataManager:
# "image":,
"resource": excel_single_sheet_to_pydantic,
"script": excel_doc_to_pydantic,
"survey": excel_doc_to_pydantic,
"microdata": excel_doc_to_pydantic,
"table": excel_doc_to_pydantic,
"timeseries": excel_doc_to_pydantic,
"timeseries_db": excel_single_sheet_to_pydantic, # one sheet
"indicator": excel_doc_to_pydantic,
"indicators_db": excel_single_sheet_to_pydantic, # one sheet
"video": excel_single_sheet_to_pydantic, # one sheet
}

Expand All @@ -80,8 +80,12 @@ def metadata_type_names(self) -> List[str]:
def standardize_metadata_name(self, metadata_name: str) -> str:
metadata_name = metadata_name.lower()
metadata_name = metadata_name.replace("-", "_").replace(" ", "_")
if metadata_name == "microdata" or metadata_name == "survey_microdata":
metadata_name = "survey"
if metadata_name == "survey" or metadata_name == "survey_microdata":
metadata_name = "microdata"
elif metadata_name == "timeseries":
metadata_name = "indicator"
elif metadata_name == "timeseries_db":
metadata_name = "indicators_db"
self._raise_if_unsupported_metadata_name(metadata_name=metadata_name)
return metadata_name

Expand All @@ -106,7 +110,7 @@ def write_metadata_outline_to_excel(
Args:
metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
document, script, series, survey, table, timeseries, timeseries_DB, video
document, indicator, indicators_db, microdata, resource, script, table, video
Currently not supported:
geospatial, image
If passed as a BaseModel type, for instance this is what you would do with a template, then the writer
Expand Down Expand Up @@ -156,7 +160,7 @@ def save_metadata_to_excel(
Args:
metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
document, script, series, survey, table, timeseries, timeseries_DB, video
document, indicator, indicators_db, microdata, resource, script, table, video
Currently not supported:
geospatial, image
If passed as a BaseModel type (for instance, when working with a template), the writer defaults to a single page.
Expand Down Expand Up @@ -230,7 +234,7 @@ def _get_metadata_name_from_excel_file(filename: str) -> str:
def read_metadata_from_excel(self, filename: str, metadata_class: Optional[Type[BaseModel]] = None) -> BaseModel:
"""
Read in metadata from an appropriately formatted Excel file as a pydantic object.
If using standard metadata types (documents, resource, script, survey, table, timeseries, timeseries_db, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided.
If using standard metadata types (document, indicator, indicators_db, microdata, resource, script, table, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided.
Args:
filename (str): The path to the Excel file.
Expand Down
2 changes: 1 addition & 1 deletion pydantic_schemas/microdata_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: microdata-schema.json
# timestamp: 2024-09-05T20:33:58+00:00
# timestamp: 2024-09-13T18:34:51+00:00

from __future__ import annotations

Expand Down
2 changes: 1 addition & 1 deletion pydantic_schemas/resource_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: resource-schema.json
# timestamp: 2024-09-05T20:34:00+00:00
# timestamp: 2024-09-13T18:34:53+00:00

from __future__ import annotations

Expand Down
2 changes: 1 addition & 1 deletion pydantic_schemas/script_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: script-schema.json
# timestamp: 2024-09-05T20:34:02+00:00
# timestamp: 2024-09-13T18:34:55+00:00

from __future__ import annotations

Expand Down
2 changes: 1 addition & 1 deletion pydantic_schemas/table_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: table-schema.json
# timestamp: 2024-09-05T20:34:03+00:00
# timestamp: 2024-09-13T18:34:58+00:00

from __future__ import annotations

Expand Down
24 changes: 13 additions & 11 deletions pydantic_schemas/tests/test_metadata_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


@pytest.mark.parametrize(
"metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"]
"metadata_name", ["document", "script", "microdata", "table", "indicators_db", "indicator", "video"]
)
def test_metadata_by_name(tmpdir, metadata_name):
mm = MetadataManager()
Expand All @@ -30,7 +30,7 @@ def test_metadata_by_name(tmpdir, metadata_name):


@pytest.mark.parametrize(
"metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"]
"metadata_name", ["document", "script", "microdata", "table", "timeseries_db", "indicator", "video"]
)
def test_metadata_by_class(tmpdir, metadata_name):
mm = MetadataManager()
Expand Down Expand Up @@ -59,23 +59,25 @@ def test_standardize_metadata_name():
"survey microdata",
"microdata",
"table",
"indicators-db",
"timeseries-db",
"timeseries-db",
"TimeSeries",
"INdicator",
"timeseries",
"VIdeo",
]

expecteds = [
"document",
"script",
"survey",
"survey",
"survey",
"survey",
"microdata",
"microdata",
"microdata",
"microdata",
"table",
"timeseries_db",
"timeseries_db",
"timeseries",
"indicators_db",
"indicators_db",
"indicator",
"indicator",
"video",
]

Expand Down
4 changes: 2 additions & 2 deletions pydantic_schemas/tests/test_pydantic_to_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
from pydantic import BaseModel, Field

from pydantic_schemas.document_schema import ScriptSchemaDraft
from pydantic_schemas.indicator_schema import TimeseriesSchema
from pydantic_schemas.indicators_db_schema import TimeseriesDatabaseSchema

# from pydantic_schemas.definitions.geospatial_schema import GeospatialSchema
# from pydantic_schemas.definitions.image_schema import ImageDataTypeSchema
from pydantic_schemas.microdata_schema import MicrodataSchema
from pydantic_schemas.script_schema import ResearchProjectSchemaDraft
from pydantic_schemas.table_schema import Model as TableModel
from pydantic_schemas.timeseries_db_schema import TimeseriesDatabaseSchema
from pydantic_schemas.timeseries_schema import TimeseriesSchema
from pydantic_schemas.utils.excel_to_pydantic import (
excel_doc_to_pydantic,
excel_sheet_to_pydantic,
Expand Down
18 changes: 8 additions & 10 deletions pydantic_schemas/tests/test_quick_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,8 @@
import pytest
from pydantic import AnyUrl, BaseModel, Field, confloat

from pydantic_schemas.utils.quick_start import (
DEFAULT_URL,
METADATA_TYPES_FILE_MAP,
create_empty_schema_from_path,
make_skeleton,
)
from pydantic_schemas.metadata_manager import MetadataManager
from pydantic_schemas.utils.quick_start import DEFAULT_URL, make_skeleton # create_empty_schema_from_path,


def test_simple_strings():
Expand Down Expand Up @@ -239,7 +235,9 @@ class BadFieldNames(BaseModel):
assert actual == expected, actual


@pytest.mark.parametrize("k, v", [(k, v) for k, v in METADATA_TYPES_FILE_MAP.items()])
def test_actual_schemas(k, v):
base = "pydantic_schemas.{}"
create_empty_schema_from_path(base.format(k), v, debug=True)
@pytest.mark.parametrize("n", list(MetadataManager().metadata_type_names))
def test_actual_schemas(n):
    """Check that ``make_skeleton`` can build a skeleton for each metadata class.

    The test is parametrized over every name reported by
    ``MetadataManager().metadata_type_names``; each name is resolved to its
    pydantic class via ``metadata_class_from_name`` and passed to
    ``make_skeleton``. The test passes if no exception is raised.

    Args:
        n: a metadata type name as reported by ``metadata_type_names``.
    """
    if n == "geospatial":
        # Report the case as skipped instead of silently returning, so the
        # test summary does not count an unchecked schema as a pass.
        # NOTE(review): the reason geospatial is excluded is not visible here.
        pytest.skip("geospatial is excluded from the skeleton test")
    klass = MetadataManager().metadata_class_from_name(n)
    make_skeleton(klass)
18 changes: 3 additions & 15 deletions pydantic_schemas/utils/quick_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,6 @@

from .utils import standardize_keys_in_dict

METADATA_TYPES_FILE_MAP = {
"document_schema": "ScriptSchemaDraft",
# "geospatial_schema": "GeospatialSchema",
# "image_schema": "ImageDataTypeSchema",
"microdata_schema": "MicrodataSchema",
"script_schema": "ResearchProjectSchemaDraft",
"table_schema": "Model",
"timeseries_db_schema": "TimeseriesDatabaseSchema",
"timeseries_schema": "TimeseriesSchema",
"video_schema": "Model",
}

DEFAULT_URL = "http://www.example.com"


Expand Down Expand Up @@ -242,6 +230,6 @@ def make_skeleton(cl: Type[BaseModel], debug=False, indentation=""):
return cl(**param_values)


def create_empty_schema_from_path(module_name, class_name, debug=False):
MyClass = getattr(importlib.import_module(module_name), class_name)
return make_skeleton(MyClass, debug=debug)
# def create_empty_schema_from_path(module_name, class_name, debug=False):
# MyClass = getattr(importlib.import_module(module_name), class_name)
# return make_skeleton(MyClass, debug=debug)
2 changes: 1 addition & 1 deletion pydantic_schemas/video_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: video-schema.json
# timestamp: 2024-09-05T20:34:08+00:00
# timestamp: 2024-09-13T18:35:04+00:00

from __future__ import annotations

Expand Down

0 comments on commit 742c114

Please sign in to comment.