Rename surveys, timeseries and timeseries_db to microdata, indicator and indicators_db

worldbank · Sep 13, 2024 · 742c114 · 742c114
1 parent ddce25f
commit 742c114
Show file tree

Hide file tree

Showing 25 changed files with 84 additions and 79 deletions.
diff --git a/excel_sheets/Document_metadata.xlsx b/excel_sheets/Document_metadata.xlsx
diff --git a/excel_sheets/Timeseries_metadata.xlsx b/excel_sheets/Indicator_metadata.xlsx
diff --git a/excel_sheets/Indicators_db_metadata.xlsx b/excel_sheets/Indicators_db_metadata.xlsx
diff --git a/excel_sheets/Survey_metadata.xlsx b/excel_sheets/Microdata_metadata.xlsx
diff --git a/excel_sheets/Resource_metadata.xlsx b/excel_sheets/Resource_metadata.xlsx
diff --git a/excel_sheets/Script_metadata.xlsx b/excel_sheets/Script_metadata.xlsx
diff --git a/excel_sheets/Table_metadata.xlsx b/excel_sheets/Table_metadata.xlsx
diff --git a/excel_sheets/Timeseries_db_metadata.xlsx b/excel_sheets/Timeseries_db_metadata.xlsx
diff --git a/excel_sheets/Video_metadata.xlsx b/excel_sheets/Video_metadata.xlsx
diff --git a/pydantic_schemas/document_schema.py b/pydantic_schemas/document_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  document-schema.json
-#   timestamp: 2024-09-05T20:33:52+00:00
+#   timestamp: 2024-09-13T18:34:42+00:00
 
 from __future__ import annotations
 

diff --git a/pydantic_schemas/generators/generate_pydantic_schemas.py b/pydantic_schemas/generators/generate_pydantic_schemas.py
@@ -5,27 +5,40 @@
 OUTPUT_DIR = os.path.join("pydantic_schemas")
 PYTHON_VERSION = "3.11"
 BASE_CLASS = ".utils.schema_base_model.SchemaBaseModel"
-INPUTS = [
-    "document-schema.json",
-    "geospatial-schema.json",
-    "image-schema.json",
-    "microdata-schema.json",
-    "resource-schema.json",
-    "script-schema.json",
-    "table-schema.json",
-    "timeseries-db-schema.json",
-    "timeseries-schema.json",
-    "video-schema.json",
-]
+# INPUTS = [
+#     "document-schema.json",
+#     "geospatial-schema.json",
+#     "image-schema.json",
+#     "microdata-schema.json",
+#     "resource-schema.json",
+#     "script-schema.json",
+#     "table-schema.json",
+#     "timeseries-db-schema.json",
+#     "timeseries-schema.json",
+#     "video-schema.json",
+# ]
+
+INPUTS_TO_OUTPUTS = {
+    "document-schema.json": "document_schema.py",
+    "geospatial-schema.json": "geospatial_schema.py",
+    "image-schema.json": "image_schema.py",
+    "microdata-schema.json": "microdata_schema.py",
+    "resource-schema.json": "resource_schema.py",
+    "script-schema.json": "script_schema.py",
+    "table-schema.json": "table_schema.py",
+    "timeseries-db-schema.json": "indicators_db_schema.py",
+    "timeseries-schema.json": "indicator_schema.py",
+    "video-schema.json": "video_schema.py",
+}
 
 
 if not os.path.exists(OUTPUT_DIR):
     os.makedirs(OUTPUT_DIR)
 
-for input_file in INPUTS:
+for input_file, output_file in INPUTS_TO_OUTPUTS.items():
     print(f"Generating pydantic schema for {input_file}")
     input_path = os.path.join(SCHEMA_DIR, input_file)
-    output_file = os.path.splitext(input_file)[0] + ".py"
+    # output_file = os.path.splitext(input_file)[0] + ".py"
     output_path = os.path.join(OUTPUT_DIR, output_file).replace("-", "_")
     run(
         [

diff --git a/pydantic_schemas/geospatial_schema.py b/pydantic_schemas/geospatial_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  geospatial-schema.json
-#   timestamp: 2024-09-05T20:33:54+00:00
+#   timestamp: 2024-09-13T18:34:45+00:00
 
 from __future__ import annotations
 

diff --git a/pydantic_schemas/image_schema.py b/pydantic_schemas/image_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  image-schema.json
-#   timestamp: 2024-09-05T20:33:56+00:00
+#   timestamp: 2024-09-13T18:34:48+00:00
 
 from __future__ import annotations
 

diff --git a/pydantic_schemas/timeseries_schema.py b/pydantic_schemas/indicator_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  timeseries-schema.json
-#   timestamp: 2024-09-05T20:34:07+00:00
+#   timestamp: 2024-09-13T18:35:02+00:00
 
 from __future__ import annotations
 

diff --git a/pydantic_schemas/timeseries_db_schema.py b/pydantic_schemas/indicators_db_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  timeseries-db-schema.json
-#   timestamp: 2024-09-05T20:34:05+00:00
+#   timestamp: 2024-09-13T18:35:00+00:00
 
 from __future__ import annotations
 

diff --git a/pydantic_schemas/metadata_manager.py b/pydantic_schemas/metadata_manager.py
@@ -7,12 +7,12 @@
 from . import (  # image_schema,
     document_schema,
     geospatial_schema,
+    indicator_schema,
+    indicators_db_schema,
     microdata_schema,
     resource_schema,
     script_schema,
     table_schema,
-    timeseries_db_schema,
-    timeseries_schema,
     video_schema,
 )
 from .utils.excel_to_pydantic import excel_doc_to_pydantic, excel_single_sheet_to_pydantic
@@ -24,7 +24,7 @@
 class MetadataManager:
     """
     Interface with Excel for creating, saving and updating metadata for various types:
-      documents, scripts, survey, table, timeseries, timeseries_db, video
+      document, indicator, indicators_db, microdata, resource, script, table, video
 
     Retrieve pydantic model definitions for each metadata type
     """
@@ -35,10 +35,10 @@ class MetadataManager:
         # "image":image_schema.ImageDataTypeSchema,
         "resource": resource_schema.Model,
         "script": script_schema.ResearchProjectSchemaDraft,
-        "survey": microdata_schema.MicrodataSchema,
+        "microdata": microdata_schema.MicrodataSchema,
         "table": table_schema.Model,
-        "timeseries": timeseries_schema.TimeseriesSchema,
-        "timeseries_db": timeseries_db_schema.TimeseriesDatabaseSchema,
+        "indicator": indicator_schema.TimeseriesSchema,
+        "indicators_db": indicators_db_schema.TimeseriesDatabaseSchema,
         "video": video_schema.Model,
     }
 
@@ -48,10 +48,10 @@ class MetadataManager:
         # "image":,
         "resource": write_to_single_sheet,
         "script": write_across_many_sheets,
-        "survey": write_across_many_sheets,
+        "microdata": write_across_many_sheets,
         "table": write_across_many_sheets,
-        "timeseries": write_across_many_sheets,
-        "timeseries_db": write_to_single_sheet,  # one sheet
+        "indicator": write_across_many_sheets,
+        "indicators_db": write_to_single_sheet,  # one sheet
         "video": write_to_single_sheet,  # one sheet
     }
 
@@ -61,10 +61,10 @@ class MetadataManager:
         # "image":,
         "resource": excel_single_sheet_to_pydantic,
         "script": excel_doc_to_pydantic,
-        "survey": excel_doc_to_pydantic,
+        "microdata": excel_doc_to_pydantic,
         "table": excel_doc_to_pydantic,
-        "timeseries": excel_doc_to_pydantic,
-        "timeseries_db": excel_single_sheet_to_pydantic,  # one sheet
+        "indicator": excel_doc_to_pydantic,
+        "indicators_db": excel_single_sheet_to_pydantic,  # one sheet
         "video": excel_single_sheet_to_pydantic,  # one sheet
     }
 
@@ -80,8 +80,12 @@ def metadata_type_names(self) -> List[str]:
     def standardize_metadata_name(self, metadata_name: str) -> str:
         metadata_name = metadata_name.lower()
         metadata_name = metadata_name.replace("-", "_").replace(" ", "_")
-        if metadata_name == "microdata" or metadata_name == "survey_microdata":
-            metadata_name = "survey"
+        if metadata_name == "survey" or metadata_name == "survey_microdata":
+            metadata_name = "microdata"
+        elif metadata_name == "timeseries":
+            metadata_name = "indicator"
+        elif metadata_name == "timeseries_db":
+            metadata_name = "indicators_db"
         self._raise_if_unsupported_metadata_name(metadata_name=metadata_name)
         return metadata_name
 
@@ -106,7 +110,7 @@ def write_metadata_outline_to_excel(
 
         Args:
             metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
-                    document, script, series, survey, table, timeseries, timeseries_DB, video
+                    document, indicator, indicators_db, microdata, resource, script, table, video
                 Currently not supported:
                     geospatial, image
                 If passed as a BaseModel type, for instance this is what you would do with a template, then the writer
@@ -156,7 +160,7 @@ def save_metadata_to_excel(
 
         Args:
             metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
-                    document, script, series, survey, table, timeseries, timeseries_DB, video
+                    document, indicator, indicators_db, microdata, resource, script, table, video
                 Currently not supported:
                     geospatial, image
                 If passed as a BaseModel type, for instance this is what you would do with a template, then the writer defaults to a single page.
@@ -230,7 +234,7 @@ def _get_metadata_name_from_excel_file(filename: str) -> str:
     def read_metadata_from_excel(self, filename: str, metadata_class: Optional[Type[BaseModel]] = None) -> BaseModel:
         """
         Read in metadata from an appropriately formatted Excel file as a pydantic object.
-        If using standard metadata types (documents, resource, script, survey, table, timeseries, timeseries_db, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided.
+        If using standard metadata types (document, indicator, indicators_db, microdata, resource, script, table, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided.
 
         Args:
             filename (str): The path to the Excel file.

diff --git a/pydantic_schemas/microdata_schema.py b/pydantic_schemas/microdata_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  microdata-schema.json
-#   timestamp: 2024-09-05T20:33:58+00:00
+#   timestamp: 2024-09-13T18:34:51+00:00
 
 from __future__ import annotations
 

diff --git a/pydantic_schemas/resource_schema.py b/pydantic_schemas/resource_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  resource-schema.json
-#   timestamp: 2024-09-05T20:34:00+00:00
+#   timestamp: 2024-09-13T18:34:53+00:00
 
 from __future__ import annotations
 

diff --git a/pydantic_schemas/script_schema.py b/pydantic_schemas/script_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  script-schema.json
-#   timestamp: 2024-09-05T20:34:02+00:00
+#   timestamp: 2024-09-13T18:34:55+00:00
 
 from __future__ import annotations
 

diff --git a/pydantic_schemas/table_schema.py b/pydantic_schemas/table_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  table-schema.json
-#   timestamp: 2024-09-05T20:34:03+00:00
+#   timestamp: 2024-09-13T18:34:58+00:00
 
 from __future__ import annotations
 

diff --git a/pydantic_schemas/tests/test_metadata_manager.py b/pydantic_schemas/tests/test_metadata_manager.py
@@ -4,7 +4,7 @@
 
 
 @pytest.mark.parametrize(
-    "metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"]
+    "metadata_name", ["document", "script", "microdata", "table", "indicators_db", "indicator", "video"]
 )
 def test_metadata_by_name(tmpdir, metadata_name):
     mm = MetadataManager()
@@ -30,7 +30,7 @@ def test_metadata_by_name(tmpdir, metadata_name):
 
 
 @pytest.mark.parametrize(
-    "metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"]
+    "metadata_name", ["document", "script", "microdata", "table", "timeseries_db", "indicator", "video"]
 )
 def test_metadata_by_class(tmpdir, metadata_name):
     mm = MetadataManager()
@@ -59,23 +59,25 @@ def test_standardize_metadata_name():
         "survey microdata",
         "microdata",
         "table",
+        "indicators-db",
         "timeseries-db",
-        "timeseries-db",
-        "TimeSeries",
+        "INdicator",
+        "timeseries",
         "VIdeo",
     ]
 
     expecteds = [
         "document",
         "script",
-        "survey",
-        "survey",
-        "survey",
-        "survey",
+        "microdata",
+        "microdata",
+        "microdata",
+        "microdata",
         "table",
-        "timeseries_db",
-        "timeseries_db",
-        "timeseries",
+        "indicators_db",
+        "indicators_db",
+        "indicator",
+        "indicator",
         "video",
     ]
 

diff --git a/pydantic_schemas/tests/test_pydantic_to_excel.py b/pydantic_schemas/tests/test_pydantic_to_excel.py
@@ -7,14 +7,14 @@
 from pydantic import BaseModel, Field
 
 from pydantic_schemas.document_schema import ScriptSchemaDraft
+from pydantic_schemas.indicator_schema import TimeseriesSchema
+from pydantic_schemas.indicators_db_schema import TimeseriesDatabaseSchema
 
 # from pydantic_schemas.definitions.geospatial_schema import GeospatialSchema
 # from pydantic_schemas.definitions.image_schema import ImageDataTypeSchema
 from pydantic_schemas.microdata_schema import MicrodataSchema
 from pydantic_schemas.script_schema import ResearchProjectSchemaDraft
 from pydantic_schemas.table_schema import Model as TableModel
-from pydantic_schemas.timeseries_db_schema import TimeseriesDatabaseSchema
-from pydantic_schemas.timeseries_schema import TimeseriesSchema
 from pydantic_schemas.utils.excel_to_pydantic import (
     excel_doc_to_pydantic,
     excel_sheet_to_pydantic,

diff --git a/pydantic_schemas/tests/test_quick_start.py b/pydantic_schemas/tests/test_quick_start.py
@@ -4,12 +4,8 @@
 import pytest
 from pydantic import AnyUrl, BaseModel, Field, confloat
 
-from pydantic_schemas.utils.quick_start import (
-    DEFAULT_URL,
-    METADATA_TYPES_FILE_MAP,
-    create_empty_schema_from_path,
-    make_skeleton,
-)
+from pydantic_schemas.metadata_manager import MetadataManager
+from pydantic_schemas.utils.quick_start import DEFAULT_URL, make_skeleton  # create_empty_schema_from_path,
 
 
 def test_simple_strings():
@@ -239,7 +235,9 @@ class BadFieldNames(BaseModel):
     assert actual == expected, actual
 
 
-@pytest.mark.parametrize("k, v", [(k, v) for k, v in METADATA_TYPES_FILE_MAP.items()])
-def test_actual_schemas(k, v):
-    base = "pydantic_schemas.{}"
-    create_empty_schema_from_path(base.format(k), v, debug=True)
+@pytest.mark.parametrize("n", [n for n in MetadataManager().metadata_type_names])
+def test_actual_schemas(n):
+    if n == "geospatial":
+        return
+    klass = MetadataManager().metadata_class_from_name(n)
+    make_skeleton(klass)
diff --git a/pydantic_schemas/utils/quick_start.py b/pydantic_schemas/utils/quick_start.py
@@ -8,18 +8,6 @@
 
 from .utils import standardize_keys_in_dict
 
-METADATA_TYPES_FILE_MAP = {
-    "document_schema": "ScriptSchemaDraft",
-    # "geospatial_schema": "GeospatialSchema",
-    # "image_schema": "ImageDataTypeSchema",
-    "microdata_schema": "MicrodataSchema",
-    "script_schema": "ResearchProjectSchemaDraft",
-    "table_schema": "Model",
-    "timeseries_db_schema": "TimeseriesDatabaseSchema",
-    "timeseries_schema": "TimeseriesSchema",
-    "video_schema": "Model",
-}
-
 DEFAULT_URL = "http://www.example.com"
 
 
@@ -242,6 +230,6 @@ def make_skeleton(cl: Type[BaseModel], debug=False, indentation=""):
     return cl(**param_values)
 
 
-def create_empty_schema_from_path(module_name, class_name, debug=False):
-    MyClass = getattr(importlib.import_module(module_name), class_name)
-    return make_skeleton(MyClass, debug=debug)
+# def create_empty_schema_from_path(module_name, class_name, debug=False):
+# MyClass = getattr(importlib.import_module(module_name), class_name)
+# return make_skeleton(MyClass, debug=debug)
diff --git a/pydantic_schemas/video_schema.py b/pydantic_schemas/video_schema.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  video-schema.json
-#   timestamp: 2024-09-05T20:34:08+00:00
+#   timestamp: 2024-09-13T18:35:04+00:00
 
 from __future__ import annotations