From 5db1610d43c5cd70ee0858d4dc1717df3aa5a840 Mon Sep 17 00:00:00 2001
From: Gordon Blackadder
Date: Thu, 22 Aug 2024 14:01:01 -0400
Subject: [PATCH] add readme

---
 README.md                                   |  79 +++++++++++++-
 poetry.lock                                 | 103 ++++++++++---------
 pydantic_schemas/excel_interface.py         |  10 +-
 pydantic_schemas/utils/excel_to_pydantic.py | 108 ++++++++++++--------
 pydantic_schemas/utils/pydantic_to_excel.py |   7 +-
 pyproject.toml                              |   3 +-
 6 files changed, 208 insertions(+), 102 deletions(-)

diff --git a/README.md b/README.md
index e95e38f..d429065 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,83 @@
 # metadata-schemas
-Metadata JSON Schemas
+This repository contains both the metadata schema definitions and a Python library for creating schema objects with pydantic and Excel.
-View documentation - https://worldbank.github.io/metadata-schemas/
+## Defining Metadata Schemas
+The schemas are defined in the JSON Schema format in the folder `schemas`. For more information, see the documentation at https://worldbank.github.io/metadata-schemas/
-## Pydantic
+## Python library
-To update the pydantic schemas so that they match the json schemas run
+To install the library, run
+
+```pip install metadataschemas```
+
+### Creating a pydantic metadata object
+
+To create a timeseries metadata object, run
+
+```python
+from metadataschemas import timeseries_schema
+
+timeseries_metadata = timeseries_schema.TimeseriesSchema(idno='project_idno', series_description=timeseries_schema.SeriesDescription(idno='project_idno', name='project_name'))
+```
+
+Depending on your IDE, selecting `TimeseriesSchema` should show you which fields the schema contains and their corresponding object definitions.
+
+There are metadata objects for each of the following metadata types:
+
+| Metadata Type    | Metadata Object                                  |
+|------------------|--------------------------------------------------|
+| document         | `document_schema.ScriptSchemaDraft`              |
+| geospatial       | `geospatial_schema.GeospatialSchema`             |
+| script           | `script_schema.ResearchProjectSchemaDraft`       |
+| series           | `series_schema.Series`                           |
+| survey           | `microdata_schema.MicrodataSchema`               |
+| table            | `table_schema.Model`                             |
+| timeseries       | `timeseries_schema.TimeseriesSchema`             |
+| timeseries_db    | `timeseries_db_schema.TimeseriesDatabaseSchema`  |
+| video            | `video_schema.Model`                             |
+
+### Python - Excel interface
+
+The Excel interface exists to:
+
+1. Create blank Excel files formatted for a given metadata type
+2. Write metadata objects to Excel
+3. Read an appropriately formatted Excel file containing metadata into a pydantic metadata object
+
+To use it, run:
+
+```python
+from metadataschemas import ExcelInterface
+
+ei = ExcelInterface()
+
+filename = ei.write_outline_metadata_to_excel(metadata_type='timeseries')
+
+filename = ei.save_metadata_to_excel(metadata_type='timeseries',
+                                     object=timeseries_metadata)
+
+# Then, after you have updated the metadata in the Excel file
+
+updated_timeseries_metadata = ei.read_metadata_excel(filename=filename)
+```
+
+Note that the Excel interface currently does not support geospatial metadata.
+
+The Excel interface also offers a convenient way to get started creating metadata in pydantic by generating an empty pydantic object for a given metadata type, which can then be updated as needed.
+ +```python +survey_metadata = ei.type_to_outline(metadata_type="survey") + +survey_metadata.repositoryid = "repository id" + +survey_metadata.study_desc.title_statement.idno = "project_idno" +``` + + +## Updating Pydantic definitions and Excel sheets + +To update the pydantic schemas so that they match the latest json schemas run `python pydantic_schemas\\generators\\generate_pydantic_schemas.py` diff --git a/poetry.lock b/poetry.lock index f0f2c66..bea39c6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -846,56 +846,63 @@ files = [ [[package]] name = "numpy" -version = "2.0.0" +version = "2.1.0" description = "Fundamental package for array computing in Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" files = [ - {file = "numpy-2.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f"}, - {file = "numpy-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2635dbd200c2d6faf2ef9a0d04f0ecc6b13b3cad54f7c67c61155138835515d2"}, - {file = "numpy-2.0.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238"}, - {file = "numpy-2.0.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:8d83bb187fb647643bd56e1ae43f273c7f4dbcdf94550d7938cfc32566756514"}, - {file = "numpy-2.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79e843d186c8fb1b102bef3e2bc35ef81160ffef3194646a7fdd6a73c6b97196"}, - {file = "numpy-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7696c615765091cc5093f76fd1fa069870304beaccfd58b5dcc69e55ef49c1"}, - {file = "numpy-2.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b4c76e3d4c56f145d41b7b6751255feefae92edbc9a61e1758a98204200f30fc"}, - {file = "numpy-2.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:acd3a644e4807e73b4e1867b769fbf1ce8c5d80e7caaef0d90dcdc640dfc9787"}, - {file = "numpy-2.0.0-cp310-cp310-win32.whl", hash = "sha256:cee6cc0584f71adefe2c908856ccc98702baf95ff80092e4ca46061538a2ba98"}, - {file = "numpy-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:ed08d2703b5972ec736451b818c2eb9da80d66c3e84aed1deeb0c345fefe461b"}, - {file = "numpy-2.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad0c86f3455fbd0de6c31a3056eb822fc939f81b1618f10ff3406971893b62a5"}, - {file = "numpy-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7f387600d424f91576af20518334df3d97bc76a300a755f9a8d6e4f5cadd289"}, - {file = "numpy-2.0.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:34f003cb88b1ba38cb9a9a4a3161c1604973d7f9d5552c38bc2f04f829536609"}, - {file = "numpy-2.0.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:b6f6a8f45d0313db07d6d1d37bd0b112f887e1369758a5419c0370ba915b3871"}, - {file = "numpy-2.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f64641b42b2429f56ee08b4f427a4d2daf916ec59686061de751a55aafa22e4"}, - {file = "numpy-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7039a136017eaa92c1848152827e1424701532ca8e8967fe480fe1569dae581"}, - {file = "numpy-2.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46e161722e0f619749d1cd892167039015b2c2817296104487cd03ed4a955995"}, - {file = "numpy-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0e50842b2295ba8414c8c1d9d957083d5dfe9e16828b37de883f51fc53c4016f"}, - {file = "numpy-2.0.0-cp311-cp311-win32.whl", hash = "sha256:2ce46fd0b8a0c947ae047d222f7136fc4d55538741373107574271bc00e20e8f"}, - {file = 
"numpy-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd6acc766814ea6443628f4e6751d0da6593dae29c08c0b2606164db026970c"}, - {file = "numpy-2.0.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:354f373279768fa5a584bac997de6a6c9bc535c482592d7a813bb0c09be6c76f"}, - {file = "numpy-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d2f62e55a4cd9c58c1d9a1c9edaedcd857a73cb6fda875bf79093f9d9086f85"}, - {file = "numpy-2.0.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1e72728e7501a450288fc8e1f9ebc73d90cfd4671ebbd631f3e7857c39bd16f2"}, - {file = "numpy-2.0.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:84554fc53daa8f6abf8e8a66e076aff6ece62de68523d9f665f32d2fc50fd66e"}, - {file = "numpy-2.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c73aafd1afca80afecb22718f8700b40ac7cab927b8abab3c3e337d70e10e5a2"}, - {file = "numpy-2.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49d9f7d256fbc804391a7f72d4a617302b1afac1112fac19b6c6cec63fe7fe8a"}, - {file = "numpy-2.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0ec84b9ba0654f3b962802edc91424331f423dcf5d5f926676e0150789cb3d95"}, - {file = "numpy-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:feff59f27338135776f6d4e2ec7aeeac5d5f7a08a83e80869121ef8164b74af9"}, - {file = "numpy-2.0.0-cp312-cp312-win32.whl", hash = "sha256:c5a59996dc61835133b56a32ebe4ef3740ea5bc19b3983ac60cc32be5a665d54"}, - {file = "numpy-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:a356364941fb0593bb899a1076b92dfa2029f6f5b8ba88a14fd0984aaf76d0df"}, - {file = "numpy-2.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e61155fae27570692ad1d327e81c6cf27d535a5d7ef97648a17d922224b216de"}, - {file = "numpy-2.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4554eb96f0fd263041baf16cf0881b3f5dafae7a59b1049acb9540c4d57bc8cb"}, - {file = "numpy-2.0.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:903703372d46bce88b6920a0cd86c3ad82dae2dbef157b5fc01b70ea1cfc430f"}, - {file = "numpy-2.0.0-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:3e8e01233d57639b2e30966c63d36fcea099d17c53bf424d77f088b0f4babd86"}, - {file = "numpy-2.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cde1753efe513705a0c6d28f5884e22bdc30438bf0085c5c486cdaff40cd67a"}, - {file = "numpy-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:821eedb7165ead9eebdb569986968b541f9908979c2da8a4967ecac4439bae3d"}, - {file = "numpy-2.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a1712c015831da583b21c5bfe15e8684137097969c6d22e8316ba66b5baabe4"}, - {file = "numpy-2.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9c27f0946a3536403efb0e1c28def1ae6730a72cd0d5878db38824855e3afc44"}, - {file = "numpy-2.0.0-cp39-cp39-win32.whl", hash = "sha256:63b92c512d9dbcc37f9d81b123dec99fdb318ba38c8059afc78086fe73820275"}, - {file = "numpy-2.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:3f6bed7f840d44c08ebdb73b1825282b801799e325bcbdfa6bc5c370e5aecc65"}, - {file = "numpy-2.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9416a5c2e92ace094e9f0082c5fd473502c91651fb896bc17690d6fc475128d6"}, - {file = "numpy-2.0.0-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:17067d097ed036636fa79f6a869ac26df7db1ba22039d962422506640314933a"}, - {file = "numpy-2.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ecb5b0582cd125f67a629072fed6f83562d9dd04d7e03256c9829bdec027ad"}, - {file = "numpy-2.0.0-pp39-pypy39_pp73-win_amd64.whl", hash 
= "sha256:cef04d068f5fb0518a77857953193b6bb94809a806bd0a14983a8f12ada060c9"}, - {file = "numpy-2.0.0.tar.gz", hash = "sha256:cf5d1c9e6837f8af9f92b6bd3e86d513cdc11f60fd62185cc49ec7d1aba34864"}, + {file = "numpy-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6326ab99b52fafdcdeccf602d6286191a79fe2fda0ae90573c5814cd2b0bc1b8"}, + {file = "numpy-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0937e54c09f7a9a68da6889362ddd2ff584c02d015ec92672c099b61555f8911"}, + {file = "numpy-2.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:30014b234f07b5fec20f4146f69e13cfb1e33ee9a18a1879a0142fbb00d47673"}, + {file = "numpy-2.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:899da829b362ade41e1e7eccad2cf274035e1cb36ba73034946fccd4afd8606b"}, + {file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08801848a40aea24ce16c2ecde3b756f9ad756586fb2d13210939eb69b023f5b"}, + {file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:398049e237d1aae53d82a416dade04defed1a47f87d18d5bd615b6e7d7e41d1f"}, + {file = "numpy-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0abb3916a35d9090088a748636b2c06dc9a6542f99cd476979fb156a18192b84"}, + {file = "numpy-2.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10e2350aea18d04832319aac0f887d5fcec1b36abd485d14f173e3e900b83e33"}, + {file = "numpy-2.1.0-cp310-cp310-win32.whl", hash = "sha256:f6b26e6c3b98adb648243670fddc8cab6ae17473f9dc58c51574af3e64d61211"}, + {file = "numpy-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:f505264735ee074250a9c78247ee8618292091d9d1fcc023290e9ac67e8f1afa"}, + {file = "numpy-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:76368c788ccb4f4782cf9c842b316140142b4cbf22ff8db82724e82fe1205dce"}, + {file = "numpy-2.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f8e93a01a35be08d31ae33021e5268f157a2d60ebd643cfc15de6ab8e4722eb1"}, + {file = "numpy-2.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9523f8b46485db6939bd069b28b642fec86c30909cea90ef550373787f79530e"}, + {file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54139e0eb219f52f60656d163cbe67c31ede51d13236c950145473504fa208cb"}, + {file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ebbf9fbdabed208d4ecd2e1dfd2c0741af2f876e7ae522c2537d404ca895c3"}, + {file = "numpy-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:378cb4f24c7d93066ee4103204f73ed046eb88f9ad5bb2275bb9fa0f6a02bd36"}, + {file = "numpy-2.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8f699a709120b220dfe173f79c73cb2a2cab2c0b88dd59d7b49407d032b8ebd"}, + {file = "numpy-2.1.0-cp311-cp311-win32.whl", hash = "sha256:ffbd6faeb190aaf2b5e9024bac9622d2ee549b7ec89ef3a9373fa35313d44e0e"}, + {file = "numpy-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0af3a5987f59d9c529c022c8c2a64805b339b7ef506509fba7d0556649b9714b"}, + {file = "numpy-2.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fe76d75b345dc045acdbc006adcb197cc680754afd6c259de60d358d60c93736"}, + {file = "numpy-2.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f358ea9e47eb3c2d6eba121ab512dfff38a88db719c38d1e67349af210bc7529"}, + {file = "numpy-2.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:dd94ce596bda40a9618324547cfaaf6650b1a24f5390350142499aa4e34e53d1"}, + {file = "numpy-2.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b47c551c6724960479cefd7353656498b86e7232429e3a41ab83be4da1b109e8"}, + {file = 
"numpy-2.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0756a179afa766ad7cb6f036de622e8a8f16ffdd55aa31f296c870b5679d745"}, + {file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24003ba8ff22ea29a8c306e61d316ac74111cebf942afbf692df65509a05f111"}, + {file = "numpy-2.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b34fa5e3b5d6dc7e0a4243fa0f81367027cb6f4a7215a17852979634b5544ee0"}, + {file = "numpy-2.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4f982715e65036c34897eb598d64aef15150c447be2cfc6643ec7a11af06574"}, + {file = "numpy-2.1.0-cp312-cp312-win32.whl", hash = "sha256:c4cd94dfefbefec3f8b544f61286584292d740e6e9d4677769bc76b8f41deb02"}, + {file = "numpy-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0cdef204199278f5c461a0bed6ed2e052998276e6d8ab2963d5b5c39a0500bc"}, + {file = "numpy-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8ab81ccd753859ab89e67199b9da62c543850f819993761c1e94a75a814ed667"}, + {file = "numpy-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442596f01913656d579309edcd179a2a2f9977d9a14ff41d042475280fc7f34e"}, + {file = "numpy-2.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:848c6b5cad9898e4b9ef251b6f934fa34630371f2e916261070a4eb9092ffd33"}, + {file = "numpy-2.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:54c6a63e9d81efe64bfb7bcb0ec64332a87d0b87575f6009c8ba67ea6374770b"}, + {file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:652e92fc409e278abdd61e9505649e3938f6d04ce7ef1953f2ec598a50e7c195"}, + {file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab32eb9170bf8ffcbb14f11613f4a0b108d3ffee0832457c5d4808233ba8977"}, + {file = "numpy-2.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:8fb49a0ba4d8f41198ae2d52118b050fd34dace4b8f3fb0ee34e23eb4ae775b1"}, + {file = "numpy-2.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44e44973262dc3ae79e9063a1284a73e09d01b894b534a769732ccd46c28cc62"}, + {file = "numpy-2.1.0-cp313-cp313-win32.whl", hash = "sha256:ab83adc099ec62e044b1fbb3a05499fa1e99f6d53a1dde102b2d85eff66ed324"}, + {file = "numpy-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:de844aaa4815b78f6023832590d77da0e3b6805c644c33ce94a1e449f16d6ab5"}, + {file = "numpy-2.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:343e3e152bf5a087511cd325e3b7ecfd5b92d369e80e74c12cd87826e263ec06"}, + {file = "numpy-2.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f07fa2f15dabe91259828ce7d71b5ca9e2eb7c8c26baa822c825ce43552f4883"}, + {file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5474dad8c86ee9ba9bb776f4b99ef2d41b3b8f4e0d199d4f7304728ed34d0300"}, + {file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1f817c71683fd1bb5cff1529a1d085a57f02ccd2ebc5cd2c566f9a01118e3b7d"}, + {file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a3336fbfa0d38d3deacd3fe7f3d07e13597f29c13abf4d15c3b6dc2291cbbdd"}, + {file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a894c51fd8c4e834f00ac742abad73fc485df1062f1b875661a3c1e1fb1c2f6"}, + {file = "numpy-2.1.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:9156ca1f79fc4acc226696e95bfcc2b486f165a6a59ebe22b2c1f82ab190384a"}, + {file = "numpy-2.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:624884b572dff8ca8f60fab591413f077471de64e376b17d291b19f56504b2bb"}, + 
{file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:15ef8b2177eeb7e37dd5ef4016f30b7659c57c2c0b57a779f1d537ff33a72c7b"}, + {file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:e5f0642cdf4636198a4990de7a71b693d824c56a757862230454629cf62e323d"}, + {file = "numpy-2.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15976718c004466406342789f31b6673776360f3b1e3c575f25302d7e789575"}, + {file = "numpy-2.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6c1de77ded79fef664d5098a66810d4d27ca0224e9051906e634b3f7ead134c2"}, + {file = "numpy-2.1.0.tar.gz", hash = "sha256:7dc90da0081f7e1da49ec4e398ede6a8e9cc4f5ebe5f9e06b443ed889ee9aaa2"}, ] [[package]] @@ -1714,4 +1721,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "548bd0d8e7fb0a3cc4adbe09b1c592b315164c51ccbae0258fbed5a375349274" +content-hash = "7671a941b5d68d34eb48386ca5fc6a447fbd50677549e35a46fe64869f94d6fb" diff --git a/pydantic_schemas/excel_interface.py b/pydantic_schemas/excel_interface.py index e4bb5a8..4332ee1 100644 --- a/pydantic_schemas/excel_interface.py +++ b/pydantic_schemas/excel_interface.py @@ -140,7 +140,7 @@ def write_outline_metadata_to_excel( An Excel file into which metadata can be entered """ metadata_type = self._process_metadata_type(metadata_type) - self.raise_if_unsupported_metadata_type(metadata_type=metadata_type) + self._raise_if_unsupported_metadata_type(metadata_type=metadata_type) if filename is None: filename = f"{metadata_type}_metadata.xlsx" if not str(filename).endswith(".xlsx"): @@ -174,7 +174,7 @@ def save_metadata_to_excel( An Excel file containing the metadata from the pydantic object. This file can be updated as needed. """ metadata_type = self._process_metadata_type(metadata_type) - self.raise_if_unsupported_metadata_type(metadata_type=metadata_type) + self._raise_if_unsupported_metadata_type(metadata_type=metadata_type) if filename is None: filename = f"{metadata_type}_metadata.xlsx" @@ -236,7 +236,7 @@ def read_metadata_excel(self, filename: str) -> BaseModel: """ metadata_type = self._get_metadata_type_from_excel_file(filename) metadata_type = self._process_metadata_type(metadata_type) - self.raise_if_unsupported_metadata_type(metadata_type=metadata_type) + self._raise_if_unsupported_metadata_type(metadata_type=metadata_type) schema = self._TYPE_TO_SCHEMA[metadata_type] reader = self._TYPE_TO_READER[metadata_type] read_object = reader(filename, schema) @@ -245,7 +245,7 @@ def read_metadata_excel(self, filename: str) -> BaseModel: def inflate_read_data_to_schema(self, metadata_type, read_object): metadata_type = self._process_metadata_type(metadata_type) - self.raise_if_unsupported_metadata_type(metadata_type=metadata_type) + self._raise_if_unsupported_metadata_type(metadata_type=metadata_type) skeleton_object = self.type_to_outline(metadata_type=metadata_type, debug=False) if isinstance(read_object, dict): @@ -263,7 +263,7 @@ def inflate_read_data_to_schema(self, metadata_type, read_object): new_ob = schema(**combined_dict) return new_ob - def raise_if_unsupported_metadata_type(self, metadata_type: str): + def _raise_if_unsupported_metadata_type(self, metadata_type: str): """ If the type is specifically unsupported - geospatial or image - a NotImplementedError is raised If the type is simply unknown then a ValueError is raised. 
diff --git a/pydantic_schemas/utils/excel_to_pydantic.py b/pydantic_schemas/utils/excel_to_pydantic.py index a8dc618..6387916 100644 --- a/pydantic_schemas/utils/excel_to_pydantic.py +++ b/pydantic_schemas/utils/excel_to_pydantic.py @@ -65,7 +65,7 @@ def find_string_and_count_nans(arr, search_str): # return horizontal_intersection > vertical_intersection -def get_relevant_sub_frame(m: Type[BaseModel], df: pd.DataFrame, name_of_field: Optional[str] = None): +def get_relevant_sub_frame(m: Type[BaseModel], df: pd.DataFrame, name_of_field: Optional[str] = None, debug=False): """ THe dataframe likely contains lots and lots of information about other models. @@ -93,19 +93,21 @@ def get_relevant_sub_frame(m: Type[BaseModel], df: pd.DataFrame, name_of_field: sub = sub.dropna(how="all", axis=0) # drop all null rows sub = sub.dropna(how="all", axis=1) # drop all null columns - print("SubFrame = \n", sub) + if debug: + print("SubFrame = \n", sub) # if is_horizontally_organized(m, sub): # sub = sub.T return sub -def handle_optional(name, annotation, df, from_within_list: bool = False): +def handle_optional(name, annotation, df, from_within_list: bool = False, debug=False): args = [a for a in get_args(annotation) if a is not type(None)] assert len(args) == 1, f"handle_optional encountered {args}" ret = annotation_switch(name, args[0], df, from_within_list=from_within_list) - print(f"optional ret: {ret}") - print(f"isinstance(ret, list): {isinstance(ret, list)}") - # print(f"len(ret): {len(ret)}") + if debug: + print(f"optional ret: {ret}") + print(f"isinstance(ret, list): {isinstance(ret, list)}") + # print(f"len(ret): {len(ret)}") if (isinstance(ret, list) or isinstance(ret, dict)) and len(ret) == 0: return None elif isinstance(ret, str) and ret == "": @@ -114,7 +116,7 @@ def handle_optional(name, annotation, df, from_within_list: bool = False): return ret -def handle_list(name, anno, df): +def handle_list(name, anno, df, debug=False): subtype = get_subtype_of_optional_or_list(anno) if isinstance(subtype, type(BaseModel)): try: @@ -124,25 +126,30 @@ def handle_list(name, anno, df): list_of_subs = [] for c in subframe.columns[1:]: subsubframe = subframe.loc[:, [subframe.columns[0], c]] - print("subsubframe") - print(subsubframe) - print() + if debug: + print("subsubframe") + print(subsubframe) + print() sub = instantiate_pydantic_object(model_type=subtype, df=subsubframe, from_within_list=True) - print(f"instantiated: {sub}") + if debug: + print(f"instantiated: {sub}") list_of_subs.append(sub) return list_of_subs # raise NotImplementedError(f"handle_list - {name}, {anno}, {subframe}") else: values = df.set_index(df.columns[0]).loc[name] - print(f"handle_list anno:{anno}, value: {values}") + if debug: + print(f"handle_list anno:{anno}, value: {values}") return [v for v in values if v is not None] -def handle_list_within_list(name, anno, df): - print(f"handle_list_within_list {name}, {anno}") - print(df) +def handle_list_within_list(name, anno, df, debug=False): + if debug: + print(f"handle_list_within_list {name}, {anno}") + print(df) values = df.set_index(df.columns[0]).loc[name, df.columns[1]] - print(f"values: {values}, {type(values)}") + if debug: + print(f"values: {values}, {type(values)}") if values is None: return [] values = json.loads(values.replace("'", '"').replace("None", "null")) @@ -157,12 +164,14 @@ def handle_list_within_list(name, anno, df): raise NotImplementedError(f"handle_list_within_list unexpected values - {name}, {anno}, {values}, {df}") -def handle_builtin_or_enum(name, anno, 
df): - print(df) +def handle_builtin_or_enum(name, anno, df, debug=False): + if debug: + print(df) if len(df) == 0: return "" df_indexed = df.set_index(df.columns[0]) - print("handle_builtin_or_enum", df_indexed) + if debug: + print("handle_builtin_or_enum", df_indexed) # return df_indexed.loc[name, df.columns[1]] if name not in df_indexed.index: return "" @@ -194,47 +203,60 @@ def handle_dict(name, anno, df): # raise NotImplementedError(f"Dictionary: {name}, {anno}, {dict_results}, {ret}") -def annotation_switch(name: str, anno, df: pd.DataFrame, from_within_list=False) -> Any: - print(f"annotation_to_value name: {name}") +def annotation_switch(name: str, anno, df: pd.DataFrame, from_within_list=False, debug=False) -> Any: + if debug: + print(f"annotation_to_value name: {name}") if is_optional_annotation(anno): - print("optional") + if debug: + print("optional") return handle_optional(name, anno, df, from_within_list=from_within_list) elif is_dict_annotation(anno): return handle_dict(name, anno, df) elif is_list_annotation(anno): if from_within_list: - print("list within a list") + if debug: + print("list within a list") return handle_list_within_list(name, anno, df) else: - print("list") + if debug: + print("list") return handle_list(name, anno, df) elif isinstance(anno, type(BaseModel)): - print("pydantic") + if debug: + print("pydantic") try: sub = get_relevant_sub_frame(anno, df, name_of_field=name) except IndexError: return make_skeleton(anno) return instantiate_pydantic_object(anno, sub) elif len(get_args(anno)) == 0: - print("builtin or enum") + if debug: + print("builtin or enum") return handle_builtin_or_enum(name, anno, df) else: raise NotImplementedError(anno) -def instantiate_pydantic_object(model_type: Type[BaseModel], df: pd.DataFrame, from_within_list=False) -> BaseModel: +def instantiate_pydantic_object( + model_type: Type[BaseModel], df: pd.DataFrame, from_within_list=False, debug=False +) -> BaseModel: ret = {} - print(f"instantiate_pydantic_object df = {df}") + if debug: + print(f"instantiate_pydantic_object df = {df}") for field_name, field_info in model_type.model_fields.items(): anno = field_info.annotation - print(f"Instantiating field {field_name}, anno {anno} and args {get_args(anno)}") + if debug: + print(f"Instantiating field {field_name}, anno {anno} and args {get_args(anno)}") ret[field_name] = annotation_switch(field_name, anno, df, from_within_list=from_within_list) - print(ret[field_name]) - print() + if debug: + print(ret[field_name]) + print() return model_type(**ret) -def excel_sheet_to_pydantic(filename: str, sheetname: str, model_type: Union[Type[BaseModel], Type[List[BaseModel]]]): +def excel_sheet_to_pydantic( + filename: str, sheetname: str, model_type: Union[Type[BaseModel], Type[List[BaseModel]]], debug=False +): df = pd.read_excel(filename, sheet_name=sheetname, header=None) df = df.where(df.notnull(), None) if sheetname != "metadata": @@ -254,38 +276,40 @@ def excel_sheet_to_pydantic(filename: str, sheetname: str, model_type: Union[Typ if "simple" in children and len(children["simple"]): sub = get_relevant_sub_frame(model_type, df, name_of_field=df.iloc[0, 0]) simple_child_field_type = subset_pydantic_model_type(model_type, children["simple"]) - fields = instantiate_pydantic_object(simple_child_field_type, sub) + fields = instantiate_pydantic_object(simple_child_field_type, sub, debug=debug) for child in children["simple"]: ret[child] = getattr(fields, child) for name in children["pydantic"]: - print(f"Looking to get {name}") + if debug: + 
print(f"Looking to get {name}") anno = model_type.model_fields[name].annotation ret[name] = annotation_switch(name, anno, df) - print() for k, v in ret.items(): if isinstance(v, list) or isinstance(v, np.ndarray): ret[k] = [elem for elem in v if elem is not None] - print(ret) + if debug: + print(ret) return model_type(**ret) -def excel_single_sheet_to_pydantic(filename: str, model_type: Type[BaseModel]) -> BaseModel: - return excel_sheet_to_pydantic(filename, "metadata", model_type) +def excel_single_sheet_to_pydantic(filename: str, model_type: Type[BaseModel], verbose=False) -> BaseModel: + return excel_sheet_to_pydantic(filename, "metadata", model_type, debug=verbose) -def excel_doc_to_pydantic(filename: str, model_type: Type[BaseModel]) -> BaseModel: +def excel_doc_to_pydantic(filename: str, model_type: Type[BaseModel], verbose=False) -> BaseModel: children = seperate_simple_from_pydantic(model_type) annotations = {k: v.annotation for k, v in model_type.model_fields.items()} ret = {} if len(children["simple"]) > 0: field_type = subset_pydantic_model_type(model_type, children["simple"]) - fields = excel_sheet_to_pydantic(filename, sheetname="metadata", model_type=field_type) + fields = excel_sheet_to_pydantic(filename, sheetname="metadata", model_type=field_type, debug=verbose) for child in children["simple"]: ret[child] = getattr(fields, child) for fieldname in children["pydantic"]: - print(f"Looking to get {fieldname}") + if verbose: + print(f"Looking to get {fieldname}") field_type = annotations[fieldname] - ret[fieldname] = excel_sheet_to_pydantic(filename, sheetname=fieldname, model_type=field_type) + ret[fieldname] = excel_sheet_to_pydantic(filename, sheetname=fieldname, model_type=field_type, debug=verbose) return model_type(**ret) diff --git a/pydantic_schemas/utils/pydantic_to_excel.py b/pydantic_schemas/utils/pydantic_to_excel.py index 27567bd..cd61fda 100644 --- a/pydantic_schemas/utils/pydantic_to_excel.py +++ b/pydantic_schemas/utils/pydantic_to_excel.py @@ -2,17 +2,18 @@ import json import os from enum import Enum -from typing import Dict, List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union import pandas as pd from openpyxl import Workbook, load_workbook from openpyxl.styles import Alignment, Border, Font, PatternFill, Protection, Side from openpyxl.utils.dataframe import dataframe_to_rows +from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.worksheet.protection import SheetProtection from openpyxl.worksheet.worksheet import Worksheet from pydantic import BaseModel -from pydantic_schemas.utils.utils import ( +from .utils import ( annotation_contains_dict, annotation_contains_list, assert_dict_annotation_is_strings_or_any, @@ -418,6 +419,8 @@ def write_to_single_sheet( doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, verbose=False ): model_default_name = ob.model_json_schema()["title"] + if title is None: + title = model_default_name wb = open_or_create_workbook(doc_filepath) ws = create_sheet(wb, "metadata", sheet_number=0) version = f"{metadata_type} type metadata version 20240812.1" diff --git a/pyproject.toml b/pyproject.toml index 0ba3153..db66af0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metadataschemas" -version = "0.1.15" +version = "0.1.0" description = "" authors = ["Mehmood Asghar ", "Gordon Blackadder "] readme = "README.md" @@ -14,6 +14,7 @@ packages = [ [tool.poetry.dependencies] python = "^3.11" pandas = "^2.2.2" +numpy = "^2.1.0" 
pydantic = "^2.8.0" openpyxl = "^3.1.5"