From 05f04a448d495ff9f6bf5d5af5ebf38f91e46517 Mon Sep 17 00:00:00 2001
From: Gordon Blackadder
Date: Thu, 5 Sep 2024 16:30:05 -0400
Subject: [PATCH] include resource, delete Series, remove template stuff but make the manager work with templates

---
 README.md                                   |   4 +-
 excel_sheets/Document_metadata.xlsx         | Bin 38853 -> 38854 bytes
 excel_sheets/Resource_metadata.xlsx         | Bin 0 -> 8441 bytes
 excel_sheets/Script_metadata.xlsx           | Bin 40331 -> 40332 bytes
 excel_sheets/Series_metadata.xlsx           | Bin 13580 -> 0 bytes
 excel_sheets/Survey_metadata.xlsx           | Bin 53804 -> 53805 bytes
 excel_sheets/Table_metadata.xlsx            | Bin 38933 -> 38933 bytes
 excel_sheets/Timeseries_db_metadata.xlsx    | Bin 15568 -> 15570 bytes
 excel_sheets/Timeseries_metadata.xlsx       | Bin 51082 -> 51082 bytes
 excel_sheets/Video_metadata.xlsx            | Bin 16591 -> 16592 bytes
 pydantic_schemas/document_schema.py         |   4 +-
 .../generators/generate_excel_files.py      |  14 +-
 .../generators/generate_pydantic_schemas.py |   3 +-
 pydantic_schemas/geospatial_schema.py       |   4 +-
 pydantic_schemas/image_schema.py            |   4 +-
 pydantic_schemas/metadata_manager.py        | 273 +++++++++++++++++
 pydantic_schemas/microdata_schema.py        |   4 +-
 pydantic_schemas/resource_schema.py         |  64 ++++
 pydantic_schemas/schema_interface.py        | 276 ------------------
 pydantic_schemas/script_schema.py           |   4 +-
 pydantic_schemas/table_schema.py            |   4 +-
 .../tests/test_excel_interface.py           |  18 +-
 .../tests/test_pydantic_to_excel.py         |   2 -
 pydantic_schemas/timeseries_db_schema.py    |   4 +-
 pydantic_schemas/timeseries_schema.py       |   4 +-
 pydantic_schemas/utils/quick_start.py       |   1 -
 .../{ => utils}/schema_base_model.py        |   0
 .../utils/template_to_pydantic.py           | 189 ------------
 pydantic_schemas/utils/utils.py             |  67 ++++-
 pydantic_schemas/video_schema.py            |   4 +-
 pyproject.toml                              |   5 +-
 31 files changed, 434 insertions(+), 518 deletions(-)
 create mode 100644 excel_sheets/Resource_metadata.xlsx
 delete mode 100644 excel_sheets/Series_metadata.xlsx
 create mode 100644 pydantic_schemas/metadata_manager.py
 create mode 100644 pydantic_schemas/resource_schema.py
 delete mode 100644 pydantic_schemas/schema_interface.py
 rename pydantic_schemas/{ => utils}/schema_base_model.py (100%)
 delete mode 100644 pydantic_schemas/utils/template_to_pydantic.py

diff --git a/README.md b/README.md
index d429065..2cd459a 100644
--- a/README.md
+++ b/README.md
@@ -79,8 +79,8 @@ survey_metadata.study_desc.title_statement.idno = "project_idno"
 To update the pydantic schemas so that they match the latest json schemas run
- `python pydantic_schemas\\generators\\generate_pydantic_schemas.py`
+ `python pydantic_schemas/generators/generate_pydantic_schemas.py`
 Then to update the Excel sheets run
- `python pydantic_schemas\\generators\\generate_excel_files.py`
\ No newline at end of file
+ `python pydantic_schemas/generators/generate_excel_files.py`
\ No newline at end of file

diff --git a/excel_sheets/Document_metadata.xlsx b/excel_sheets/Document_metadata.xlsx
index 8b91933a455d25b5969a048f30d8c67e6d9bc052..c334e93203fe9e068024c705393a82aef00a1546 100644
GIT binary patch
delta 835
delta 788

diff --git a/excel_sheets/Resource_metadata.xlsx b/excel_sheets/Resource_metadata.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..b9fe85f939855ea46db85a1ac44add0832dbc73a
GIT binary patch
literal 8441

diff --git a/excel_sheets/Script_metadata.xlsx b/excel_sheets/Script_metadata.xlsx
index 66475c7a5ed9db328dffcb7b43ba5b5460404a9a..4977bb150363c74533623777791769ebb188750b 100644
GIT binary patch
delta 826

diff --git a/excel_sheets/Series_metadata.xlsx b/excel_sheets/Series_metadata.xlsx
deleted file mode 100644
index ba893f3a196c7b5a6b3dc69cdfd2689f2e080ab6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
literal 13580

diff --git a/excel_sheets/Survey_metadata.xlsx b/excel_sheets/Survey_metadata.xlsx
index 3be5d6f6f46038fe3590ae299ac18b95c354559a..23a4e8173289c49f8997a0f1649880dae7090186 100644
GIT binary patch
delta 974

diff --git a/excel_sheets/Video_metadata.xlsx b/excel_sheets/Video_metadata.xlsx
index 1486a557f3a0249209a43ee707a2ebcbd014c15f..d21681aa57007c048d8d45b8d69cb57359705ef2 100644
GIT binary patch
delta 590

+    def standardize_metadata_name(self, metadata_name: str) -> str:
+        metadata_name = metadata_name.lower()
+        metadata_name = metadata_name.replace("-", "_")
+        if metadata_name == "microdata" or metadata_name == "survey_microdata":
+            metadata_name = "survey"
+        self._raise_if_unsupported_metadata_name(metadata_name=metadata_name)
+        return metadata_name
+
+    def create_metadata_outline(
+        self, metadata_name_or_class: Union[str, Type[BaseModel]], debug: bool = False
+    ) -> BaseModel:
+        if isinstance(metadata_name_or_class, str):
+            schema = self.metadata_class_from_name(metadata_name_or_class)
+        else:
+            schema = metadata_name_or_class
+        skeleton_object = make_skeleton(schema, debug=debug)
+        return skeleton_object
+
+    def write_metadata_outline_to_excel(
+        self,
+        metadata_name_or_class: Union[str, Type[BaseModel]],
+        filename: Optional[str] = None,
+        title: Optional[str] = None,
+    ) -> str:
+        """
+        Create an Excel file formatted for writing the given metadata_name metadata.
+
+        Args:
+            metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
+                document, script, series, survey, table, timeseries, timeseries_DB, video
+                Currently not supported:
+                    geospatial, image
+                If passed as a BaseModel type, for instance this is what you would do with a template, then the writer
+                defaults to a single page.
+            filename (Optional[str]): The path to the Excel file.
If None, defaults to {metadata_name}_metadata.xlsx + title (Optional[str]): The title for the Excel sheet. If None, defaults to '{metadata_name} Metadata' + + Returns: + str: filename of metadata file + + Outputs: + An Excel file into which metadata can be entered + """ + if isinstance(metadata_name_or_class, str): + metadata_name = self.standardize_metadata_name(metadata_name_or_class) + if metadata_name == "geospatial": + raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel") + skeleton_object = self.create_metadata_outline(metadata_name, debug=False) + writer = self._TYPE_TO_WRITER[metadata_name] + if filename is None: + filename = f"{metadata_name}_metadata.xlsx" + if title is None: + title = f"{metadata_name.capitalize()} Metadata" + else: + skeleton_object = make_skeleton(metadata_name_or_class, debug=False) + writer = write_to_single_sheet + metadata_name = metadata_name_or_class.model_json_schema()["title"] + if filename is None: + filename = f"{metadata_name}_metadata.xlsx" + if title is None: + title = f"{metadata_name.capitalize()} Metadata" + + if not str(filename).endswith(".xlsx"): + filename += ".xlsx" + writer(filename, skeleton_object, metadata_name, title) + return filename + + def save_metadata_to_excel( + self, + metadata_name_or_class: Union[str, Type[BaseModel]], + object: BaseModel, + filename: Optional[str] = None, + title: Optional[str] = None, + ) -> str: + """ + Save an Excel document of the given metadata object. + + Args: + metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently: + document, script, series, survey, table, timeseries, timeseries_DB, video + Currently not supported: + geospatial, image + If passed as a BaseModel type, for instance this is what you would do with a template, then the writer defaults to a single page. + object (BaseModel): The pydantic object to save to the Excel file. + filename (Optional[str]): The path to the Excel file. Defaults to {name}_metadata.xlsx + title (Optional[str]): The title for the Excel sheet. Defaults to '{name} Metadata' + + Returns: + str: filename of metadata file + + Outputs: + An Excel file containing the metadata from the pydantic object. This file can be updated as needed. 
+        """
+        if isinstance(metadata_name_or_class, str):
+            metadata_name = self.standardize_metadata_name(metadata_name_or_class)
+            if metadata_name == "geospatial":
+                raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel")
+            schema = self.metadata_class_from_name(metadata_name)
+            writer = self._TYPE_TO_WRITER[metadata_name]
+        else:
+            schema = metadata_name_or_class
+            writer = write_to_single_sheet
+            metadata_name = metadata_name_or_class.model_json_schema()["title"]
+        skeleton_object = self.create_metadata_outline(metadata_name_or_class=metadata_name_or_class, debug=False)
+
+        if filename is None:
+            filename = f"{metadata_name}_metadata.xlsx"
+        if not str(filename).endswith(".xlsx"):
+            filename += ".xlsx"
+        if title is None:
+            title = f"{metadata_name.capitalize()} Metadata"
+
+        combined_dict = merge_dicts(
+            skeleton_object.model_dump(),
+            object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True),
+        )
+        combined_dict = standardize_keys_in_dict(combined_dict)
+        new_ob = schema(**combined_dict)
+
+        writer(filename, new_ob, metadata_name, title)
+        return filename
+
+    @staticmethod
+    def _get_metadata_name_from_excel_file(filename: str) -> str:
+        error_message = "Improperly formatted Excel file for metadata"
+        workbook = load_workbook(filename)
+        # Select the 'metadata' sheet
+        try:
+            sheet = workbook["metadata"]
+            # Get the value of cell C1
+            type_info = sheet["C1"].value
+        except KeyError:
+            raise ValueError(f"Sheet 'metadata' not found. {error_message}")
+        except Exception as e:
+            raise ValueError(f"Error reading Excel file: {e}")
+        finally:
+            # Close the workbook
+            workbook.close()
+
+        if not type_info or not isinstance(type_info, str):
+            raise ValueError(f"Cell C1 is empty or not a string. {error_message}")
+
+        cell_values = type_info.split(" ")
+
+        if len(cell_values) < 3 or cell_values[1] != "type" or cell_values[2] != "metadata":
+            raise ValueError(f"Cell C1 is improperly formatted. {error_message}")
+
+        return cell_values[0]
+
+    def read_metadata_from_excel(self, filename: str, metadata_class: Optional[Type[BaseModel]] = None) -> BaseModel:
+        """
+        Read in metadata from an appropriately formatted Excel file as a pydantic object.
+        If using standard metadata types (documents, scripts, survey, table, timeseries, timeseries_db, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided.
+
+        Args:
+            filename (str): The path to the Excel file.
+            metadata_class (Optional type of BaseModel): A pydantic class type corresponding to the type used to write the Excel file
+
+        Returns:
+            BaseModel: a pydantic object containing the metadata from the file
+        """
+        metadata_name = self._get_metadata_name_from_excel_file(filename)
+        try:
+            metadata_name = self.standardize_metadata_name(metadata_name)
+            schema = self._TYPE_TO_SCHEMA[metadata_name]
+            reader = self._TYPE_TO_READER[metadata_name]
+        except ValueError:
+            if metadata_class is None:
+                raise ValueError(
+                    f"'{metadata_name}' not supported.
Must be: {list(self._TYPE_TO_SCHEMA.keys())} or try passing in the metadata_class" + ) + schema = metadata_class + reader = excel_single_sheet_to_pydantic + read_object = reader(filename, schema) + + skeleton_object = self.create_metadata_outline(metadata_name_or_class=schema, debug=False) + + read_object_dict = read_object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True) + combined_dict = merge_dicts( + skeleton_object.model_dump(), + read_object_dict, + ) + combined_dict = standardize_keys_in_dict(combined_dict) + new_ob = schema(**combined_dict) + return new_ob + + def _raise_if_unsupported_metadata_name(self, metadata_name: str): + """ + If the type is specifically unsupported - geospatial or image - a NotImplementedError is raised + If the type is simply unknown then a ValueError is raised. + """ + if metadata_name == "image": + raise NotImplementedError("Due to an issue with image metadata schema definition causing __root__ errors") + if metadata_name not in self._TYPE_TO_SCHEMA.keys(): + raise ValueError(f"'{metadata_name}' not supported. Must be: {list(self._TYPE_TO_SCHEMA.keys())}") diff --git a/pydantic_schemas/microdata_schema.py b/pydantic_schemas/microdata_schema.py index f3bb4d3..28646c2 100644 --- a/pydantic_schemas/microdata_schema.py +++ b/pydantic_schemas/microdata_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: microdata-schema.json -# timestamp: 2024-07-24T21:06:25+00:00 +# timestamp: 2024-08-29T18:53:43+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field, constr -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class AccessPolicy(Enum): diff --git a/pydantic_schemas/resource_schema.py b/pydantic_schemas/resource_schema.py new file mode 100644 index 0000000..de2a619 --- /dev/null +++ b/pydantic_schemas/resource_schema.py @@ -0,0 +1,64 @@ +# generated by datamodel-codegen: +# filename: resource-schema.json +# timestamp: 2024-08-29T18:53:45+00:00 + +from __future__ import annotations + +from typing import Optional + +from pydantic import Field + +from .utils.schema_base_model import SchemaBaseModel + + +class Model(SchemaBaseModel): + """ + External resource schema + """ + + dctype: Optional[str] = Field( + "doc/oth", + description=( + "Document types for external resource e.g. `doc/adm` \n* `doc/adm` - Document, Administrative [doc/adm] \n*" + " `doc/anl` - Document, Analytical [doc/anl] \n* `doc/oth` - Document, Other [doc/oth] \n* `doc/qst` -" + " Document, Questionnaire [doc/qst] \n* `doc/ref` - Document, Reference [doc/ref] \n* `doc/rep` - Document," + " Report [doc/rep] \n* `doc/tec` - Document, Technical [doc/tec] \n* `aud` - Audio [aud]\n* `dat` -" + " Database [dat]\n* `map` - Map [map]\n* `dat/micro` - Microdata File [dat/micro]\n* `pic` - Photo [pic]\n*" + " `prg` - Program [prg]\n* `tbl` - Table [tbl]\n* `vid` - Video [vid] \n* `web` - Web Site [web]" + ), + title="Resource type", + ) + dcformat: Optional[str] = Field( + None, + description=( + "Document file format e.g. 
`application/zip` \n* `application/x-compressed` - Compressed, Generic \n*" + " `application/zip` - Compressed, ZIP \n* `application/x-cspro` - Data, CSPro \n* `application/dbase` -" + " Data, dBase \n* `application/msaccess` - Data, Microsoft Access \n* `application/x-sas` - Data, SAS " + " \n* `application/x-spss` - Data, SPSS \n* `application/x-stata` - Data, Stata \n* `text` - Document," + " Generic \n* `text/html` - Document, HTML \n* `application/msexcel` - Document, Microsoft Excel \n*" + " `application/mspowerpoint` - Document, Microsoft PowerPoint \n* `application/msword` - Document," + " Microsoft Word \n* `application/pdf` - Document, PDF \n* `application/postscript` - Document," + " Postscript \n* `text/plain` - Document, Plain \n* `text/wordperfect` - Document, WordPerfect \n*" + " `image/gif` - Image, GIF \n* `image/jpeg` - Image, JPEG \n* `image/png` - Image, PNG \n*" + " `image/tiff` - Image, TIFF" + ), + title="Resource Format", + ) + title: str = Field(..., description="Title") + author: Optional[str] = Field(None, description="Author") + dcdate: Optional[str] = Field(None, description="Date") + country: Optional[str] = Field(None, description="Country") + language: Optional[str] = Field(None, description="Language") + contributor: Optional[str] = Field(None, description="Contributor") + publisher: Optional[str] = Field(None, description="Publisher") + rights: Optional[str] = Field(None, description="Rights") + description: Optional[str] = Field(None, description="Description") + abstract: Optional[str] = Field(None, description="Abstract") + toc: Optional[str] = Field(None, description="TOC") + filename: Optional[str] = Field( + None, + description=( + "Resource file name or URL. For uploading a file, use the field `file` in formData or use the `Upload file`" + " endpoint." + ), + ) diff --git a/pydantic_schemas/schema_interface.py b/pydantic_schemas/schema_interface.py deleted file mode 100644 index 5bf3a13..0000000 --- a/pydantic_schemas/schema_interface.py +++ /dev/null @@ -1,276 +0,0 @@ -from typing import Dict, Optional, Type - -from openpyxl import load_workbook -from pydantic import BaseModel - -from . 
import ( # image_schema, - document_schema, - geospatial_schema, - microdata_schema, - script_schema, - table_schema, - timeseries_db_schema, - timeseries_schema, - video_schema, -) -from .utils.excel_to_pydantic import excel_doc_to_pydantic, excel_single_sheet_to_pydantic -from .utils.pydantic_to_excel import write_across_many_sheets, write_to_single_sheet -from .utils.quick_start import make_skeleton -from .utils.template_to_pydantic import pydantic_from_template -from .utils.utils import standardize_keys_in_dict - - -class SchemaInterface: - """ - Interface with Excel for creating, saving and updating metadata for various types: - documents, scripts, survey, table, timeseries, timeseries_db, video - - Retrieve pydantic model definitions for each metadata type - """ - - _TYPE_TO_SCHEMA = { - "document": document_schema.ScriptSchemaDraft, - "geospatial": geospatial_schema.GeospatialSchema, - # "image":image_schema.ImageDataTypeSchema, - "script": script_schema.ResearchProjectSchemaDraft, - "survey": microdata_schema.MicrodataSchema, - "table": table_schema.Model, - "timeseries": timeseries_schema.TimeseriesSchema, - "timeseries_db": timeseries_db_schema.TimeseriesDatabaseSchema, - "video": video_schema.Model, - } - - _TYPE_TO_WRITER = { - "document": write_across_many_sheets, - # "geospatial":, - # "image":, - "script": write_across_many_sheets, - "survey": write_across_many_sheets, - "table": write_across_many_sheets, - "timeseries": write_across_many_sheets, - "timeseries_db": write_to_single_sheet, # one sheet - "video": write_to_single_sheet, # one sheet - } - - _TYPE_TO_READER = { - "document": excel_doc_to_pydantic, - # "geospatial":, - # "image":, - "script": excel_doc_to_pydantic, - "survey": excel_doc_to_pydantic, - "table": excel_doc_to_pydantic, - "timeseries": excel_doc_to_pydantic, - "timeseries_db": excel_single_sheet_to_pydantic, # one sheet - "video": excel_single_sheet_to_pydantic, # one sheet - } - - def get_metadata_class(self, metadata_type: str): - metadata_type = self.standardize_metadata_type_name(metadata_type) - schema = self._TYPE_TO_SCHEMA[metadata_type] - return schema - - def template_to_pydantic( - self, template: Dict, parent_schema_type: str, name: Optional[str] = None - ) -> Type[BaseModel]: - schema = self.get_metadata_class(parent_schema_type) - - return pydantic_from_template(template, schema, name) - - def list_metadata_types(self): - return list(self._TYPE_TO_SCHEMA.keys()) - - @staticmethod - def _merge_dicts(base, update): - if len(update) == 0: - return base - new_dict = {} - for key, base_value in base.items(): - if key in update: - update_value = update[key] - if isinstance(base_value, dict): - if isinstance(update_value, dict) and len(update_value) > 0: - new_dict[key] = SchemaInterface._merge_dicts(base_value, update_value) - else: - new_dict[key] = base_value - elif isinstance(base_value, list): - if isinstance(update_value, list) and len(update_value) > 0: - new_list = [] - min_length = min(len(base_value), len(update_value)) - for i in range(min_length): - if isinstance(base_value[i], dict): - if isinstance(update_value[i], dict): - new_list.append(SchemaInterface._merge_dicts(base_value[i], update_value[i])) - else: - new_list.append(base_value[i]) - else: - new_list.append(update_value[i]) - new_list.extend(update_value[min_length:]) - new_dict[key] = new_list - else: - new_dict[key] = base_value - else: - if update_value is not None: - new_dict[key] = update_value - else: - new_dict[key] = base_value - else: - new_dict[key] = base_value - 
return new_dict - - def standardize_metadata_type_name(self, metadata_type: str) -> str: - metadata_type = metadata_type.lower() - metadata_type = metadata_type.replace("-", "_") - if metadata_type == "microdata" or metadata_type == "survey_microdata": - metadata_type = "survey" - self._raise_if_unsupported_metadata_type(metadata_type=metadata_type) - return metadata_type - - def type_to_outline(self, metadata_type: str, debug: bool = False) -> BaseModel: - schema = self.get_metadata_class(metadata_type) - skeleton_object = make_skeleton(schema, debug=debug) - return skeleton_object - - def write_outline_metadata_to_excel( - self, metadata_type: str, filename: Optional[str] = None, title: Optional[str] = None - ) -> str: - """ - Create an Excel file formatted for writing the given metadata_type metadata. - - Args: - metadata_type (str): the name of a supported metadata type, currently: - document, script, series, survey, table, timeseries, timeseries_DB, video - Currently not supported: - geospatial, image - filename (Optional[str]): The path to the Excel file. If None, defaults to {metadata_type}_metadata.xlsx - title (Optional[str]): The title for the Excel sheet. If None, defaults to '{metadata_type} Metadata' - - Returns: - str: filename of metadata file - - Outputs: - An Excel file into which metadata can be entered - """ - metadata_type = self.standardize_metadata_type_name(metadata_type) - if metadata_type == "geospatial": - raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel") - - if filename is None: - filename = f"{metadata_type}_metadata.xlsx" - if not str(filename).endswith(".xlsx"): - filename += ".xlsx" - if title is None: - title = f"{metadata_type.capitalize()} Metadata" - skeleton_object = self.type_to_outline(metadata_type, debug=False) - writer = self._TYPE_TO_WRITER[metadata_type] - writer(filename, skeleton_object, metadata_type, title) - return filename - - def save_metadata_to_excel( - self, metadata_type: str, object: BaseModel, filename: Optional[str] = None, title: Optional[str] = None - ) -> str: - """ - Save an Excel document of the given metadata_type metadata. - - Args: - metadata_type (str): the name of a supported metadata type, currently: - document, script, series, survey, table, timeseries, timeseries_db, video - Currently not supported: - geospatial, image - object (BaseModel): The pydantic object to save to the Excel file. - filename (Optional[str]): The path to the Excel file. Defaults to {name}_metadata.xlsx - title (Optional[str]): The title for the Excel sheet. Defaults to '{name} Metadata' - - Returns: - str: filename of metadata file - - Outputs: - An Excel file containing the metadata from the pydantic object. This file can be updated as needed. 
- """ - metadata_type = self.standardize_metadata_type_name(metadata_type) - if metadata_type == "geospatial": - raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel") - - if filename is None: - filename = f"{metadata_type}_metadata.xlsx" - if not str(filename).endswith(".xlsx"): - filename += ".xlsx" - if title is None: - title = f"{metadata_type.capitalize()} Metadata" - - skeleton_object = self.type_to_outline(metadata_type=metadata_type, debug=False) - combined_dict = self._merge_dicts( - skeleton_object.model_dump(), - object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True), - ) - combined_dict = standardize_keys_in_dict(combined_dict) - - schema = self._TYPE_TO_SCHEMA[metadata_type] - new_ob = schema(**combined_dict) - - writer = self._TYPE_TO_WRITER[metadata_type] - writer(filename, new_ob, metadata_type, title) - return filename - - @staticmethod - def _get_metadata_type_from_excel_file(filename: str) -> str: - error_message = "Improperly formatted Excel file for metadata" - workbook = load_workbook(filename) - # Select the 'metadata' sheet - try: - sheet = workbook["metadata"] - # Get the value of cell C1 - type_info = sheet["C1"].value - except KeyError: - raise ValueError(f"Sheet 'metadata' not found. {error_message}") - except Exception as e: - raise ValueError(f"Error reading Excel file: {e}") - finally: - # Close the workbook - workbook.close() - - if not type_info or not isinstance(type_info, str): - raise ValueError(f"Cell C3 is empty or not a string. {error_message}") - - cell_values = type_info.split(" ") - - if len(cell_values) < 3 or cell_values[1] != "type" or cell_values[2] != "metadata": - raise ValueError(f"Cell C3 is improperly formatted. {error_message}") - - return cell_values[0] - - def read_metadata_from_excel(self, filename: str) -> BaseModel: - """ - Read in metadata_type metadata from an appropriately formatted Excel file as a pydantic object. - - Args: - filename (str): The path to the Excel file. - - Returns: - BaseModel: a pydantic object containing the metadata from the file - """ - metadata_type = self._get_metadata_type_from_excel_file(filename) - metadata_type = self.standardize_metadata_type_name(metadata_type) - schema = self._TYPE_TO_SCHEMA[metadata_type] - reader = self._TYPE_TO_READER[metadata_type] - read_object = reader(filename, schema) - skeleton_object = self.type_to_outline(metadata_type=metadata_type, debug=False) - - read_object_dict = read_object.model_dump(exclude_none=True, exclude_unset=True, exclude_defaults=True) - combined_dict = self._merge_dicts( - skeleton_object.model_dump(), - read_object_dict, - ) - combined_dict = standardize_keys_in_dict(combined_dict) - schema = self._TYPE_TO_SCHEMA[metadata_type] - new_ob = schema(**combined_dict) - return new_ob - - def _raise_if_unsupported_metadata_type(self, metadata_type: str): - """ - If the type is specifically unsupported - geospatial or image - a NotImplementedError is raised - If the type is simply unknown then a ValueError is raised. - """ - if metadata_type == "image": - raise NotImplementedError("Due to an issue with image metadata schema definition causing __root__ errors") - if metadata_type not in self._TYPE_TO_SCHEMA.keys(): - raise ValueError(f"'{metadata_type}' not supported. 
Must be: {list(self._TYPE_TO_SCHEMA.keys())}") diff --git a/pydantic_schemas/script_schema.py b/pydantic_schemas/script_schema.py index 17ef719..cb9e2ee 100644 --- a/pydantic_schemas/script_schema.py +++ b/pydantic_schemas/script_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: script-schema.json -# timestamp: 2024-07-24T21:06:27+00:00 +# timestamp: 2024-08-29T18:53:47+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Overwrite(Enum): diff --git a/pydantic_schemas/table_schema.py b/pydantic_schemas/table_schema.py index d04c0bb..6c0a88d 100644 --- a/pydantic_schemas/table_schema.py +++ b/pydantic_schemas/table_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: table-schema.json -# timestamp: 2024-07-24T21:06:30+00:00 +# timestamp: 2024-08-29T18:53:48+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Overwrite(Enum): diff --git a/pydantic_schemas/tests/test_excel_interface.py b/pydantic_schemas/tests/test_excel_interface.py index 4d6203e..543501d 100644 --- a/pydantic_schemas/tests/test_excel_interface.py +++ b/pydantic_schemas/tests/test_excel_interface.py @@ -1,25 +1,25 @@ import pytest -from pydantic_schemas.schema_interface import SchemaInterface +from pydantic_schemas.metadata_manager import MetadataManager @pytest.mark.parametrize( - "metadata_type", ["document", "script", "series", "survey", "table", "timeseries_db", "timeseries", "video"] + "metadata_name", ["document", "script", "survey", "table", "timeseries_db", "timeseries", "video"] ) -def test_metadata(tmpdir, metadata_type): - ei = SchemaInterface() +def test_metadata(tmpdir, metadata_name): + ei = MetadataManager() # Write empty metadata - filename = ei.write_outline_metadata_to_excel( - metadata_type=metadata_type, filename=tmpdir.join(f"test_{metadata_type}.xlsx"), title=metadata_type + filename = ei.write_metadata_outline_to_excel( + metadata_name_or_class=metadata_name, filename=tmpdir.join(f"test_{metadata_name}.xlsx"), title=metadata_name ) # Read the metadata back tmp = ei.read_metadata_from_excel(filename=filename) # Save the read metadata to a new file - filename2 = tmpdir.join(f"test_{metadata_type}_2.xlsx") - ei.save_metadata_to_excel(metadata_type=metadata_type, object=tmp, filename=filename2, title=metadata_type) + filename2 = tmpdir.join(f"test_{metadata_name}_2.xlsx") + ei.save_metadata_to_excel(metadata_name_or_class=metadata_name, object=tmp, filename=filename2, title=metadata_name) # make an outline object - ei.type_to_outline(metadata_type=metadata_type) + ei.create_metadata_outline(metadata_name_or_class=metadata_name) diff --git a/pydantic_schemas/tests/test_pydantic_to_excel.py b/pydantic_schemas/tests/test_pydantic_to_excel.py index ef64653..6090fd1 100644 --- a/pydantic_schemas/tests/test_pydantic_to_excel.py +++ b/pydantic_schemas/tests/test_pydantic_to_excel.py @@ -12,7 +12,6 @@ # from pydantic_schemas.definitions.image_schema import ImageDataTypeSchema from pydantic_schemas.microdata_schema import MicrodataSchema from pydantic_schemas.script_schema import ResearchProjectSchemaDraft -from pydantic_schemas.series_schema import Series from pydantic_schemas.table_schema import Model as TableModel from pydantic_schemas.timeseries_db_schema import 
TimeseriesDatabaseSchema from pydantic_schemas.timeseries_schema import TimeseriesSchema @@ -365,7 +364,6 @@ class WithDict(BaseModel): # "Image":ImageDataTypeSchema, "Survey": (MicrodataSchema, write_across_many_sheets, excel_doc_to_pydantic), "Script": (ResearchProjectSchemaDraft, write_across_many_sheets, excel_doc_to_pydantic), - "Series": (Series, write_to_single_sheet, excel_single_sheet_to_pydantic), # should be one sheet "Table": (TableModel, write_across_many_sheets, excel_doc_to_pydantic), "Timeseries_DB": ( TimeseriesDatabaseSchema, diff --git a/pydantic_schemas/timeseries_db_schema.py b/pydantic_schemas/timeseries_db_schema.py index 6c5bc95..add308e 100644 --- a/pydantic_schemas/timeseries_db_schema.py +++ b/pydantic_schemas/timeseries_db_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: timeseries-db-schema.json -# timestamp: 2024-07-24T21:06:31+00:00 +# timestamp: 2024-08-29T18:53:50+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Overwrite(Enum): diff --git a/pydantic_schemas/timeseries_schema.py b/pydantic_schemas/timeseries_schema.py index b3f39a2..dd5dcab 100644 --- a/pydantic_schemas/timeseries_schema.py +++ b/pydantic_schemas/timeseries_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: timeseries-schema.json -# timestamp: 2024-07-24T21:06:33+00:00 +# timestamp: 2024-08-29T18:53:52+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Producer(SchemaBaseModel): diff --git a/pydantic_schemas/utils/quick_start.py b/pydantic_schemas/utils/quick_start.py index ece358f..de09833 100644 --- a/pydantic_schemas/utils/quick_start.py +++ b/pydantic_schemas/utils/quick_start.py @@ -14,7 +14,6 @@ # "image_schema": "ImageDataTypeSchema", "microdata_schema": "MicrodataSchema", "script_schema": "ResearchProjectSchemaDraft", - "series_schema": "Series", "table_schema": "Model", "timeseries_db_schema": "TimeseriesDatabaseSchema", "timeseries_schema": "TimeseriesSchema", diff --git a/pydantic_schemas/schema_base_model.py b/pydantic_schemas/utils/schema_base_model.py similarity index 100% rename from pydantic_schemas/schema_base_model.py rename to pydantic_schemas/utils/schema_base_model.py diff --git a/pydantic_schemas/utils/template_to_pydantic.py b/pydantic_schemas/utils/template_to_pydantic.py deleted file mode 100644 index f44d311..0000000 --- a/pydantic_schemas/utils/template_to_pydantic.py +++ /dev/null @@ -1,189 +0,0 @@ -import warnings -from typing import Dict, List, Optional, Tuple, Type - -from pydantic import BaseModel, Field, create_model - -from .utils import get_subtype_of_optional_or_list, is_list_annotation, is_optional_annotation, standardize_keys_in_dict - - -def get_child_field_info_from_dot_annotated_name(name, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "get_child_field_info_from_dot_annotated_name" - name_split = name.split(".") - for key in name_split[:-1]: - parent_schema = parent_schema.model_fields[key].annotation - if is_optional_annotation(parent_schema) or is_list_annotation(parent_schema): - parent_schema = get_subtype_of_optional_or_list(parent_schema) - if not isinstance(parent_schema, type(BaseModel)): - raise KeyError(name) - try: - child_field_info = parent_schema.model_fields[name_split[-1]] - 
except KeyError as e: - raise KeyError(name) from e - except: - raise ValueError(f"name={name}, parent_schema={parent_schema}") - return child_field_info - - -def define_simple_element(item, parent_schema, element_type=str): - assert isinstance(parent_schema, type(BaseModel)), "define_simple_element" - assert ( - isinstance(item, dict) and "type" in item and item["type"] in ["string", "text", "integer", "number", "boolean"] - ), f"expected string, integer or boolean item, got {item}" - try: - child_field_info = get_child_field_info_from_dot_annotated_name(item["key"], parent_schema) - if "title" in item: - child_field_info.title = item["title"] - if "description" in item: - child_field_info.description = item["description"] - except KeyError as e: - warnings.warn(f"KeyError: {e}. Proceeding since {item['key']} is a string type.", UserWarning) - child_field_info = Field(..., title=item["title"]) - if "help_text" in item: - child_field_info.description = item["help_text"] - if "required" in item and item["required"]: - field_type = element_type, child_field_info - else: - child_field_info.default = None - field_type = Optional[element_type], child_field_info - return {item["key"]: field_type} - - -def get_children_of_props(props, parent_schema) -> Dict[str, Tuple["type_annotation", "field_info"]]: - assert isinstance(parent_schema, type(BaseModel)), "get_children_of_props" - children = {} - for prop in props: - if "prop_key" not in prop: - children.update(template_type_handler(prop, parent_schema)) - else: - name = prop["prop_key"] - try: - child_field_info = get_child_field_info_from_dot_annotated_name(name, parent_schema) - if "title" in prop: - child_field_info.title = prop["title"] - if "help_text" in prop: - child_field_info.description = prop["help_text"] - child_field = child_field_info.annotation, child_field_info - children[prop["key"]] = child_field - except KeyError as e: - children.update(template_type_handler(prop, parent_schema)) - return children - - -def define_array_element(item, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "define_array_element" - assert "type" in item and ( - item["type"] == "array" or item["type"] == "nested_array" - ), f"expected array item but got {item}" - assert "key" in item, f"expected key in item but got {item.keys()}" - if "props" not in item: - warnings.warn(f"array without type found, assuming array of str: {item}") - field_info = Field(..., title=item["title"]) - if "help_text" in item: - field_info.description = item["help_text"] - return {item["key"]: (List[str], field_info)} - else: - children = get_children_of_props(item["props"], parent_schema) - item_element = create_model(f"{item['key']}_item", **children) - return {item["key"]: (List[item_element], item_element)} - - -def define_simple_array_element(item, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "define_simple_array_element" - assert ( - isinstance(item, dict) and "type" in item and item["type"] == "simple_array" - ), f"expected simple_array item, got {item}" - try: - child_field_info = get_child_field_info_from_dot_annotated_name(item["key"], parent_schema) - if "title" in item: - child_field_info.title = item["title"] - if "description" in item: - child_field_info.description = item["description"] - except KeyError as e: - warnings.warn(f"KeyError: {e}. 
Proceeding since {item['key']} is a simple_array type.", UserWarning) - child_field_info = Field(..., title=item["title"]) - if "help_test" in item: - child_field_info.description = item["help_text"] - if "required" in item and item["required"]: - field_type = List[str], child_field_info - else: - child_field_info.default = None - field_type = Optional[List[str]], child_field_info - return {item["key"]: field_type} - - -def define_from_section_container(item, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "define_from_section_container" - assert ( - isinstance(item, dict) and "type" in item and item["type"] == "section_container" - ), f"expected section_container got {item}" - name = item["key"] - sub_model = create_model(name, **define_group_of_elements(item["items"], parent_schema)) - sub_field = Field(...) - if "title" in item: - sub_field.title = item["title"] - if "required" not in item or not item["required"]: - sub_field.default = None - return {name: (sub_model, sub_field)} - - -def define_group_of_elements(items, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "define_group_of_elements" - elements = {} - for i, item in enumerate(items): - if "is_custom" in item and item["is_custom"] == True: - if "additional" not in elements: - elements["additional"] = {} - elements["additional"].update(template_type_handler(item, parent_schema)) - elements["additional"] = standardize_keys_in_dict(elements["additional"], pascal_to_snake=True) - else: - elements.update(template_type_handler(item, parent_schema)) - elements = standardize_keys_in_dict(elements, pascal_to_snake=True) - if "additional" in elements: - additional = elements.pop("additional") - additional = create_model("additional", **additional) - sub_field = Field(...) 
- sub_field.title = "additional" - elements["additional"] = additional, sub_field - return elements - - -def template_type_handler(item, parent_schema): - assert isinstance(parent_schema, type(BaseModel)), "template_type_handler" - if item["type"] == "section_container": - return define_from_section_container(item, parent_schema) - elif item["type"] in ["string", "text"]: - return define_simple_element(item, parent_schema, str) - elif item["type"] in ["integer", "number"]: - return define_simple_element(item, parent_schema, int) - elif item["type"] == "boolean": - return define_simple_element(item, parent_schema, bool) - elif item["type"] in ["array", "nested_array"]: - return define_array_element(item, parent_schema) - elif item["type"] == "simple_array": - return define_simple_array_element(item, parent_schema) - elif item["type"] == "section": - warnings.warn(f"encountered section {item['key']}, {item['title']}, ignoring this heirarchy and appending") - if "items" in item: - return define_group_of_elements(item["items"], parent_schema) - elif "props" in item: - return define_group_of_elements(item["props"], parent_schema) - else: - raise ValueError(f"section does not contain items or props, found only {item}") - else: - raise NotImplementedError(f"type {item['type']}, {item}") - - -def pydantic_from_template( - template: Dict, parent_schema: Type[BaseModel], name: Optional[str] = None -) -> Type[BaseModel]: - assert isinstance(parent_schema, type(BaseModel)), "pydantic_from_template" - assert "items" in template, f"expected 'items' in template but got {list(template.keys())}" - m = define_group_of_elements(template["items"], parent_schema) - m = standardize_keys_in_dict(m, pascal_to_snake=True) - if name is None: - if "title" in template: - name = template["title"] - else: - name = "new_model" - name = name.replace(" ", "_").rstrip("_").split(".")[-1] - return create_model(name, **m) diff --git a/pydantic_schemas/utils/utils.py b/pydantic_schemas/utils/utils.py index 1e4a923..a7b3d02 100644 --- a/pydantic_schemas/utils/utils.py +++ b/pydantic_schemas/utils/utils.py @@ -110,6 +110,62 @@ def seperate_simple_from_pydantic(ob: BaseModel) -> Dict[str, Dict]: return {"simple": simple_children, "pydantic": pydantic_children} +def merge_dicts(base, update): + """merge a pair of dicitonaries in which the values are themselves either dictionaries to be merged or lists of + dictionaries to be merged""" + if len(update) == 0: + return base + elif len(base) == 0: + return update + new_dict = {} + for key, base_value in base.items(): + if key in update: + update_value = update[key] + if isinstance(base_value, dict): + if isinstance(update_value, dict): + new_dict[key] = merge_dicts(base_value, update_value) + else: + new_dict[key] = base_value + elif isinstance(base_value, list): + if isinstance(update_value, list) and len(update_value) > 0: + new_list = [] + min_length = min(len(base_value), len(update_value)) + for i in range(min_length): + if isinstance(base_value[i], dict): + if isinstance(update_value[i], dict): + new_list.append(merge_dicts(base_value[i], update_value[i])) + else: + new_list.append(base_value[i]) + else: + new_list.append(update_value[i]) + new_list.extend(update_value[min_length:]) + new_dict[key] = new_list + else: + new_dict[key] = base_value + else: + if update_value is not None: + new_dict[key] = update_value + else: + new_dict[key] = base_value + else: + new_dict[key] = base_value + for key, update_value in update.items(): + if key not in base: + new_dict[key] = 
update_value + return new_dict + + +def capitalize_first_letter(s): + if s: + return s[0].upper() + s[1:] + return s + + +def split_on_capitals(s): + # Use regular expression to split on capitalized letters + return re.findall(r"[a-z]+|[A-Z][a-z]*", s) + + def _standardize_keys_in_list_of_possible_dicts(lst: List[any], snake_to_pascal, pascal_to_snake) -> List[Any]: new_value = [] for item in lst: @@ -128,17 +184,6 @@ def _standardize_keys_in_list_of_possible_dicts(lst: List[any], snake_to_pascal, return new_value -def capitalize_first_letter(s): - if s: - return s[0].upper() + s[1:] - return s - - -def split_on_capitals(s): - # Use regular expression to split on capitalized letters - return re.findall(r"[a-z]+|[A-Z][a-z]*", s) - - def standardize_keys_in_dict( d: Dict[str, Any], snake_to_pascal: bool = False, pascal_to_snake: bool = False ) -> Dict[str, Any]: diff --git a/pydantic_schemas/video_schema.py b/pydantic_schemas/video_schema.py index f0a26c9..285479a 100644 --- a/pydantic_schemas/video_schema.py +++ b/pydantic_schemas/video_schema.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: video-schema.json -# timestamp: 2024-07-24T21:06:35+00:00 +# timestamp: 2024-08-29T18:53:54+00:00 from __future__ import annotations @@ -9,7 +9,7 @@ from pydantic import Extra, Field -from .schema_base_model import SchemaBaseModel +from .utils.schema_base_model import SchemaBaseModel class Overwrite(Enum): diff --git a/pyproject.toml b/pyproject.toml index bb6a5d6..51d8047 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,12 @@ [tool.poetry] name = "metadataschemas" -version = "0.1.5" +version = "0.1.6" description = "" authors = ["Mehmood Asghar ", "Gordon Blackadder "] readme = "README.md" packages = [ { include = "*_schema.py", from = "pydantic_schemas", to = "metadataschemas"}, - { include = "schema_base_model.py", from = "pydantic_schemas", to = "metadataschemas"}, - { include = "schema_interface.py", from = "pydantic_schemas", to = "metadataschemas"}, + { include = "metadata_manager.py", from = "pydantic_schemas", to = "metadataschemas"}, { include = "utils", from = "pydantic_schemas", to = "metadataschemas"}, ]
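
Based on the round trip exercised in pydantic_schemas/tests/test_excel_interface.py, a minimal usage sketch of the renamed manager might look like the following; only calls that appear in this patch are used, and the file names are illustrative assumptions rather than anything the patch prescribes.

# Hypothetical example, not part of the patch: outline -> Excel -> pydantic round trip.
from pydantic_schemas.metadata_manager import MetadataManager

manager = MetadataManager()

# Write an empty, formatted workbook for one of the supported metadata types.
outline_file = manager.write_metadata_outline_to_excel(
    metadata_name_or_class="video", filename="video_outline.xlsx", title="video"
)

# Read the (possibly hand-edited) workbook back in as a pydantic object.
video_metadata = manager.read_metadata_from_excel(filename=outline_file)

# Save that object out to a fresh workbook, or build an in-memory outline directly.
manager.save_metadata_to_excel(
    metadata_name_or_class="video", object=video_metadata, filename="video_metadata_2.xlsx", title="video"
)
skeleton = manager.create_metadata_outline(metadata_name_or_class="video")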