class UnzipZarr(pooch.processors.Unzip):
    """
    Processor that unpacks a zarr store zip archive and
    returns the zarr store path.

    The extraction itself is delegated to the parent ``Unzip`` processor via
    ``self._extract_file``; this subclass only changes what is returned: a
    single directory path instead of a list of extracted file names.
    """

    def __call__(self, fname, action, pooch):
        """
        Extract all files from the given archive and return the store path.

        Parameters
        ----------
        fname : str
            Full path of the zipped file in local storage.
        action : str
            Indicates what action was taken by :meth:`pooch.Pooch.fetch`:

            * ``"download"``: File didn't exist locally and was downloaded
            * ``"update"``: Local file was outdated and was re-downloaded
            * ``"fetch"``: File exists and is updated so it wasn't downloaded
        pooch : :class:`pooch.Pooch`
            The instance of :class:`pooch.Pooch` that is calling this.
            (Unused here; note that it shadows the ``pooch`` module inside
            this method.)

        Returns
        -------
        zarr_store : str
            POSIX-style path of the directory that holds the
            lexicographically first extracted file. NOTE(review): this is
            presumed to be the root of the zarr store (root-level metadata
            files such as ``.zattrs`` sort first) -- confirm for archives
            with a different layout.

        Raises
        ------
        ValueError
            If no files are found in the extraction folder, so no store path
            can be determined.
        """
        extract_dir = fname + self.suffix
        if action in ('update', 'download') or not os.path.exists(extract_dir):
            # Make sure that the folder with the extracted files exists.
            # ``exist_ok`` replaces a separate check-then-create step.
            os.makedirs(extract_dir, exist_ok=True)
            self._extract_file(fname, extract_dir)
        # Collect every file (including those in subdirectories) found in the
        # folder of unzipped files.
        extracted = [
            os.path.join(root, name) for root, _, files in os.walk(extract_dir) for name in files
        ]
        if not extracted:
            # Fail with a clear message instead of an IndexError on an empty list.
            raise ValueError(
                f'No files found in extracted archive {extract_dir!r}; '
                'cannot determine the zarr store path.'
            )
        # ``min`` picks the same element as ``sorted(...)[0]`` without a full sort.
        return Path(min(extracted)).parent.as_posix()