Skip to content

Commit

Permalink
Add UnzipZarr processor & Update registry (#47)
Browse files Browse the repository at this point in the history
  • Loading branch information
andersy005 authored Apr 9, 2020
1 parent 1f484de commit 903a226
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
1 change: 1 addition & 0 deletions pop_tools/data_registry.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ g.e20.G.TL319_t13.control.001_hfreq.nc 439eb1abf14737341ead088bfd9a3c1b795dc1f79
g.e20.G.TL319_t13.control.001_hfreq-coarsen.nc 145659813daf1a607d0857c10699c721693333e39aeb270e116103185b7236ae
Pac_POP0.1_JRA_IAF_1993-12-6-test.nc fc9c649428c1e62108fad00644805130f58e4063e0219ed677a357c077886f8e
Pac_grid_pbc_1301x305x62.tx01_62l.2013-07-13.nc 9ead77ee8b1e352c9b0316664b3bfe9b84e322ca785bfd9a024da54a2b6dc60e
comp-grid.tx9.1v3.20170718.zarr.zip 2c1cd41c1c803c0565bc6120268aac728757207963386b727f00b822b81155da
42 changes: 41 additions & 1 deletion pop_tools/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import os
from pathlib import Path

import pkg_resources
import pooch
Expand All @@ -12,5 +13,44 @@
version_dev='master',
base_url='ftp://ftp.cgd.ucar.edu/archive/aletheia-data/cesm-data/ocn/',
)

# Populate the registry (file names and their hashes) from the
# ``data_registry.txt`` file bundled with the ``pop_tools`` package.
# The resource stream is opened in a ``with`` block so it is closed as soon
# as the registry has been read instead of being left open.
with pkg_resources.resource_stream('pop_tools', 'data_registry.txt') as _registry_stream:
    DATASETS.load_registry(_registry_stream)


class UnzipZarr(pooch.processors.Unzip):
    """
    Processor that unpacks a zarr store zip archive and
    returns the zarr store path.
    """

    def __call__(self, fname, action, pooch):
        """
        Extract all files from the given archive and locate the zarr store.

        Parameters
        ----------
        fname : str
            Full path of the zipped file in local storage.
        action : str
            Indicates what action was taken by :meth:`pooch.Pooch.fetch`:

            * ``"download"``: File didn't exist locally and was downloaded
            * ``"update"``: Local file was outdated and was re-downloaded
            * ``"fetch"``: File exists and is up to date so it wasn't downloaded
        pooch : :class:`pooch.Pooch`
            The instance of :class:`pooch.Pooch` that is calling this.

        Returns
        -------
        zarr_store : str
            A full path (POSIX-style separators) to a zarr store in the
            extracted archive.

        Raises
        ------
        ValueError
            If the extraction folder contains no files, so no zarr store
            can be located.
        """
        extract_dir = fname + self.suffix
        # Re-extract whenever the archive was (re-)downloaded, or when the
        # extraction folder is missing even though the archive is cached.
        if action in ('update', 'download') or not os.path.exists(extract_dir):
            # Make sure that the folder with the extracted files exists
            os.makedirs(extract_dir, exist_ok=True)
            self._extract_file(fname, extract_dir)

        # Collect every extracted file (including those in subdirectories).
        extracted_files = [
            os.path.join(root, name)
            for root, _, files in os.walk(extract_dir)
            for name in files
        ]
        if not extracted_files:
            raise ValueError(
                f'No files found in {extract_dir!r} after extracting {fname!r}; '
                'cannot locate a zarr store.'
            )

        # The store path is taken to be the folder holding the first file in
        # lexicographic order.
        # NOTE(review): this assumes that file sits directly in the store's
        # root folder -- confirm for archives with other layouts.
        return Path(min(extracted_files)).parent.as_posix()

0 comments on commit 903a226

Please sign in to comment.