Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

eNATL Pyramid recipe #10

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/deploy_recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
- name: "Install dependencies"
run: |
python -m pip install --upgrade pip
pip install pangeo-forge-runner
pip install git+https://github.com/leap-stc/pangeo-forge-runner
- name: "Deploy recipes"
run: |
pangeo-forge-runner bake \
Expand Down
8 changes: 5 additions & 3 deletions configs/config_dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
repo_path = os.environ['GITHUB_REPOSITORY']
FEEDSTOCK_NAME = repo_path.split('/')[-1]

c.Bake.prune = 1
c.Bake.prune = 0
c.Bake.bakery_class = "pangeo_forge_runner.bakery.dataflow.DataflowBakery"
c.DataflowBakery.use_dataflow_prime = True
c.DataflowBakery.max_workers = 50
c.DataflowBakery.use_dataflow_prime = False
c.DataflowBakery.machine_type = "e2-highmem-16"
c.DataflowBakery.num_workers = 5
c.DataflowBakery.max_workers = 10
c.DataflowBakery.use_public_ips = True
c.DataflowBakery.service_account_email = (
"[email protected]"
Expand Down
21 changes: 9 additions & 12 deletions feedstock/catalog.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@

# All the information important to cataloging.
"ncviewjs:meta_yaml_url": "https://github.com/leap-stc/proto_feedstock/blob/main/feedstock/meta.yaml"
"ncviewjs:meta_yaml_url": "https://github.com/leap-stc/eNATL_feedstock/blob/main/feedstock/meta.yaml"
tags:
- my-custom-tag
- zarr
- ocean
stores:
- id: "small"
name: "The cool small Proto Dataset"
url: "gs://leap-scratch/data-library/feedstocks/proto_feedstock/small.zarr"
"ncviewjs:rechunking":
- path: "gs://some-bucket/small.zarr"
use_case: "multiscales"

- id: "large"
name: "The even cooler large Proto Dataset" # no pyramids
url: "gs://leap-scratch/data-library/feedstocks/proto_feedstock/large.zarr"
- id: "enatl60-blbt02"
name: "eNATL60"
url: "gs://leap-persistent/data-library/feedstocks/eNATL_feedstock/eNATL60-BLBT02.zarr"
- id: "enatl60-blbt02-pyramid"
name: "eNATL60-pyramid"
url: "gs://leap-persistent/data-library/feedstocks/eNATL_feedstock/eNATL60-BLBT02-pyramid.zarr"
9 changes: 7 additions & 2 deletions feedstock/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ title: "LEAP Data Library"
description: >
eNATL60-TSW-60m is an extraction of a very high resolution oceanic simulation of the North Atlantic performed at MEOM, IGE (FRANCE)
recipes:
- id: eNATL60_BLBT02
- id: eNATL60-BLBT02
object: "eNATL60:eNATL60_BLBT02"
- id: enatl60-blbt02-pyramid
object: "pyramid:pyramid"
provenance:
providers:
- name: "Zenodo"
Expand All @@ -23,4 +25,7 @@ maintainers:
github: jbusecke
- name: "Charles Stern"
orcid: 0000-0002-4078-0852
github: cisaacstern
github: cisaacstern
- name: "Raphael Hagen"
orcid: 0000-0003-1994-1153
github: norlandrhagen
85 changes: 85 additions & 0 deletions feedstock/pyramid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import apache_beam as beam
import xarray as xr
import numpy as np
import xesmf as xe
from pangeo_forge_recipes.transforms import OpenWithXarray
from pangeo_forge_recipes.patterns import FileType, pattern_from_file_sequence
from dataclasses import dataclass
from leap_data_management_utils.data_management_transforms import (
get_catalog_store_urls,
)

# Parse the catalog store locations (this is where the data is copied to
# after a successful write, and possibly testing).
catalog_store_urls = get_catalog_store_urls("feedstock/catalog.yaml")


# How many pyramid levels to build:
# https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2023MS003959
# Spatial resolution from ds.attrs seems to be 1/60 degree, i.e. roughly
# 111000 m / 60 at the equator. The WebMercatorQuad zoom level for that
# resolution was computed once with morecantile:
# import morecantile
# tms = morecantile.tms.get("WebMercatorQuad")
# lvls = tms.zoom_for_res(111000.0/60.0)
# > 6
levels = 6


# Single-element file pattern: the one source Zarr store, with "time" as the
# concat dimension expected by downstream pangeo-forge transforms.
pattern = pattern_from_file_sequence(
    [catalog_store_urls["enatl60-blbt02"]],
    concat_dim="time",
)


@dataclass
class GenerateWeights(beam.PTransform):
    """Custom PTransform to generate and persist xESMF regridding weights.

    Consumes ``(key, xr.Dataset)`` tuples and maps each dataset to the
    regridding-weights dataset for a 4096x4096 regular lat/lon target grid
    (the grid size of pyramid level 6). As a side effect the weights are
    written to GCS as both NetCDF and Zarr.
    """

    def _generate_weights(self, ds: xr.Dataset) -> xr.Dataset:
        # Tag the source grid with a CRS so downstream tooling treats it as lat/lon.
        ds = ds.rio.write_crs("EPSG:4326")
        # A single time slice of one variable is enough to describe the source grid.
        nds = ds.isel(time=0)[["vosaline"]]

        lat_min, lat_max = nds.nav_lat.min().values, nds.nav_lat.max().values
        lon_min, lon_max = nds.nav_lon.min().values, nds.nav_lon.max().values

        # Target grid dimensions at pyramid level 6.
        lat = np.linspace(lat_min, lat_max, 4096)
        lon = np.linspace(lon_min, lon_max, 4096)

        ds_out = xr.Dataset(
            coords={"lat": ("lat", lat), "lon": ("lon", lon)},
            data_vars={
                "mask": (["lat", "lon"], np.ones((len(lat), len(lon)), dtype=bool))
            },
        )
        weights_local_filename = "enatl_weights_4096.nc"
        # NOTE(review): xe.Regridder's `weights=` kwarg is for *reusing*
        # precomputed weights; to have the regridder write freshly computed
        # weights to disk the intended kwarg may be `filename=` — confirm
        # against the xESMF API before relying on this.
        regridder = xe.Regridder(ds, ds_out, "bilinear", weights=weights_local_filename)
        # BUG FIX: open the weights file via the variable, not the literal
        # string "weights_local_filename" (which names no file on disk).
        weights_ds = xr.open_dataset(weights_local_filename)
        # Persist the weights to GCS in both formats for later reuse.
        weights_ds.to_netcdf(
            f"gs://leap-scratch/data-library/feedstocks/eNATL_regridding/{weights_local_filename}"
        )
        weights_ds.to_zarr(
            "gs://leap-scratch/data-library/feedstocks/eNATL_regridding/enatl_weights_4096.zarr"
        )

        return weights_ds

    def expand(self, pcoll):
        # BUG FIX: the original called self._subset, which is not defined on
        # this class — the implemented method is _generate_weights.
        return pcoll | "generate_weights" >> beam.MapTuple(
            lambda k, v: (k, self._generate_weights(v))
        )


# Beam transform chain: open the source Zarr store lazily (chunks={}) and
# run the weight-generation step. The pyramid-building, metadata
# consolidation, and copy-to-catalog steps are currently disabled
# (commented out) — presumably pending the StoreToPyramid work; once
# enabled they would resample to `levels` pyramid levels and copy the
# result to the catalog location.
pyramid = (
    beam.Create(pattern.items())
    | OpenWithXarray(file_type=FileType("zarr"), xarray_open_kwargs={"chunks": {}})
    | GenerateWeights()
    # | StoreToPyramid(
    #     store_name="eNATL60_BLBT02_pyramid.zarr",
    #     epsg_code="4326",
    #     pyramid_method="resample",
    #     pyramid_kwargs={"x": "x", "y": "y"},
    #     levels=levels,
    #     combine_dims=pattern.combine_dim_keys,
    # )
    # | ConsolidateMetadata()
    # | Copy(target=catalog_store_urls["enatl60-blbt02-pyramid"])
)
7 changes: 4 additions & 3 deletions feedstock/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pangeo-forge-recipes==0.10.4
pangeo-forge-recipes==0.10.8
gcsfs
apache-beam[gcp]
apache-beam[gcp] >= 2.58.0
leap-data-management-utils==0.0.12
xarray=2024.05.0
xarray==2024.05.0
pangeo-forge-ndpyramid