diff --git a/doc/source/example_notebooks/IS2_data_read-in.ipynb b/doc/source/example_notebooks/IS2_data_read-in.ipynb index 6537777de..836381698 100644 --- a/doc/source/example_notebooks/IS2_data_read-in.ipynb +++ b/doc/source/example_notebooks/IS2_data_read-in.ipynb @@ -426,7 +426,7 @@ "\n", "***ATTENTION: icepyx loads your data by creating an Xarray DataSet for each input granule and then merging them. In some cases, the automatic merge fails and needs to be handled manually. In these cases, icepyx will return a warning with the error message from the failed Xarray merge and a list of per-granule DataSets***\n", "\n", - "This can happen if you unintentionally provide the same granule multiple times with different filenames." + "This can happen if you unintentionally provide the same granule multiple times with different filenames, or in segmented products where the `gran_idx` values that are automatically generated from the rgt and cycle match. In the latter case, you can simply provide a unique `gran_idx` value for each DataSet in `ds`, then run `import xarray as xr` and `ds_merged = xr.merge(ds)` to create one merged DataSet." 
] }, { diff --git a/doc/source/user_guide/documentation/classes_dev_uml.svg b/doc/source/user_guide/documentation/classes_dev_uml.svg index a494bc220..a73320ea1 100644 --- a/doc/source/user_guide/documentation/classes_dev_uml.svg +++ b/doc/source/user_guide/documentation/classes_dev_uml.svg @@ -4,11 +4,11 @@ - + classes_dev_uml - + icepyx.core.Earthdata.Earthdata @@ -18,7 +18,7 @@ capability_url email netrc : NoneType -pswd : NoneType, str +pswd : str, NoneType session : Session uid @@ -29,17 +29,17 @@ icepyx.core.query.GenQuery - -GenQuery - -_end : datetime -_geom_filepath : NoneType -_spat_extent -_start : datetime -extent_type : str - -__init__(spatial_extent, date_range, start_time, end_time) -__str__() + +GenQuery + +_end : datetime +_geom_filepath : NoneType +_spat_extent +_start : datetime +extent_type : str + +__init__(spatial_extent, date_range, start_time, end_time) +__str__() @@ -58,104 +58,104 @@ icepyx.core.query.Query - -Query - -CMRparams -_CMRparams -_about_product -_cust_options : dict -_cycles : list -_email -_file_vars -_granules -_order_vars -_prod : NoneType, str -_readable_granule_name : list -_reqparams -_s3login_credentials -_session : Session -_source : str -_subsetparams : NoneType -_tracks : list -_version -cycles -dataset -dates -end_time -file_vars -granules -order_vars -product -product_version -reqparams -spatial_extent -start_time -tracks - -__init__(product, spatial_extent, date_range, start_time, end_time, version, cycles, tracks, files) -__str__() -avail_granules(ids, cycles, tracks, s3urls) -download_granules(path, verbose, subset, restart) -earthdata_login(uid, email, s3token) -latest_version() -order_granules(verbose, subset, email) -product_all_info() -product_summary_info() -show_custom_options(dictview) -subsetparams() -visualize_elevation() -visualize_spatial_extent() + +Query + +CMRparams +_CMRparams +_about_product +_cust_options : dict +_cycles : list +_email +_file_vars +_granules +_order_vars +_prod : str, NoneType 
+_readable_granule_name : list +_reqparams +_s3login_credentials +_session : Session +_source : str +_subsetparams : NoneType +_tracks : list +_version +cycles +dataset +dates +end_time +file_vars +granules +order_vars +product +product_version +reqparams +spatial_extent +start_time +tracks + +__init__(product, spatial_extent, date_range, start_time, end_time, version, cycles, tracks, files) +__str__() +avail_granules(ids, cycles, tracks, s3urls) +download_granules(path, verbose, subset, restart) +earthdata_login(uid, email, s3token) +latest_version() +order_granules(verbose, subset, email) +product_all_info() +product_summary_info() +show_custom_options(dictview) +subsetparams() +visualize_elevation() +visualize_spatial_extent() icepyx.core.granules.Granules->icepyx.core.query.Query - - -_granules + + +_granules icepyx.core.granules.Granules->icepyx.core.query.Query - - -_granules + + +_granules icepyx.core.icesat2data.Icesat2Data - -Icesat2Data - - -__init__() + +Icesat2Data + + +__init__() icepyx.core.exceptions.NsidcQueryError - -NsidcQueryError - -errmsg -msgtxt : str - -__init__(errmsg, msgtxt) -__str__() + +NsidcQueryError + +errmsg +msgtxt : str + +__init__(errmsg, msgtxt) +__str__() icepyx.core.exceptions.QueryError - -QueryError - - - + +QueryError + + + icepyx.core.exceptions.NsidcQueryError->icepyx.core.exceptions.QueryError - - + + @@ -163,9 +163,9 @@ Parameters -_fmted_keys : NoneType, dict +_fmted_keys : dict, NoneType _poss_keys : dict -_reqtype : str, NoneType +_reqtype : NoneType, str fmted_keys partype poss_keys @@ -180,136 +180,137 @@ icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - + + _CMRparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - + + _reqparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_subsetparams + + +_subsetparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_subsetparams + + +_subsetparams icepyx.core.query.Query->icepyx.core.query.GenQuery - - 
+ + icepyx.core.read.Read - -Read - -_catalog_path : NoneType -_filelist : list, NoneType -_is2catalog : Catalog -_out_obj : Dataset -_pattern : str -_prod : str, NoneType -_read_vars -_source_type : str -data_source : NoneType -is2catalog -vars - -__init__(data_source, product, filename_pattern, catalog, out_obj_type) -_add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) -_build_dataset_template(file) -_build_single_file_dataset(file, groups_list) -_check_source_for_pattern(source, filename_pattern) -_read_single_var(file, grp_path) -load() + +Read + +_catalog_path : NoneType +_filelist : NoneType, list +_is2catalog : Catalog +_out_obj : Dataset +_pattern : str +_prod : str, NoneType +_read_vars +_source_type : str +data_source : NoneType +is2catalog +vars + +__init__(data_source, product, filename_pattern, catalog, out_obj_type) +_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) +_build_dataset_template(file) +_build_single_file_dataset(file, groups_list) +_check_source_for_pattern(source, filename_pattern) +_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) +_read_single_grp(file, grp_path) +load() icepyx.core.variables.Variables - -Variables - -_avail : NoneType, list -_session : NoneType -_vartype -_version : NoneType -path : NoneType -product : NoneType -wanted : NoneType, dict - -__init__(vartype, avail, wanted, session, product, version, path) -_check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list) -_get_combined_list(beam_list, keyword_list) -_get_sum_varlist(var_list, all_vars, defaults) -_iter_paths(sum_varlist, req_vars, vgrp, beam_list, keyword_list) -_iter_vars(sum_varlist, req_vars, vgrp) -append(defaults, var_list, beam_list, keyword_list) -avail(options, internal) -parse_var_list(varlist, tiered) -remove(all, var_list, beam_list, keyword_list) + +Variables + +_avail : NoneType, list +_session : NoneType +_vartype +_version : NoneType +path : NoneType +product : NoneType +wanted : dict, 
NoneType + +__init__(vartype, avail, wanted, session, product, version, path) +_check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list) +_get_combined_list(beam_list, keyword_list) +_get_sum_varlist(var_list, all_vars, defaults) +_iter_paths(sum_varlist, req_vars, vgrp, beam_list, keyword_list) +_iter_vars(sum_varlist, req_vars, vgrp) +append(defaults, var_list, beam_list, keyword_list) +avail(options, internal) +parse_var_list(varlist, tiered, tiered_vars) +remove(all, var_list, beam_list, keyword_list) icepyx.core.variables.Variables->icepyx.core.query.Query - - + + _order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - - + + _order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - - + + _file_vars icepyx.core.variables.Variables->icepyx.core.read.Read - - + + _read_vars icepyx.core.visualization.Visualize - -Visualize - -bbox : list -cycles : NoneType -date_range : NoneType -product : NoneType, str -tracks : NoneType - -__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) -generate_OA_parameters(): list -grid_bbox(binsize): list -make_request(base_url, payload) -parallel_request_OA(): -query_icesat2_filelist(): tuple -request_OA_data(paras): -viz_elevation(): + +Visualize + +bbox : list +cycles : NoneType +date_range : NoneType +product : str, NoneType +tracks : NoneType + +__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) +generate_OA_parameters(): list +grid_bbox(binsize): list +make_request(base_url, payload) +parallel_request_OA(): +query_icesat2_filelist(): tuple +request_OA_data(paras): +viz_elevation(): diff --git a/doc/source/user_guide/documentation/classes_user_uml.svg b/doc/source/user_guide/documentation/classes_user_uml.svg index 6ad75574f..5901d2fb6 100644 --- a/doc/source/user_guide/documentation/classes_user_uml.svg +++ b/doc/source/user_guide/documentation/classes_user_uml.svg @@ -197,11 +197,11 @@ path : NoneType product : NoneType -wanted : NoneType, dict 
+wanted : dict, NoneType append(defaults, var_list, beam_list, keyword_list) avail(options, internal) -parse_var_list(varlist, tiered) +parse_var_list(varlist, tiered, tiered_vars) remove(all, var_list, beam_list, keyword_list) @@ -241,7 +241,7 @@ bbox : list cycles : NoneType date_range : NoneType -product : NoneType, str +product : str, NoneType tracks : NoneType generate_OA_parameters(): list diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index 3fed2ef4f..5f4e455c6 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -259,7 +259,7 @@ def _default_varlists(product): else: print( - "THE REQUESTED PRODUCT DOES NOT YET HAVE A DEFAULT LIST SET UP. ONLY DELTA_TIME, LATITUTDE, AND LONGITUDE WILL BE RETURNED" + "THE REQUESTED PRODUCT DOES NOT YET HAVE A DEFAULT LIST SET UP. ONLY DELTA_TIME, LATITUDE, AND LONGITUDE WILL BE RETURNED" ) return common_list diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 244e75978..db8219f8e 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -1017,7 +1017,7 @@ def download_granules( by default when subset=True, but additional subsetting options are available. Spatial subsetting returns all data that are within the area of interest (but not complete granules. This eliminates false-positive granules returned by the metadata-level search) - restart: boolean, default false + restart : boolean, default false If previous download was terminated unexpectedly. Run again with restart set to True to continue. **kwargs : key-value pairs Additional parameters to be passed to the subsetter. 
diff --git a/icepyx/core/read.py b/icepyx/core/read.py index ac6fbc1ca..d222c634c 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -311,9 +311,9 @@ def _check_source_for_pattern(source, filename_pattern): return False, None @staticmethod - def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): + def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): """ - Add the new variable group to the dataset template. + Add the new variables in the group to the dataset template. Parameters ---------- @@ -336,11 +336,9 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): Xarray Dataset with variables from the ds variable group added. """ - wanted_vars = list(wanted_dict.keys()) - if grp_path in ["orbit_info", "ancillary_data"]: grp_spec_vars = [ - wanted_vars[i] + wanted_groups_tiered[-1][i] for i, x in enumerate(wanted_groups_tiered[0]) if x == grp_path ] @@ -389,9 +387,10 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): # add a test for the new function (called here)! grp_spec_vars = [ - k for k, v in wanted_dict.items() if any(grp_path in x for x in v) + k + for k, v in wanted_dict.items() + if any(f"{grp_path}/{k}" in x for x in v) ] - # print(grp_spec_vars) ds = ( ds.reset_coords(drop=False) @@ -400,17 +399,57 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): .assign(gt=(("gran_idx", "spot"), [[gt_str]])) ) - # print(ds) grp_spec_vars.append("gt") is2ds = is2ds.merge( ds[grp_spec_vars], join="outer", combine_attrs="no_conflicts" ) - # print(is2ds) # re-cast some dtypes to make array smaller is2ds["gt"] = is2ds.gt.astype(str) is2ds["spot"] = is2ds.spot.astype(np.uint8) + return is2ds, ds[grp_spec_vars] + + @staticmethod + def _combine_nested_vars(is2ds, ds, grp_path, wanted_dict): + """ + Add the deeply nested variables to a dataset with appropriate coordinate information. 
+ + Parameters + ---------- + is2ds : Xarray dataset + Dataset to add deeply nested variables to. + ds : Xarray dataset + Dataset containing proper dimensions for the variables being added + grp_path : str + hdf5 group path read into ds + wanted_dict : dict + Dictionary with variable names as keys and a list of group + variable paths containing those variables as values. + + Returns + ------- + Xarray Dataset with variables from the ds variable group added. + """ + + grp_spec_vars = [ + k for k, v in wanted_dict.items() if any(f"{grp_path}/{k}" in x for x in v) + ] + + # # Use this to handle issues specific to group paths that are more nested + # tiers = len(wanted_groups_tiered) + # if tiers > 3 and grp_path.count("/") == tiers - 2: + # # Handle attribute conflicts that arose from data descriptions during merging + # for var in grp_spec_vars: + # ds[var].attrs = ds.attrs + # for k in ds[var].attrs.keys(): + # ds.attrs.pop(k) + # # warnings.warn( + # # "Due to the number of layers of variable group paths, some attributes have been dropped from your DataSet during merging", + # # UserWarning, + # # ) + + is2ds = is2ds.assign(ds[grp_spec_vars]) + return is2ds def load(self): @@ -485,7 +524,7 @@ def _build_dataset_template(self, file): ) return is2ds - def _read_single_var(self, file, grp_path): + def _read_single_grp(self, file, grp_path): """ For a given file and variable group path, construct an Intake catalog and use it to read in the data. @@ -519,12 +558,10 @@ def _read_single_var(self, file, grp_path): grp_paths=grp_path, extra_engine_kwargs={"phony_dims": "access"}, ) - ds = grpcat[self._source_type].read() return ds - # NOTE: for non-gridded datasets only def _build_single_file_dataset(self, file, groups_list): """ Create a single xarray dataset with all of the wanted variables/groups from the wanted var list for a single data file/url. 
@@ -544,7 +581,7 @@ def _build_single_file_dataset(self, file, groups_list): Xarray Dataset """ - file_product = self._read_single_var(file, "/").attrs["identifier_product_type"] + file_product = self._read_single_grp(file, "/").attrs["identifier_product_type"] assert ( file_product == self._prod ), "Your product specification does not match the product specification within your files." @@ -577,13 +614,30 @@ def _build_single_file_dataset(self, file, groups_list): wanted_groups_set = set(wanted_groups) # orbit_info is used automatically as the first group path so the info is available for the rest of the groups wanted_groups_set.remove("orbit_info") + # Note: the sorting is critical for datasets with highly nested groups + wanted_groups_list = ["orbit_info"] + sorted(wanted_groups_set) # returns the wanted groups as a list of lists with group path string elements separated - _, wanted_groups_tiered = Variables.parse_var_list(groups_list, tiered=True) + _, wanted_groups_tiered = Variables.parse_var_list( + groups_list, tiered=True, tiered_vars=True + ) - for grp_path in ["orbit_info"] + list(wanted_groups_set): - ds = self._read_single_var(file, grp_path) - is2ds = Read._add_var_to_ds( + while wanted_groups_list: + grp_path = wanted_groups_list[0] + wanted_groups_list = wanted_groups_list[1:] + ds = self._read_single_grp(file, grp_path) + is2ds, ds = Read._add_vars_to_ds( is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict ) + # if there are any deeper nested variables, get those so they have actual coordinates and add them + if any(grp_path in grp_path2 for grp_path2 in wanted_groups_list): + for grp_path2 in wanted_groups_list: + if grp_path in grp_path2: + sub_ds = self._read_single_grp(file, grp_path2) + ds = Read._combine_nested_vars( + ds, sub_ds, grp_path2, wanted_dict + ) + wanted_groups_list.remove(grp_path2) + is2ds = is2ds.merge(ds, join="outer", combine_attrs="no_conflicts") + return is2ds diff --git a/icepyx/core/variables.py 
b/icepyx/core/variables.py index ef71dc737..ae12d4d3c 100644 --- a/icepyx/core/variables.py +++ b/icepyx/core/variables.py @@ -135,7 +135,7 @@ def visitor_func(name, node): return self._avail @staticmethod - def parse_var_list(varlist, tiered=True): + def parse_var_list(varlist, tiered=True, tiered_vars=False): """ Parse a list of path strings into tiered lists and names of variables @@ -149,6 +149,11 @@ def parse_var_list(varlist, tiered=True): (e.g. [['orbit_info', 'ancillary_data', 'gt1l'],['none','none','land_ice_segments']]) or a single list of path strings (e.g. ['orbit_info','ancillary_data','gt1l/land_ice_segments']) + tiered_vars : boolean, default False + Whether or not to append a list of the variable names to the nested list of component strings + (e.g. [['orbit_info', 'ancillary_data', 'gt1l'],['none','none','land_ice_segments'], + ['sc_orient','atlas_sdp_gps_epoch','h_li']])) + Examples -------- >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28'], version='1') # doctest: +SKIP @@ -215,7 +220,10 @@ def parse_var_list(varlist, tiered=True): else: num = np.max([v.count("/") for v in varlist]) # print('max needed: ' + str(num)) - paths = [[] for i in range(num)] + if tiered_vars == True: + paths = [[] for i in range(num + 1)] + else: + paths = [[] for i in range(num)] # print(self._cust_options['variables']) for vn in varlist: @@ -237,6 +245,8 @@ def parse_var_list(varlist, tiered=True): for i in range(j, num): paths[i].append("none") i = i + 1 + if tiered_vars == True: + paths[num].append(vkey) return vgrp, paths