diff --git a/fre/app/generate_time_averages/combine.py b/fre/app/generate_time_averages/combine.py
index ee3cff863..dc93411cb 100644
--- a/fre/app/generate_time_averages/combine.py
+++ b/fre/app/generate_time_averages/combine.py
@@ -8,6 +8,7 @@ import subprocess
 
 import metomi.isodatetime.parsers
+import xarray as xr
 
 from ..helpers import change_directory
 
 
@@ -39,21 +40,29 @@ def form_bronx_directory_name(frequency: str,
     return frequency_label + '_' + str(interval_object.years) + 'yr'
 
 
-def check_glob(target: str) -> None:
+def merge_netcdfs(source: str, target: str) -> None:
     """
-    Verify that at least one file is resolved by the glob.
-    Raises FileNotFoundError if no files are found.
-
-    :param target: Glob target to resolve
-    :type target: str
-    :raises FileNotFoundError: No files found
+    Merge the NetCDF files matched by a glob pattern
+    into one combined NetCDF file.
+
+    :param source: Glob pattern matching the input files
+    :type source: str
+    :param target: Output file
+    :type target: str
+    :raises FileNotFoundError: Input files not found
+    :raises FileExistsError: Output file already exists
    :rtype: None
     """
-    files = glob.glob(target)
-    if len(files) >= 1:
-        fre_logger.debug("%s has %s files", target, len(files))
+    input_files = glob.glob(source)
+    if len(input_files) >= 1:
+        fre_logger.debug("'%s' has %s files", source, len(input_files))
     else:
-        raise FileNotFoundError(f"target={target} resolves to no files")
+        raise FileNotFoundError(f"'{source}' resolves to no files")
+    if Path(target).exists():
+        raise FileExistsError(f"Output file '{target}' already exists")
+
+    ds = xr.open_mfdataset(input_files, compat='override', coords='minimal')
+    ds.to_netcdf(target, unlimited_dims=['time'])
 
 
 def combine( root_in_dir: str,
@@ -107,8 +116,7 @@ def combine( root_in_dir: str,
             if frequency == 'yr':
                 source = component + '.' + date_string + '.*.nc'
                 target = component + '.' + date_string + '.nc'
-                check_glob(source)
-                subprocess.run(['cdo', '-O', 'merge', source, target], check=True)
+                merge_netcdfs(source, target)
                 fre_logger.debug("Output file created: %s", target)
                 fre_logger.debug("Copying to %s", outdir)
                 subprocess.run(['cp', '-v', target, outdir], check=True)
@@ -116,11 +124,5 @@ def combine( root_in_dir: str,
                 for month_int in range(1,13):
                     source = f"{component}.{date_string}.*.{month_int:02d}.nc"
                     target = f"{component}.{date_string}.{month_int:02d}.nc"
-                    check_glob(source)
-
-                    # does there exist a python-cdo way of doing the merge?
-                    subprocess.run(['cdo', '-O', 'merge', source, target], check=True)
-                    fre_logger.debug("Output file created: %s", target)
-                    fre_logger.debug("Copying to %s", outdir)
-
+                    merge_netcdfs(source, target)
                     subprocess.run(['cp', '-v', target, outdir], check=True)
diff --git a/fre/app/mask_atmos_plevel/mask_atmos_plevel.py b/fre/app/mask_atmos_plevel/mask_atmos_plevel.py
index d60bd48bb..a43ea272d 100755
--- a/fre/app/mask_atmos_plevel/mask_atmos_plevel.py
+++ b/fre/app/mask_atmos_plevel/mask_atmos_plevel.py
@@ -103,7 +103,7 @@ def mask_atmos_plevel_subtool(infile: str = None,
             fre_logger.info("Finished processing %s, wrote %s, pressure_mask is True",
                             var, masked_var)
         else:
-            fre_logger.debug("Not processing %s, no pressure_mask attr, nor _unmsk in the variable name", var)
+            fre_logger.debug("Not processing '%s', no 'pressure_mask' attribute", var)
 
     fre_logger.info('Write the output file if any unmasked variables were masked')
     if ds_out.variables:
diff --git a/fre/app/regrid_xy/regrid_xy.py b/fre/app/regrid_xy/regrid_xy.py
index 3e261c2e1..a7164f247 100644
--- a/fre/app/regrid_xy/regrid_xy.py
+++ b/fre/app/regrid_xy/regrid_xy.py
@@ -326,11 +326,18 @@ def regrid_xy(yamlfile: str,
     datadict["remap_dir"] = remap_dir
     datadict["input_date"] = input_date[:8]
 
+    # add temporal and static history files
     components = []
     for component in yamldict["postprocess"]["components"]:
-        for this_source in component["sources"]:
-            if this_source["history_file"] == source:
+        for temporal_history in component["sources"]:
+            if temporal_history["history_file"] == source:
                 components.append(component)
+        try:
+            for static_history in component["static"]:
+                if static_history["source"] == source:
+                    components.append(component)
+        except KeyError:
+            pass
 
     # submit fregrid job for each component
     for component in components:
diff --git a/fre/app/regrid_xy/tests/generate_files.py b/fre/app/regrid_xy/tests/generate_files.py
index a39de7b12..8e2d022e5 100644
--- a/fre/app/regrid_xy/tests/generate_files.py
+++ b/fre/app/regrid_xy/tests/generate_files.py
@@ -152,6 +152,13 @@ def make_data():
             history_file = source["history_file"]
             for i in range(1, ntiles+1):
                 dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc")
+        try:
+            for static_source in component["static"]:
+                history_file = static_source["source"]
+                for i in range(1, ntiles+1):
+                    dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc")
+        except KeyError:
+            pass
 
 
 def make_all():
diff --git a/fre/app/regrid_xy/tests/test_regrid_xy.py b/fre/app/regrid_xy/tests/test_regrid_xy.py
index af5831d3e..74482091f 100644
--- a/fre/app/regrid_xy/tests/test_regrid_xy.py
+++ b/fre/app/regrid_xy/tests/test_regrid_xy.py
@@ -21,12 +21,14 @@ components = []
 
 pp_input_files = [{"history_file":"pemberley"},
                   {"history_file":"longbourn"}]
+pp_input_static_files = [{"source": "my_static_history"}, {"source": "my_static2"}]
 components.append({"xyInterp": f"{nxy},{nxy}",
                    "interpMethod": "conserve_order2",
                    "inputRealm": "atmos",
                    "type": f"pride_and_prejudice",
                    "sources": pp_input_files,
-                   "postprocess_on": True}
+                   "postprocess_on": True,
+                   "static": pp_input_static_files}
                   )
 emma_input_files = [{"history_file":"hartfield"}, {"history_file":"donwell_abbey"}]
 components.append({"xyInterp": f"{nxy},{nxy}",
@@ -91,10 +93,21 @@ def test_regrid_xy():
                             source=source,
                             input_date=date+"TTTT")
 
+    # regrid the static inputs
+    for static_source_dict in pp_input_static_files:
+        source = static_source_dict['source']
+        regrid_xy.regrid_xy(yamlfile=str(yamlfile),
+                            input_dir=str(input_dir),
+                            output_dir=str(output_dir),
+                            work_dir=str(work_dir),
+                            remap_dir=str(remap_dir),
+                            source=source,
+                            input_date=date+"TTTT")
+
     #check answers
+    output_subdir = output_dir/f"{nxy}_{nxy}.conserve_order2"
     for source_dict in pp_input_files + emma_input_files:
         # Files are now output to a subdirectory based on grid size and interpolation method
-        output_subdir = output_dir/f"{nxy}_{nxy}.conserve_order2"
         outfile = output_subdir/f"{date}.{source_dict['history_file']}.nc"
         test = xr.load_dataset(outfile)
 
@@ -108,6 +121,19 @@ def test_regrid_xy():
         assert np.all(test["darcy"].values==np.float64(2.0))
         assert np.all(test["wins"].values==np.float64(3.0))
 
+    for static_source_dict in pp_input_static_files:
+        outfile = output_subdir/f"{date}.{static_source_dict['source']}.nc"
+        test = xr.load_dataset(outfile)
+
+        assert "wet_c" not in test
+        assert "mister" in test
+        assert "darcy" in test
+        assert "wins" in test
+
+        assert np.all(test["mister"].values==np.float64(1.0))
+        assert np.all(test["darcy"].values==np.float64(2.0))
+        assert np.all(test["wins"].values==np.float64(3.0))
+
     #check answers, these shouldn't have been regridded
     for source_dict in here_input_files:
         ifile = source_dict["history_file"]
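
Note for reviewers: the sketch below shows, standalone, the xarray merge strategy that the new `merge_netcdfs` relies on — a minimal sketch, not a drop-in replacement for the function above. The file names are hypothetical placeholders, not paths from this repository.

```python
# Minimal standalone sketch of the xarray-based merge used by merge_netcdfs.
# File names are hypothetical.
import glob

import xarray as xr

input_files = glob.glob("atmos.0001.*.nc")  # hypothetical glob pattern
if not input_files:
    raise FileNotFoundError("glob resolved to no files")

# compat='override' resolves conflicting variables by taking the first
# dataset's copy instead of comparing them; coords='minimal' includes only
# coordinates whose dimensions appear in the data variables, skipping an
# expensive comparison of every coordinate across all files.
ds = xr.open_mfdataset(input_files, compat="override", coords="minimal")

# Write 'time' as the unlimited (record) dimension, as merge_netcdfs does.
ds.to_netcdf("atmos.0001.nc", unlimited_dims=["time"])
```

One behavioral difference from the shelled-out `cdo merge`: `open_mfdataset` opens the inputs lazily (dask-backed), so the data is only read as `to_netcdf` streams it out.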
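A side note on the `try`/`except KeyError` pattern used in `regrid_xy.py` and `generate_files.py`: an equivalent formulation uses `dict.get` with an empty-list default. The snippet below is only an illustration of that alternative, not code from this PR; the inline data reuses names from the test fixtures.

```python
# Hypothetical, minimal illustration: dict.get stands in for the diff's
# try/except KeyError, yielding an empty list when a component has no
# "static" section, so the inner loop simply does not run.
yamldict = {"postprocess": {"components": [
    {"sources": [{"history_file": "pemberley"}],
     "static": [{"source": "my_static_history"}]},
    {"sources": [{"history_file": "hartfield"}]},   # no "static" section
]}}
source = "my_static_history"

components = []
for component in yamldict["postprocess"]["components"]:
    for temporal_history in component["sources"]:
        if temporal_history["history_file"] == source:
            components.append(component)
    for static_history in component.get("static", []):
        if static_history["source"] == source:
            components.append(component)

assert len(components) == 1  # only the first component references the source
```

The difference is scope: `component.get("static", [])` handles only a missing "static" section, whereas the `try` block in the diff also silently swallows a `KeyError` raised by a static entry that lacks a "source" key, which could mask malformed YAML.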