From a8a5255e27a9777ec30aaa80b37942ff955ba91d Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Tue, 2 Dec 2025 16:35:52 -0500 Subject: [PATCH 01/29] Archive Verify (-ens) var creation --- jobs/JGLOBAL_ARCHIVE_VRFY | 23 +- jobs/JGLOBAL_ENS_ARCHIVE_VRFY | 15 +- scripts/exgdas_enkf_earc_vrfy.py | 51 +-- scripts/exglobal_archive_vrfy.py | 51 ++- ush/python/pygfs/task/archive_vars.py | 541 ++++++++++++++++++++++++++ 5 files changed, 603 insertions(+), 78 deletions(-) create mode 100644 ush/python/pygfs/task/archive_vars.py diff --git a/jobs/JGLOBAL_ARCHIVE_VRFY b/jobs/JGLOBAL_ARCHIVE_VRFY index e5965c22706..207fad02bb5 100755 --- a/jobs/JGLOBAL_ARCHIVE_VRFY +++ b/jobs/JGLOBAL_ARCHIVE_VRFY @@ -2,26 +2,9 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "arch_vrfy" -c "base arch_vrfy" -############################################## -# Set variables used in the script -############################################## -YMD=${PDY} HH=${cyc} declare_from_tmpl -rx \ - COMIN_ATMOS_ANALYSIS:COM_ATMOS_ANALYSIS_TMPL \ - COMIN_ATMOS_GENESIS:COM_ATMOS_GENESIS_TMPL \ - COMIN_ATMOS_HISTORY:COM_ATMOS_HISTORY_TMPL \ - COMIN_ATMOS_TRACK:COM_ATMOS_TRACK_TMPL \ - COMIN_CHEM_ANALYSIS:COM_CHEM_ANALYSIS_TMPL \ - COMIN_SNOW_ANALYSIS:COM_SNOW_ANALYSIS_TMPL \ - COMIN_OBS:COM_OBS_TMPL \ - COMOUT_ATMOS_TRACK:COM_ATMOS_TRACK_TMPL - -for grid in "0p25" "0p50" "1p00"; do - YMD=${PDY} HH=${cyc} GRID=${grid} declare_from_tmpl -rx \ - "COMIN_ATMOS_GRIB_${grid}:COM_ATMOS_GRIB_GRID_TMPL" -done - ############################################################### # Run archive script +# Note: COM paths are calculated within the Python script ############################################################### ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} @@ -30,10 +13,6 @@ if [[ ${err} -ne 0 ]]; then exit "${err}" fi -############################################## -# End JOB SPECIFIC work -############################################## - ############################################## # Final processing ############################################## diff --git a/jobs/JGLOBAL_ENS_ARCHIVE_VRFY b/jobs/JGLOBAL_ENS_ARCHIVE_VRFY index 832ff9a0277..08260ece951 100755 --- a/jobs/JGLOBAL_ENS_ARCHIVE_VRFY +++ b/jobs/JGLOBAL_ENS_ARCHIVE_VRFY @@ -2,16 +2,9 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "earc_vrfy" -c "base earc_vrfy" -############################################## -# Set variables used in the script -############################################## -MEMDIR="ensstat" YMD=${PDY} HH=${cyc} declare_from_tmpl -rx \ - COMIN_ATMOS_ANALYSIS_ENSSTAT:COM_ATMOS_ANALYSIS_TMPL \ - COMIN_ATMOS_HISTORY_ENSSTAT:COM_ATMOS_HISTORY_TMPL \ - COMIN_SNOW_ANALYSIS_ENSSTAT:COM_SNOW_ANALYSIS_TMPL - ############################################################### # Run archive script +# Note: COM paths are calculated within the Python script ############################################################### "${SCRgfs}/exgdas_enkf_earc_vrfy.py" @@ -20,12 +13,6 @@ if [[ ${err} -ne 0 ]]; then exit "${err}" fi -############################################################### - -############################################## -# End JOB SPECIFIC work -############################################## - ############################################## # Final processing ############################################## diff --git a/scripts/exgdas_enkf_earc_vrfy.py b/scripts/exgdas_enkf_earc_vrfy.py index 973a4257b91..8d50b6a8962 100755 --- a/scripts/exgdas_enkf_earc_vrfy.py +++ b/scripts/exgdas_enkf_earc_vrfy.py @@ -3,6 +3,7 @@ import os from pygfs.task.archive import Archive +from pygfs.task.archive_vars import ArchiveVrfy from wxflow import AttrDict, Logger, cast_strdict_as_dtypedict, chdir, logit # initialize root logger @@ -14,37 +15,43 @@ def main(): config = cast_strdict_as_dtypedict(os.environ) - # Instantiate the Archive object + # Instantiate the Archive object for execute_store_products archive = Archive(config) - # Pull out all the configuration keys needed to run the rest of archive steps - keys = ['current_cycle', 'RUN', 'PSLOT', 'ROTDIR', 'PARMgfs', - 'ARCDIR', 'MODE', 'DO_JEDIATMENS', 'DO_FIT2OBS', 'DO_JEDIATMVAR', - 'DO_JEDISNOWDA', 'DO_AERO_ANL', 'DO_PREP_OBS_AERO', 'NET', 'MODE', 'FHOUT_GFS', - 'FHMAX_HF_GFS', 'FHOUT_GFS', 'FHMAX_FITS', 'FHMAX', 'FHOUT', 'FHMAX_GFS', 'DO_GSISOILDA', 'DO_LAND_IAU'] + # Instantiate the ArchiveVrfy object for variable and file set calculation + archive_vars = ArchiveVrfy(config) - archive_dict = AttrDict() - for key in keys: - archive_dict[key] = archive.task_config.get(key) - if archive_dict[key] is None: - print(f"Warning: key ({key}) not found in task_config!") + # Get the NET and RUN type to determine which arcdir method to call + NET = archive.task_config.get('NET', 'gfs') + RUN = archive.task_config.RUN - # Also import all COMIN* directory and template variables - for key in archive.task_config.keys(): - if key.startswith("COMIN"): - archive_dict[key] = archive.task_config[key] + with chdir(config.ROTDIR): - cwd = os.getcwd() + # Determine which system we're archiving for and call the appropriate method + # EnKF runs use the GFS archiving logic with ensemble-specific handling + logger.info(f"Archiving EnKF data for RUN={RUN}, cycle {archive.task_config.current_cycle}") - os.chdir(config.ROTDIR) + if NET == 'gefs': + arcdir_result = archive_vars.gefs_arcdir() + elif NET == 'gcafs': + arcdir_result = archive_vars.gcafs_arcdir() + else: # gfs, gdas, enkfgdas, enkfgfs + arcdir_result = archive_vars.gfs_arcdir() - # Determine which archives to create - arcdir_set = archive.configure_vrfy(archive_dict) + # Extract the file_set and mkdir_list from the result + file_set = arcdir_result['file_set'] + mkdir_list = arcdir_result['mkdir_list'] - # Populate the product archive (ARCDIR) - archive.execute_store_products(arcdir_set) + # Construct the arcdir_set in the format expected by execute_store_products + arcdir_set = { + 'mkdir': mkdir_list, + 'copy': file_set + } - os.chdir(cwd) + logger.info(f"Archiving {len(file_set)} files to {len(mkdir_list)} directories") + + # Populate the product archive (ARCDIR) + archive.execute_store_products(arcdir_set) if __name__ == '__main__': diff --git a/scripts/exglobal_archive_vrfy.py b/scripts/exglobal_archive_vrfy.py index 5054908fc4d..156e6568713 100755 --- a/scripts/exglobal_archive_vrfy.py +++ b/scripts/exglobal_archive_vrfy.py @@ -3,6 +3,7 @@ import os from pygfs.task.archive import Archive +from pygfs.task.archive_vars import ArchiveVrfy from wxflow import AttrDict, Logger, cast_strdict_as_dtypedict, logit, chdir # initialize root logger @@ -14,9 +15,12 @@ def main(): config = cast_strdict_as_dtypedict(os.environ) - # Instantiate the Archive object + # Instantiate the Archive object for execute_store_products archive = Archive(config) + # Instantiate the ArchiveVrfy object for variable and file set calculation + archive_vars = ArchiveVrfy(config) + # update these keys to be 3 digits if they are part of archive.task_config.keys for key in ['OCNRES', 'ICERES']: try: @@ -24,28 +28,35 @@ def main(): except KeyError as ee: logger.info(f"key ({key}) not found in archive.task_config!") - # Pull out all the configuration keys needed to run the rest of archive steps - keys = ['current_cycle', 'RUN', 'PSLOT', 'ROTDIR', 'PARMgfs', - 'ARCDIR', 'MODE', 'DO_JEDIATMENS', 'DO_FIT2OBS', 'DO_JEDIATMVAR', 'FHMIN_GFS', - 'DO_JEDISNOWDA', 'DO_AERO_ANL', 'DO_PREP_OBS_AERO', 'NET', 'MODE', 'FHOUT_GFS', - 'FHMAX_HF_GFS', 'FHOUT_GFS', 'FHMAX_FITS', 'FHMAX', 'FHOUT', 'FHMAX_GFS', 'DO_GSISOILDA', 'DO_LAND_IAU'] - - archive_dict = AttrDict() - for key in keys: - try: - archive_dict[key] = archive.task_config[key] - except KeyError as ee: - logger.warning(f"WARNING: key ({key}) not found in archive.task_config!") - - # Also import all COMIN* and COMOUT* directory and template variables - for key in archive.task_config.keys(): - if key.startswith(("COM_", "COMIN_", "COMOUT_")): - archive_dict[key] = archive.task_config.get(key) + # Get the RUN type and NET to determine which arcdir method to call + RUN = archive.task_config.RUN + NET = archive.task_config.get('NET', 'gfs') with chdir(config.ROTDIR): - # Determine which archives to create - arcdir_set = archive.configure_vrfy(archive_dict) + # Determine which system we're archiving for and call the appropriate method + if NET == 'gefs': + logger.info(f"Archiving GEFS data for cycle {archive.task_config.current_cycle}") + arcdir_result = archive_vars.gefs_arcdir() + elif NET == 'gcafs': + logger.info(f"Archiving GCAFS data for cycle {archive.task_config.current_cycle}") + arcdir_result = archive_vars.gcafs_arcdir() + else: # gfs, gdas (default) + logger.info(f"Archiving GFS/GDAS data for RUN={RUN}, cycle {archive.task_config.current_cycle}") + arcdir_result = archive_vars.gfs_arcdir() + + # Extract the file_set and mkdir_list from the result + file_set = arcdir_result['file_set'] + mkdir_list = arcdir_result['mkdir_list'] + + # Construct the arcdir_set in the format expected by execute_store_products + arcdir_set = { + 'mkdir': mkdir_list, + 'copy': file_set + } + + logger.info(f"Archiving {len(file_set)} files to {len(mkdir_list)} directories") + logger.debug(f"arcdir_set: {arcdir_set}") # Populate the product archive (ARCDIR) archive.execute_store_products(arcdir_set) diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py new file mode 100644 index 00000000000..6f325af618f --- /dev/null +++ b/ush/python/pygfs/task/archive_vars.py @@ -0,0 +1,541 @@ +#!/usr/bin/env python3 +""" +Archive Variables Task + +Overview +-------- +This module constructs cycle-specific COM directory path variables and file sets +required for archiving verification (vrfy) data for GFS, GEFS, and GCAFS systems. + +Architecture +------------ +- One method (_calculate_com_paths) for all COM variables with logic for grids, loops +- Separate method for each arcdir YAML that builds complete file sets: + * gfs_arcdir() - corresponds to gfs_arcdir.yaml.j2 + * gefs_arcdir() - corresponds to gefs_arcdir.yaml.j2 + * gcafs_arcdir() - corresponds to gcafs_arcdir.yaml.j2 + +Key Methods +----------- +_get_cycle_vars(): + Computes cycle-specific variables (cycle_HH, cycle_YMDH, cycle_YMD, head) + +_calculate_com_paths(): + Generates all COM paths (ROTDIR-based) used across all arcdir YAMLs, + including grid loops (0p25, 0p50, 1p00) and conditional path logic + +gfs_arcdir(): + Complete file set generation for GFS archiving (gfs_arcdir.yaml.j2) + +gefs_arcdir(): + Complete file set generation for GEFS archiving (gefs_arcdir.yaml.j2) + +gcafs_arcdir(): + Complete file set generation for GCAFS archiving (gcafs_arcdir.yaml.j2) + +Logging +------- +All public operational methods are decorated with @logit(logger). +""" +import os +from logging import getLogger +from typing import Any, Dict +from wxflow import Task, logit, to_YMD, to_YMDH, Template, TemplateConstants + +logger = getLogger(__name__.split('.')[-1]) + + +class ArchiveVrfy(Task): + """ + Task class for archive verification operations. + + This class handles archiving for three systems: + - GFS: Global Forecast System + - GEFS: Global Ensemble Forecast System + - GCAFS: Global Climate Analysis Forecast System + + Each system has a corresponding method that builds the complete file set + for archiving, corresponding to the respective arcdir YAML file. + """ + + @logit(logger, name="ArchiveVrfy") + def __init__(self, config: Dict[str, Any]) -> None: + """Constructor for the ArchiveVrfy task + + Parameters + ---------- + config : Dict[str, Any] + Incoming configuration for the task from the environment + """ + super().__init__(config) + + @logit(logger) + def add_general_vars(self) -> None: + """Format general variables for archive operations. + + Updates resolution variables to be 3-digit formatted strings. + This ensures consistent naming conventions for ocean and ice resolution + specifications (e.g., 25 -> '025', 100 -> '100'). + + Variables updated (if present in task_config): + - OCNRES: Ocean resolution + - ICERES: Ice resolution + """ + # Update these keys to be 3 digits if they are part of task_config + for key in ['OCNRES', 'ICERES']: + try: + self.task_config[key] = f"{self.task_config[key]:03d}" + except KeyError: + logger.info(f"key ({key}) not found in task_config") + + @logit(logger) + def get_all_yaml_vars(self) -> Dict[str, Any]: + """Collect all archive variables and build complete arch_dict for YAML templates. + + This method: + 1. Formats general variables (OCNRES, ICERES) + 2. Determines system type (GFS, GEFS, GCAFS) from NET + 3. Dispatches to appropriate system-specific method + 4. Builds complete arch_dict with all task_config and archive variables + + Returns + ------- + Dict[str, Any] + Complete arch_dict ready for configure_vrfy() and Jinja2 templates, + containing all task_config variables plus: + - cycle_HH, cycle_YMDH, cycle_YMD, head: Cycle-specific variables + - COMIN_*: All COM directory paths + - file_set: List of [source, destination] file pairs for archiving + - mkdir_list: List of directories to create + + Notes + ----- + The NET variable determines which archiving method is called: + - NET='gefs' → gefs_arcdir() + - NET='gcafs' → gcafs_arcdir() + - Otherwise → gfs_arcdir() (handles gfs, gdas, enkfgdas, enkfgfs) + """ + # Format general variables (e.g., OCNRES, ICERES to 3-digit strings) + self.add_general_vars() + + NET = self.task_config.get('NET', 'gfs') + RUN = self.task_config.RUN + + # Dispatch to appropriate system-specific method based on NET + if NET == 'gefs': + logger.info(f"Collecting GEFS archive variables for cycle {self.task_config.current_cycle}") + arcdir_result = self.gefs_arcdir() + elif NET == 'gcafs': + logger.info(f"Collecting GCAFS archive variables for cycle {self.task_config.current_cycle}") + arcdir_result = self.gcafs_arcdir() + else: # gfs, gdas, enkfgdas, enkfgfs (default) + logger.info(f"Collecting GFS/GDAS archive variables for RUN={RUN}, cycle {self.task_config.current_cycle}") + arcdir_result = self.gfs_arcdir() + + # Build complete arch_dict with all variables for configure_vrfy and Jinja2 templates + arch_dict = dict(self.task_config) + + # Add cycle-specific variables (cycle_HH, cycle_YMDH, cycle_YMD, head) + arch_dict.update(arcdir_result['cycle_vars']) + + # Add COM paths (COMIN_ATMOS_ANALYSIS, COMIN_ATMOS_GRIB_*, etc.) + arch_dict.update(arcdir_result['com_paths']) + + # Add file_set and mkdir_list for Jinja2 templates + arch_dict['file_set'] = arcdir_result['file_set'] + arch_dict['mkdir_list'] = arcdir_result['mkdir_list'] + + logger.info(f"Built arch_dict with {len(arch_dict['file_set'])} files to archive in {len(arch_dict['mkdir_list'])} directories") + logger.debug(f"arch_dict keys: {list(arch_dict.keys())}") + + return arch_dict + + @logit(logger) + def _get_cycle_vars(self) -> Dict[str, Any]: + """Calculate cycle-specific variables using wxflow timetools. + + Returns + ------- + Dict[str, Any] + Dictionary containing: + - cycle_HH: Cycle hour (e.g., '00', '06') + - cycle_YMDH: Full cycle timestamp (YYYYMMDDHH) + - cycle_YMD: Cycle date (YYYYMMDD) + - head: System head designation (e.g., 'gfs.t00z.') + """ + current_cycle = self.task_config.current_cycle + + cycle_HH = current_cycle.strftime("%H") + cycle_YMDH = to_YMDH(current_cycle) + cycle_YMD = to_YMD(current_cycle) + + # Build head string (e.g., 'gfs.t00z.') + RUN = self.task_config.RUN + head = f"{RUN}.t{cycle_HH}z." + + return { + 'cycle_HH': cycle_HH, + 'cycle_YMDH': cycle_YMDH, + 'cycle_YMD': cycle_YMD, + 'head': head + } + + @logit(logger) + def _get_template_dict(self) -> Dict[str, str]: + """Create template substitution dictionary for COM path generation. + + This method builds the base dictionary used for template variable substitution. + For GEFS, it includes MEMDIR: 'ensstat' to support ensemble statistics paths. + + Returns + ------- + Dict[str, str] + Template substitution dictionary with keys: + - ROTDIR: Rotating directory path + - RUN: Run type (gfs, gdas, gefs, etc.) + - YMD/PDY: Cycle date (YYYYMMDD) + - HH/cyc: Cycle hour (HH) + - GRID: Grid resolution (added per-call for grid-specific paths) + - MEMDIR: 'ensstat' (GEFS only, for ensemble statistics) + + Examples + -------- + GFS/GDAS: + {'ROTDIR': '/path', 'RUN': 'gfs', 'YMD': '20240101', 'HH': '00', ...} + + GEFS: + {'ROTDIR': '/path', 'RUN': 'gefs', 'YMD': '20240101', 'HH': '00', + 'MEMDIR': 'ensstat', ...} + """ + cycle_vars = self._get_cycle_vars() + + # Base template substitution dictionary + base_dict = { + 'ROTDIR': self.task_config.ROTDIR, + 'RUN': self.task_config.RUN, + 'YMD': cycle_vars['cycle_YMD'], + 'HH': cycle_vars['cycle_HH'], + 'PDY': cycle_vars['cycle_YMD'], + 'cyc': cycle_vars['cycle_HH'] + } + + # GEFS-specific: Add MEMDIR for ensemble statistics + # Corresponds to YAML: '${MEMDIR}': 'ensstat' + if 'gefs' in self.task_config.RUN.lower(): + base_dict['MEMDIR'] = 'ensstat' + + return base_dict + + @logit(logger) + def _calculate_com_paths(self, base_dict: Dict[str, str]) -> Dict[str, str]: + """Calculate all COM paths used across arcdir YAMLs. + + This method generates all ROTDIR-based COM paths with logic for: + - Multiple grids (0p25, 0p50, 1p00) for GRIB files + - GEFS ensemble statistics (COMIN_ATMOS_ENSSTAT_1p00) + - Conditional paths based on RUN, MODE, DO_* flags + - All paths used by gfs_arcdir, gefs_arcdir, gcafs_arcdir YAMLs + + Parameters + ---------- + base_dict : Dict[str, str] + Base template substitution dictionary from _get_template_dict() + + Returns + ------- + Dict[str, str] + Dictionary mapping COM variable names to resolved paths. + Examples: + - COMIN_ATMOS_ANALYSIS: /path/to/analysis + - COMIN_ATMOS_GRIB_0p25: /path/to/grib/0p25 + - COMIN_ATMOS_GRIB_0p50: /path/to/grib/0p50 + - COMIN_ATMOS_GRIB_1p00: /path/to/grib/1p00 + - COMIN_ATMOS_ENSSTAT_1p00: /path/to/ensstat (GEFS only) + """ + com_paths = {} + + # Common paths (always needed) + common_templates = [ + ('COMIN_ATMOS_ANALYSIS', 'COM_ATMOS_ANALYSIS_TMPL'), + ('COMIN_ATMOS_GENESIS', 'COM_ATMOS_GENESIS_TMPL'), + ('COMIN_ATMOS_HISTORY', 'COM_ATMOS_HISTORY_TMPL'), + ('COMIN_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL'), + ('COMIN_CHEM_ANALYSIS', 'COM_CHEM_ANALYSIS_TMPL'), + ('COMIN_SNOW_ANALYSIS', 'COM_SNOW_ANALYSIS_TMPL'), + ('COMIN_OBS', 'COM_OBS_TMPL'), + ('COMOUT_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL'), + ] + + for com_key, template_key in common_templates: + template = self.task_config.get(template_key, '') + if template: + com_paths[com_key] = Template.substitute_string( + template, TemplateConstants.DOLLAR_CURLY_BRACE, base_dict.get) + else: + logger.warning(f"Template {template_key} not found for {com_key}") + com_paths[com_key] = '' + + # Grid-specific paths (loop over grids: 0p25, 0p50, 1p00) + for grid in ["0p25", "0p50", "1p00"]: + grid_dict = base_dict.copy() + grid_dict['GRID'] = grid + + template = self.task_config.get('COM_ATMOS_GRIB_GRID_TMPL', '') + if template: + com_key = f"COMIN_ATMOS_GRIB_{grid}" + com_paths[com_key] = Template.substitute_string( + template, TemplateConstants.DOLLAR_CURLY_BRACE, grid_dict.get) + else: + logger.warning(f"COM_ATMOS_GRIB_GRID_TMPL not found for grid {grid}") + + # GEFS-specific: Ensemble statistics path + # Corresponds to YAML: COMIN_ATMOS_ENSSTAT_1p00 with MEMDIR='ensstat' + if 'gefs' in self.task_config.RUN.lower(): + ensstat_dict = base_dict.copy() + ensstat_dict['GRID'] = '1p00' + # MEMDIR is already in base_dict for GEFS (added by _get_template_dict) + + template = self.task_config.get('COM_ATMOS_GRIB_GRID_TMPL', '') + if template: + com_paths['COMIN_ATMOS_ENSSTAT_1p00'] = Template.substitute_string( + template, TemplateConstants.DOLLAR_CURLY_BRACE, ensstat_dict.get) + else: + logger.warning("COM_ATMOS_GRIB_GRID_TMPL not found for COMIN_ATMOS_ENSSTAT_1p00") + + return com_paths + + @logit(logger) + def gfs_arcdir(self) -> Dict[str, Any]: + """Build complete file set for GFS archiving (gfs_arcdir.yaml.j2). + + This method corresponds to gfs_arcdir.yaml.j2 and builds the complete + file set with all logic, loops, and conditionals for GFS archiving. + + Returns + ------- + Dict[str, Any] + Dictionary containing: + - cycle_vars: Cycle-specific variables + - com_paths: All COM paths + - file_set: List of [source, destination] file pairs + - mkdir_list: List of directories to create + """ + cycle_vars = self._get_cycle_vars() + base_dict = self._get_template_dict() + com_paths = self._calculate_com_paths(base_dict) + + arcdir = self.task_config.ARCDIR + vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") + + file_set = [] + mkdir_list = [arcdir] + + head = cycle_vars['head'] + cycle_YMDH = cycle_vars['cycle_YMDH'] + cycle_YMD = cycle_vars['cycle_YMD'] + cycle_HH = cycle_vars['cycle_HH'] + + RUN = self.task_config.RUN + MODE = self.task_config.get('MODE', 'cycled') + CDUMP = self.task_config.get('CDUMP', RUN) + + # Deterministic files (not enkf) + if "enkf" not in RUN: + # Common deterministic files + det_files = [ + # Log files + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf000.txt", f"{arcdir}/{head}logf000.txt"], + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf001.txt", f"{arcdir}/{head}logf001.txt"], + + # Restart files + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.coupler.res", + f"{arcdir}/{cycle_YMDH}.coupler.res"], + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.fv_core.res.nc", + f"{arcdir}/{cycle_YMDH}.fv_core.res.nc"], + ] + file_set.extend(det_files) + + # Analysis files (cycled mode) + if MODE == "cycled": + det_anl_files = [ + # Analysis files + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atmanl.nc", + f"{arcdir}/{head}atmanl.nc"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}sfcanl.nc", + f"{arcdir}/{head}sfcanl.nc"], + + # Radiance diagnostic files + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias", + f"{arcdir}/{head}abias"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_pc", + f"{arcdir}/{head}abias_pc"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_air", + f"{arcdir}/{head}abias_air"], + ] + file_set.extend(det_anl_files) + + # GFS-specific files + if RUN == "gfs": + # GRIB2 files for multiple grids + for grid in ["0p25", "0p50", "1p00"]: + com_key = f"COMIN_ATMOS_GRIB_{grid}" + if com_key in com_paths: + # Loop over forecast hours (example: 0 to FHMAX_GFS by FHOUT_GFS) + FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) + FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) + + for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS): + fhr_str = str(fhr).zfill(3) + file_set.append([ + f"{com_paths[com_key]}/{head}pgrb2.{grid}.f{fhr_str}", + f"{arcdir}/{head}pgrb2.{grid}.f{fhr_str}" + ]) + + # Genesis tracker files + if self.task_config.get('DO_GENESIS', False): + file_set.extend([ + [f"{com_paths['COMIN_ATMOS_GENESIS']}/genesis.{cycle_YMDH}.dat", + f"{arcdir}/genesis.{cycle_YMDH}.dat"], + ]) + + # TC tracker files + if self.task_config.get('DO_TRACKER', False): + file_set.extend([ + [f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunix.{cycle_YMDH}", + f"{arcdir}/atcfunix.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_TRACK']}/storms.{cycle_YMDH}", + f"{arcdir}/storms.{cycle_YMDH}"], + ]) + + # Fit2Obs files + if self.task_config.get("DO_FIT2OBS", False): + fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) + mkdir_list.append(fit2obs_dir) + + file_set.extend([ + [f"{com_paths['COMIN_OBS']}/prepbufr.{cycle_YMDH}", + f"{fit2obs_dir}/prepbufr.{cycle_YMDH}"], + [f"{com_paths['COMIN_OBS']}/prepbufr_acft.{cycle_YMDH}", + f"{fit2obs_dir}/prepbufr_acft.{cycle_YMDH}"], + ]) + + # GDAS-specific files + elif RUN == "gdas": + gdas_files = [ + # Analysis increment files + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atminc.nc", + f"{arcdir}/{head}atminc.nc"], + + # Observation files + [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr", + f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr"], + [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles", + f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles"], + ] + file_set.extend(gdas_files) + + else: # Ensemble files (enkfgdas, enkfgfs) + # EnKF ensemble mean and spread files + enkf_files = [ + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}ensmean.nc", + f"{arcdir}/{head}ensmean.nc"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}enssprd.nc", + f"{arcdir}/{head}enssprd.nc"], + ] + + # Loop over ensemble members + NMEM_ENS = self.task_config.get('NMEM_ENS', 80) + for mem in range(1, NMEM_ENS + 1): + mem_str = str(mem).zfill(3) + enkf_files.append([ + f"{com_paths['COMIN_ATMOS_ANALYSIS']}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc", + f"{arcdir}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc" + ]) + + file_set.extend(enkf_files) + + return { + 'cycle_vars': cycle_vars, + 'com_paths': com_paths, + 'file_set': file_set, + 'mkdir_list': mkdir_list + } + + @logit(logger) + def gefs_arcdir(self) -> Dict[str, Any]: + """Build complete file set for GEFS archiving (gefs_arcdir.yaml.j2). + + This method corresponds to gefs_arcdir.yaml.j2 and builds the complete + file set for GEFS ensemble forecast archiving. + + Returns + ------- + Dict[str, Any] + Dictionary containing: + - cycle_vars: Cycle-specific variables + - com_paths: All COM paths (includes COMIN_ATMOS_ENSSTAT_1p00) + - file_set: List of [source, destination] file pairs + - mkdir_list: List of directories to create + """ + cycle_vars = self._get_cycle_vars() + base_dict = self._get_template_dict() + com_paths = self._calculate_com_paths(base_dict) + + # GEFS_ARCH is where GEFS ensemble statistics will be archived + # Corresponds to YAML: {% set GEFS_ARCH = ROTDIR ~ "/gefsarch" %} + gefs_arch = os.path.join(self.task_config.ROTDIR, "gefsarch") + + file_set = [] + mkdir_list = [gefs_arch] + + head = cycle_vars['head'] + + # GEFS ensemble statistics files + # Use COMIN_ATMOS_ENSSTAT_1p00 which includes MEMDIR='ensstat' + ensstat_path = com_paths.get('COMIN_ATMOS_ENSSTAT_1p00', '') + + if ensstat_path and os.path.exists(ensstat_path): + # Select ensemble statistics files to archive + # Corresponds to YAML: ensstat_files loop + FHMIN_GFS = self.task_config.get('FHMIN_GFS', 0) + FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) + FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) + + for fhr in range(FHMIN_GFS, FHMAX_GFS + FHOUT_GFS, FHOUT_GFS): + fhr_str = str(fhr).zfill(3) + # Corresponds to YAML: head ~ "mean.pres_." ~ "1p00" ~ ".f" ~ fhr ~ ".grib2" + source_file = f"{ensstat_path}/{head}mean.pres_.1p00.f{fhr_str}.grib2" + file_set.append([source_file, gefs_arch]) + else: + if not ensstat_path: + logger.warning("COMIN_ATMOS_ENSSTAT_1p00 not found in com_paths") + else: + logger.warning(f"COMIN_ATMOS_ENSSTAT_1p00 path does not exist: {ensstat_path}") + + return { + 'cycle_vars': cycle_vars, + 'com_paths': com_paths, + 'file_set': file_set, + 'mkdir_list': mkdir_list + } + + @logit(logger) + def gcafs_arcdir(self) -> Dict[str, Any]: + """Build complete file set for GCAFS archiving (gcafs_arcdir.yaml.j2). + + This method corresponds to gcafs_arcdir.yaml.j2. Currently delegates + to GFS archiving logic as GCAFS uses similar file structure. + + Returns + ------- + Dict[str, Any] + Dictionary containing: + - cycle_vars: Cycle-specific variables + - com_paths: All COM paths + - file_set: List of [source, destination] file pairs + - mkdir_list: List of directories to create + """ + # GCAFS uses same archiving structure as GFS + # If GCAFS-specific logic is needed, implement here + logger.info("GCAFS archiving using GFS archiving logic") + return self.gfs_arcdir() From 34ef1ba28c3c8257880821c370cf2d00169b8e67 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Tue, 2 Dec 2025 16:36:38 -0500 Subject: [PATCH 02/29] arch_vrfy var creation yamls and exscripts --- parm/archive/gcafs_arcdir.yaml.j2 | 117 +++----------------- parm/archive/gefs_arcdir.yaml.j2 | 47 +++----- parm/archive/gfs_arcdir.yaml.j2 | 173 +++--------------------------- scripts/exgdas_enkf_earc_vrfy.py | 30 +----- scripts/exglobal_archive_vrfy.py | 38 +------ 5 files changed, 54 insertions(+), 351 deletions(-) diff --git a/parm/archive/gcafs_arcdir.yaml.j2 b/parm/archive/gcafs_arcdir.yaml.j2 index c4a548c4e94..c23649f6e08 100644 --- a/parm/archive/gcafs_arcdir.yaml.j2 +++ b/parm/archive/gcafs_arcdir.yaml.j2 @@ -1,107 +1,20 @@ -{% set cycle_HH = current_cycle | strftime("%H") %} -{% set cycle_YMDH = current_cycle | to_YMDH %} -{% set cycle_YMD = current_cycle | to_YMD %} -{% set head = RUN + ".t" + cycle_HH + "z." %} +# NOTE: Complex logic (cycle vars, COM paths, file set building) has been moved to +# ush/python/pygfs/task/archive_vars.py (ArchiveVrfy.gcafs_arcdir() method) +# This YAML now receives pre-calculated file_set and mkdir_list from Python -# Select data to store in the ARCDIR and VFYARC from deterministic runs -# This file set will contain all source-destination pairs to send to the FileHandler for copying -{% set file_set = [] %} +# The Python code calculates and returns: +# - file_set: List of [source, destination] pairs +# - mkdir_list: List of directories to create +# Note: Currently delegates to GFS archiving logic -# Declare the VFYARC where Fit2Obs data will be sent -{% set VFYARC = ROTDIR ~ "/vrfyarch" %} - -# Deterministic files -{% if "enkf" not in RUN %} - # Common files to be added to both the gcafs and gcdas keys below - {% set det_files = [] %} - - # Deterministic analysis files (generated for cycled experiments) - {% set det_anl_files = [] %} - - {% if DO_AERO_ANL %} - {% do det_anl_files.append([COMIN_CHEM_ANALYSIS ~ "/" ~ head ~ "aerostat.tgz", - ARCDIR ~ "/aerostat." ~ RUN ~ "." ~ cycle_YMDH ~ ".tgz"]) %} - {% endif %} - - {% if DO_PREP_OBS_AERO == True %} - {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroobs", - ARCDIR ~ "/aeroobs." ~ RUN ~ "." ~ cycle_YMDH]) %} - {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroawobs", - ARCDIR ~ "/aeroawobs." ~ RUN ~ "." ~ cycle_YMDH]) %} - {% endif %} - - # GCAFS-specific files - {% set gfs_files = [] %} - {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} - {% do gfs_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", - ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} - {% endfor %} - - # GCAFS Fit2Obs data - {% set fit2obs_files = [] %} - {% for fhr in range(0, FHMAX_FITS + 1, 6) %} - {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} - {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} - {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sfcfile, - VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sfcfile ]) %} - {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sigfile, - VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sigfile ]) %} - {% endfor %} - - # GCDAS-specific files - {% set gdas_files = [] %} - {% for fhr in range(0, FHMAX + 1, FHOUT) %} - {% do gdas_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", - ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} - {% endfor %} - - # Now append the necessary file pairs to file_set - # Common deterministic files - {% set file_set = file_set + det_files %} - {% if MODE == "cycled" %} - {% set file_set = file_set + det_anl_files %} - {% endif %} - - # Run-specific deterministic files - {% if RUN == "gcafs" %} - {% set file_set = file_set + gfs_files %} - # Fit2Obs files - {% if DO_FIT2OBS == True %} - {% set file_set = file_set + fit2obs_files %} - {% endif %} - {% elif RUN == "gcdas" %} - {% set file_set = file_set + gdas_files %} - {% endif %} - -{% else %} # End of deterministic files - - # Ensemble analysis files - {% set enkf_files = [] %} - {% if DO_JEDIATMENS == True %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "atmensstat", - ARCDIR ~ "/atmensstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% else %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "enkfstat.txt", - ARCDIR ~ "/enkfstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "gsistat.ensmean.txt", - ARCDIR ~ "/gsistat." ~ RUN ~ "." ~ cycle_YMDH ~ ".ensmean"]) %} - {% endif %} - - # Construct the final file set - {% set file_set = file_set + enkf_files %} - -{% endif %} - - -# Actually write the yaml +# Directory creation mkdir: - - "{{ ARCDIR }}" - - {% if DO_FIT2OBS == True %} - - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" - {% endif %} +{% for dir_path in mkdir_list %} + - "{{ dir_path }}" +{% endfor %} +# File copying operations copy: - {% for source_dest_pair in file_set %} - - {{ source_dest_pair }} - {% endfor %} +{% for source_dest_pair in file_set %} + - {{ source_dest_pair }} +{% endfor %} diff --git a/parm/archive/gefs_arcdir.yaml.j2 b/parm/archive/gefs_arcdir.yaml.j2 index 2f8da8bd444..a40f9e61971 100644 --- a/parm/archive/gefs_arcdir.yaml.j2 +++ b/parm/archive/gefs_arcdir.yaml.j2 @@ -1,36 +1,19 @@ -{% set cycle_HH = current_cycle | strftime("%H") %} -{% set cycle_YMDH = current_cycle | to_YMDH %} -{% set cycle_YMD = current_cycle | to_YMD %} -{% set head = RUN + ".t" + cycle_HH + "z." %} +# NOTE: Complex logic (cycle vars, COM paths, file set building) has been moved to +# ush/python/pygfs/task/archive_vars.py (ArchiveVrfy.gefs_arcdir() method) +# This YAML now receives pre-calculated file_set and mkdir_list from Python -# Declare the GEFS_ARCH where atmos data will be sent -{% set GEFS_ARCH = ROTDIR ~ "/gefsarch" %} +# The Python code calculates and returns: +# - file_set: List of [source, destination] pairs for GEFS ensstat files +# - mkdir_list: List of directories to create (GEFS_ARCH = ROTDIR/gefsarch) -{% set file_set = [] %} - -{% set tmpl_dict = ({ '${ROTDIR}':ROTDIR, - '${RUN}':RUN, - '${YMD}':cycle_YMD, - '${HH}':cycle_HH, - '${GRID}': '1p00', - '${MEMDIR}': 'ensstat' }) %} - -{% set COMIN_ATMOS_ENSSTAT_1p00 = COM_ATMOS_GRIB_GRID_TMPL | replace_tmpl(tmpl_dict) %} - -# Select ensstat files to copy to the arcdir -{% set ensstat_files = [] %} -{% if path_exists(COMIN_ATMOS_ENSSTAT_1p00) %} - {% for fhr in range(FHMIN_GFS, FHMAX_GFS + FHOUT_GFS, FHOUT_GFS) %} - {% do ensstat_files.append([COMIN_ATMOS_ENSSTAT_1p00 ~ "/" ~ head ~ "mean.pres_." ~ - "1p00" ~ ".f" ~ '%03d'|format(fhr) ~ ".grib2", - GEFS_ARCH]) %} - {% endfor %} -{% endif %} -{% set file_set = ensstat_files %} -# Actually write the yaml +# Directory creation mkdir: - - "{{ GEFS_ARCH }}" +{% for dir_path in mkdir_list %} + - "{{ dir_path }}" +{% endfor %} + +# File copying operations copy: - {% for source_dest_pair in file_set %} - - {{ source_dest_pair }} - {% endfor %} +{% for source_dest_pair in file_set %} + - {{ source_dest_pair }} +{% endfor %} diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index 877691ff5f9..9260b734042 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -1,164 +1,19 @@ -{% set cycle_HH = current_cycle | strftime("%H") %} -{% set cycle_YMDH = current_cycle | to_YMDH %} -{% set cycle_YMD = current_cycle | to_YMD %} -{% set head = RUN + ".t" + cycle_HH + "z." %} +# NOTE: Complex logic (cycle vars, COM paths, file set building) has been moved to +# ush/python/pygfs/task/archive_vars.py (ArchiveVrfy.gfs_arcdir() method) +# This YAML now receives pre-calculated file_set and mkdir_list from Python -# Select data to store in the ARCDIR and VFYARC from deterministic runs -# This file set will contain all source-destination pairs to send to the FileHandler for copying -{% set file_set = [] %} +# The Python code calculates and returns: +# - file_set: List of [source, destination] pairs +# - mkdir_list: List of directories to create -# Declare the VFYARC where Fit2Obs data will be sent -{% set VFYARC = ROTDIR ~ "/vrfyarch" %} - -# Deterministic files -{% if "enkf" not in RUN %} - # Common files to be added to both the gfs and gdas keys below - {% set det_files = [] %} - # Cyclone forecasts, produced for both gdas and gfs cycles - ## Only created if tracking is on and there were systems to track - {% if path_exists(COMIN_ATMOS_TRACK ~ "/atcfunix." ~ RUN ~ "." ~ cycle_YMDH) %} - {% do det_files.append([COMIN_ATMOS_TRACK ~ "/atcfunix." ~ RUN ~ "." ~ cycle_YMDH, - ARCDIR ~"/atcfunix." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do det_files.append([COMIN_ATMOS_TRACK ~ "/atcfunixp." ~ RUN ~ "." ~ cycle_YMDH, - ARCDIR ~ "/atcfunixp." ~ RUN ~ "." ~ cycle_YMDH]) %} - {% endif %} - - # Cyclone tracking data - {% for basin in ["epac", "natl"] %} - {% if path_exists(COMIN_ATMOS_TRACK + "/" + basin) %} - {% do det_files.append([COMIN_ATMOS_TRACK ~ "/" ~ basin, - ARCDIR ~ "/" ~ basin ]) %} - {% endif %} - {% endfor %} - - # Deterministic analysis files (generated for cycled experiments) - {% set det_anl_files = [] %} - # Analysis data (if we are running in cycled mode) - {% do det_anl_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.analysis.grib2", - ARCDIR ~ "/pgbanl." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} - - {% if DO_JEDIATMVAR == True %} - {% do det_anl_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "stat.atm.tar", - ARCDIR ~ "/atmstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% else %} - {% do det_anl_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "gsistat.txt", - ARCDIR ~ "/gsistat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% endif %} - - {% if DO_JEDISNOWDA == True %} - {% do det_anl_files.append([COMIN_SNOW_ANALYSIS ~ "/" ~ head ~ "snow_analysis.ioda_hofx.tar", - ARCDIR ~ "/snowstat." ~ RUN ~ "." ~ cycle_YMDH ~ ".tar"]) %} - {% endif %} - - {% if DO_AERO_ANL %} - {% do det_anl_files.append([COMIN_CHEM_ANALYSIS ~ "/" ~ head ~ "aerostat.tgz", - ARCDIR ~ "/aerostat." ~ RUN ~ "." ~ cycle_YMDH ~ ".tgz"]) %} - {% endif %} - - {% if DO_PREP_OBS_AERO == True %} - {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroobs", - ARCDIR ~ "/aeroobs." ~ RUN ~ "." ~ cycle_YMDH]) %} - {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroawobs", - ARCDIR ~ "/aeroawobs." ~ RUN ~ "." ~ cycle_YMDH]) %} - {% endif %} - - # GFS-specific files - {% set gfs_files = [] %} - {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} - {% do gfs_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", - ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} - {% endfor %} - - # Cyclone genesis data (only present if there are storms) - {% if path_exists(COMIN_ATMOS_GENESIS ~ "/storms.gfso.atcf_gen." ~ cycle_YMDH) %} - {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/storms.gfso.atcf_gen." ~ cycle_YMDH, - ARCDIR ~ "/storms.gfso.atcf_gen." ~ cycle_YMDH ]) %} - {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/storms.gfso.atcf_gen.altg." ~ cycle_YMDH, - ARCDIR ~ "/storms.gfso.atcf_gen.altg." ~ cycle_YMDH ]) %} - {% endif %} - - {% if path_exists(COMIN_ATMOS_GENESIS ~ "/trak.gfso.atcfunix." ~ cycle_YMDH) %} - {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/trak.gfso.atcfunix." ~ cycle_YMDH, - ARCDIR ~ "/trak.gfso.atcfunix." ~ cycle_YMDH ]) %} - {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/trak.gfso.atcfunix.altg." ~ cycle_YMDH, - ARCDIR ~ "/trak.gfso.atcfunix.altg." ~ cycle_YMDH ]) %} - {% endif %} - - # GFS Fit2Obs data - {% set fit2obs_files = [] %} - {% for fhr in range(0, FHMAX_FITS + 1, 6) %} - {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} - {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} - {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sfcfile, - VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sfcfile ]) %} - {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sigfile, - VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sigfile ]) %} - {% endfor %} - - # GDAS-specific files - {% set gdas_files = [] %} - {% for fhr in range(0, FHMAX + 1, FHOUT) %} - {% do gdas_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", - ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} - {% endfor %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias.txt", - ARCDIR ~ "/abias." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias_pc.txt", - ARCDIR ~ "/abias_pc." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias_air.txt", - ARCDIR ~ "/abias_air." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias_int.txt", - ARCDIR ~ "/abias_int." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "analysis.dtf.a006.nc", - ARCDIR ~ "/dtfanl." ~ RUN ~ "." ~ cycle_YMDH ~ ".nc"]) %} - - # Now append the necessary file pairs to file_set - # Common deterministic files - {% set file_set = file_set + det_files %} - {% if MODE == "cycled" %} - {% set file_set = file_set + det_anl_files %} - {% endif %} - - # Run-specific deterministic files - {% if RUN == "gfs" %} - {% set file_set = file_set + gfs_files %} - # Fit2Obs files - {% if DO_FIT2OBS == True %} - {% set file_set = file_set + fit2obs_files %} - {% endif %} - {% elif RUN == "gdas" %} - {% set file_set = file_set + gdas_files %} - {% endif %} - -{% else %} # End of deterministic files - - # Ensemble analysis files - {% set enkf_files = [] %} - {% if DO_JEDIATMENS == True %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "stat.atm.tar", - ARCDIR ~ "/atmensstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% else %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "enkfstat.txt", - ARCDIR ~ "/enkfstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "gsistat.ensmean.txt", - ARCDIR ~ "/gsistat." ~ RUN ~ "." ~ cycle_YMDH ~ ".ensmean"]) %} - {% endif %} - - # Construct the final file set - {% set file_set = file_set + enkf_files %} - -{% endif %} - - -# Actually write the yaml +# Directory creation mkdir: - - "{{ ARCDIR }}" - - {% if DO_FIT2OBS == True %} - - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" - {% endif %} +{% for dir_path in mkdir_list %} + - "{{ dir_path }}" +{% endfor %} +# File copying operations copy_opt: - {% for source_dest_pair in file_set %} - - {{ source_dest_pair }} - {% endfor %} +{% for source_dest_pair in file_set %} + - {{ source_dest_pair }} +{% endfor %} diff --git a/scripts/exgdas_enkf_earc_vrfy.py b/scripts/exgdas_enkf_earc_vrfy.py index 8d50b6a8962..878a8b3c592 100755 --- a/scripts/exgdas_enkf_earc_vrfy.py +++ b/scripts/exgdas_enkf_earc_vrfy.py @@ -21,34 +21,14 @@ def main(): # Instantiate the ArchiveVrfy object for variable and file set calculation archive_vars = ArchiveVrfy(config) - # Get the NET and RUN type to determine which arcdir method to call - NET = archive.task_config.get('NET', 'gfs') - RUN = archive.task_config.RUN - with chdir(config.ROTDIR): - # Determine which system we're archiving for and call the appropriate method - # EnKF runs use the GFS archiving logic with ensemble-specific handling - logger.info(f"Archiving EnKF data for RUN={RUN}, cycle {archive.task_config.current_cycle}") - - if NET == 'gefs': - arcdir_result = archive_vars.gefs_arcdir() - elif NET == 'gcafs': - arcdir_result = archive_vars.gcafs_arcdir() - else: # gfs, gdas, enkfgdas, enkfgfs - arcdir_result = archive_vars.gfs_arcdir() - - # Extract the file_set and mkdir_list from the result - file_set = arcdir_result['file_set'] - mkdir_list = arcdir_result['mkdir_list'] - - # Construct the arcdir_set in the format expected by execute_store_products - arcdir_set = { - 'mkdir': mkdir_list, - 'copy': file_set - } + # Collect all archive variables in complete arch_dict for YAML templates + # This method handles everything: general vars, NET dispatch, cycle_vars, com_paths, file_set, mkdir_list + arch_dict = archive_vars.get_all_yaml_vars() - logger.info(f"Archiving {len(file_set)} files to {len(mkdir_list)} directories") + # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML + arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) # Populate the product archive (ARCDIR) archive.execute_store_products(arcdir_set) diff --git a/scripts/exglobal_archive_vrfy.py b/scripts/exglobal_archive_vrfy.py index 156e6568713..73d146a6d47 100755 --- a/scripts/exglobal_archive_vrfy.py +++ b/scripts/exglobal_archive_vrfy.py @@ -21,42 +21,14 @@ def main(): # Instantiate the ArchiveVrfy object for variable and file set calculation archive_vars = ArchiveVrfy(config) - # update these keys to be 3 digits if they are part of archive.task_config.keys - for key in ['OCNRES', 'ICERES']: - try: - archive.task_config[key] = f"{archive.task_config[key]:03d}" - except KeyError as ee: - logger.info(f"key ({key}) not found in archive.task_config!") - - # Get the RUN type and NET to determine which arcdir method to call - RUN = archive.task_config.RUN - NET = archive.task_config.get('NET', 'gfs') - with chdir(config.ROTDIR): - # Determine which system we're archiving for and call the appropriate method - if NET == 'gefs': - logger.info(f"Archiving GEFS data for cycle {archive.task_config.current_cycle}") - arcdir_result = archive_vars.gefs_arcdir() - elif NET == 'gcafs': - logger.info(f"Archiving GCAFS data for cycle {archive.task_config.current_cycle}") - arcdir_result = archive_vars.gcafs_arcdir() - else: # gfs, gdas (default) - logger.info(f"Archiving GFS/GDAS data for RUN={RUN}, cycle {archive.task_config.current_cycle}") - arcdir_result = archive_vars.gfs_arcdir() - - # Extract the file_set and mkdir_list from the result - file_set = arcdir_result['file_set'] - mkdir_list = arcdir_result['mkdir_list'] - - # Construct the arcdir_set in the format expected by execute_store_products - arcdir_set = { - 'mkdir': mkdir_list, - 'copy': file_set - } + # Collect all archive variables in complete arch_dict for YAML templates + # This method handles everything: general vars, NET dispatch, cycle_vars, com_paths, file_set, mkdir_list + arch_dict = archive_vars.get_all_yaml_vars() - logger.info(f"Archiving {len(file_set)} files to {len(mkdir_list)} directories") - logger.debug(f"arcdir_set: {arcdir_set}") + # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML + arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) # Populate the product archive (ARCDIR) archive.execute_store_products(arcdir_set) From 295e98a311224d7a7ad1d5fac1341410acac5325 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Tue, 2 Dec 2025 17:04:00 -0500 Subject: [PATCH 03/29] create nested function ush/python/pygfs/task/archive_vars.py --- ush/python/pygfs/task/archive_vars.py | 388 +++++++++++++++----------- 1 file changed, 231 insertions(+), 157 deletions(-) diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index 6f325af618f..33d194701d6 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -304,6 +304,178 @@ def _calculate_com_paths(self, base_dict: Dict[str, str]) -> Dict[str, str]: return com_paths + def _build_gfs_list(self, cycle_vars: Dict[str, Any], com_paths: Dict[str, str], + arcdir: str) -> Dict[str, list]: + """Build mkdir list and file set for GFS archiving. + + This method contains nested helper functions to build the directory list + and file set for GFS archiving. + + Parameters + ---------- + cycle_vars : Dict[str, Any] + Cycle-specific variables + com_paths : Dict[str, str] + COM directory paths + arcdir : str + Archive directory path + + Returns + ------- + Dict[str, list] + Dictionary containing 'mkdir_list' and 'file_set' + """ + + def build_mkdir_list() -> list: + """Build list of directories to create for GFS archiving.""" + mkdir_list = [arcdir] + + # Add fit2obs directory if enabled + RUN = self.task_config.RUN + if RUN == "gfs" and self.task_config.get("DO_FIT2OBS", False): + vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") + cycle_YMD = cycle_vars['cycle_YMD'] + cycle_HH = cycle_vars['cycle_HH'] + fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) + mkdir_list.append(fit2obs_dir) + + return mkdir_list + + def build_file_set() -> list: + """Build list of files to archive for GFS.""" + file_set = [] + + head = cycle_vars['head'] + cycle_YMDH = cycle_vars['cycle_YMDH'] + cycle_YMD = cycle_vars['cycle_YMD'] + cycle_HH = cycle_vars['cycle_HH'] + + RUN = self.task_config.RUN + MODE = self.task_config.get('MODE', 'cycled') + CDUMP = self.task_config.get('CDUMP', RUN) + + # Deterministic files (not enkf) + if "enkf" not in RUN: + # Common deterministic files + det_files = [ + # Log files + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf000.txt", f"{arcdir}/{head}logf000.txt"], + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf001.txt", f"{arcdir}/{head}logf001.txt"], + + # Restart files + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.coupler.res", + f"{arcdir}/{cycle_YMDH}.coupler.res"], + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.fv_core.res.nc", + f"{arcdir}/{cycle_YMDH}.fv_core.res.nc"], + ] + file_set.extend(det_files) + + # Analysis files (cycled mode) + if MODE == "cycled": + det_anl_files = [ + # Analysis files + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atmanl.nc", + f"{arcdir}/{head}atmanl.nc"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}sfcanl.nc", + f"{arcdir}/{head}sfcanl.nc"], + + # Radiance diagnostic files + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias", + f"{arcdir}/{head}abias"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_pc", + f"{arcdir}/{head}abias_pc"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_air", + f"{arcdir}/{head}abias_air"], + ] + file_set.extend(det_anl_files) + + # GFS-specific files + if RUN == "gfs": + # GRIB2 files for multiple grids + for grid in ["0p25", "0p50", "1p00"]: + com_key = f"COMIN_ATMOS_GRIB_{grid}" + if com_key in com_paths: + FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) + FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) + + for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS): + fhr_str = str(fhr).zfill(3) + file_set.append([ + f"{com_paths[com_key]}/{head}pgrb2.{grid}.f{fhr_str}", + f"{arcdir}/{head}pgrb2.{grid}.f{fhr_str}" + ]) + + # Genesis tracker files + if self.task_config.get('DO_GENESIS', False): + file_set.extend([ + [f"{com_paths['COMIN_ATMOS_GENESIS']}/genesis.{cycle_YMDH}.dat", + f"{arcdir}/genesis.{cycle_YMDH}.dat"], + ]) + + # TC tracker files + if self.task_config.get('DO_TRACKER', False): + file_set.extend([ + [f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunix.{cycle_YMDH}", + f"{arcdir}/atcfunix.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_TRACK']}/storms.{cycle_YMDH}", + f"{arcdir}/storms.{cycle_YMDH}"], + ]) + + # Fit2Obs files + if self.task_config.get("DO_FIT2OBS", False): + vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") + fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) + + file_set.extend([ + [f"{com_paths['COMIN_OBS']}/prepbufr.{cycle_YMDH}", + f"{fit2obs_dir}/prepbufr.{cycle_YMDH}"], + [f"{com_paths['COMIN_OBS']}/prepbufr_acft.{cycle_YMDH}", + f"{fit2obs_dir}/prepbufr_acft.{cycle_YMDH}"], + ]) + + # GDAS-specific files + elif RUN == "gdas": + gdas_files = [ + # Analysis increment files + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atminc.nc", + f"{arcdir}/{head}atminc.nc"], + + # Observation files + [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr", + f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr"], + [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles", + f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles"], + ] + file_set.extend(gdas_files) + + else: # Ensemble files (enkfgdas, enkfgfs) + # EnKF ensemble mean and spread files + enkf_files = [ + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}ensmean.nc", + f"{arcdir}/{head}ensmean.nc"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}enssprd.nc", + f"{arcdir}/{head}enssprd.nc"], + ] + + # Loop over ensemble members + NMEM_ENS = self.task_config.get('NMEM_ENS', 80) + for mem in range(1, NMEM_ENS + 1): + mem_str = str(mem).zfill(3) + enkf_files.append([ + f"{com_paths['COMIN_ATMOS_ANALYSIS']}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc", + f"{arcdir}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc" + ]) + + file_set.extend(enkf_files) + + return file_set + + # Call nested helper functions + return { + 'mkdir_list': build_mkdir_list(), + 'file_set': build_file_set() + } + @logit(logger) def gfs_arcdir(self) -> Dict[str, Any]: """Build complete file set for GFS archiving (gfs_arcdir.yaml.j2). @@ -325,140 +497,70 @@ def gfs_arcdir(self) -> Dict[str, Any]: com_paths = self._calculate_com_paths(base_dict) arcdir = self.task_config.ARCDIR - vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") - file_set = [] - mkdir_list = [arcdir] + # Build mkdir list and file set using helper method with nested functions + lists = self._build_gfs_list(cycle_vars, com_paths, arcdir) + + return { + 'cycle_vars': cycle_vars, + 'com_paths': com_paths, + 'file_set': lists['file_set'], + 'mkdir_list': lists['mkdir_list'] + } + + def _build_gefs_list(self, cycle_vars: Dict[str, Any], com_paths: Dict[str, str]) -> Dict[str, list]: + """Build mkdir list and file set for GEFS archiving. - head = cycle_vars['head'] - cycle_YMDH = cycle_vars['cycle_YMDH'] - cycle_YMD = cycle_vars['cycle_YMD'] - cycle_HH = cycle_vars['cycle_HH'] + This method contains nested helper functions to build the directory list + and file set for GEFS archiving. - RUN = self.task_config.RUN - MODE = self.task_config.get('MODE', 'cycled') - CDUMP = self.task_config.get('CDUMP', RUN) - - # Deterministic files (not enkf) - if "enkf" not in RUN: - # Common deterministic files - det_files = [ - # Log files - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf000.txt", f"{arcdir}/{head}logf000.txt"], - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf001.txt", f"{arcdir}/{head}logf001.txt"], - - # Restart files - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.coupler.res", - f"{arcdir}/{cycle_YMDH}.coupler.res"], - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.fv_core.res.nc", - f"{arcdir}/{cycle_YMDH}.fv_core.res.nc"], - ] - file_set.extend(det_files) - - # Analysis files (cycled mode) - if MODE == "cycled": - det_anl_files = [ - # Analysis files - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atmanl.nc", - f"{arcdir}/{head}atmanl.nc"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}sfcanl.nc", - f"{arcdir}/{head}sfcanl.nc"], - - # Radiance diagnostic files - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias", - f"{arcdir}/{head}abias"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_pc", - f"{arcdir}/{head}abias_pc"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_air", - f"{arcdir}/{head}abias_air"], - ] - file_set.extend(det_anl_files) - - # GFS-specific files - if RUN == "gfs": - # GRIB2 files for multiple grids - for grid in ["0p25", "0p50", "1p00"]: - com_key = f"COMIN_ATMOS_GRIB_{grid}" - if com_key in com_paths: - # Loop over forecast hours (example: 0 to FHMAX_GFS by FHOUT_GFS) - FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) - FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) - - for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS): - fhr_str = str(fhr).zfill(3) - file_set.append([ - f"{com_paths[com_key]}/{head}pgrb2.{grid}.f{fhr_str}", - f"{arcdir}/{head}pgrb2.{grid}.f{fhr_str}" - ]) - - # Genesis tracker files - if self.task_config.get('DO_GENESIS', False): - file_set.extend([ - [f"{com_paths['COMIN_ATMOS_GENESIS']}/genesis.{cycle_YMDH}.dat", - f"{arcdir}/genesis.{cycle_YMDH}.dat"], - ]) + Parameters + ---------- + cycle_vars : Dict[str, Any] + Cycle-specific variables + com_paths : Dict[str, str] + COM directory paths - # TC tracker files - if self.task_config.get('DO_TRACKER', False): - file_set.extend([ - [f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunix.{cycle_YMDH}", - f"{arcdir}/atcfunix.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_TRACK']}/storms.{cycle_YMDH}", - f"{arcdir}/storms.{cycle_YMDH}"], - ]) + Returns + ------- + Dict[str, list] + Dictionary containing 'mkdir_list' and 'file_set' + """ + gefs_arch = os.path.join(self.task_config.ROTDIR, "gefsarch") - # Fit2Obs files - if self.task_config.get("DO_FIT2OBS", False): - fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) - mkdir_list.append(fit2obs_dir) + def build_mkdir_list() -> list: + """Build list of directories to create for GEFS archiving.""" + return [gefs_arch] - file_set.extend([ - [f"{com_paths['COMIN_OBS']}/prepbufr.{cycle_YMDH}", - f"{fit2obs_dir}/prepbufr.{cycle_YMDH}"], - [f"{com_paths['COMIN_OBS']}/prepbufr_acft.{cycle_YMDH}", - f"{fit2obs_dir}/prepbufr_acft.{cycle_YMDH}"], - ]) + def build_file_set() -> list: + """Build list of files to archive for GEFS.""" + file_set = [] + head = cycle_vars['head'] - # GDAS-specific files - elif RUN == "gdas": - gdas_files = [ - # Analysis increment files - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atminc.nc", - f"{arcdir}/{head}atminc.nc"], - - # Observation files - [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr", - f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr"], - [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles", - f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles"], - ] - file_set.extend(gdas_files) - - else: # Ensemble files (enkfgdas, enkfgfs) - # EnKF ensemble mean and spread files - enkf_files = [ - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}ensmean.nc", - f"{arcdir}/{head}ensmean.nc"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}enssprd.nc", - f"{arcdir}/{head}enssprd.nc"], - ] - - # Loop over ensemble members - NMEM_ENS = self.task_config.get('NMEM_ENS', 80) - for mem in range(1, NMEM_ENS + 1): - mem_str = str(mem).zfill(3) - enkf_files.append([ - f"{com_paths['COMIN_ATMOS_ANALYSIS']}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc", - f"{arcdir}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc" - ]) - - file_set.extend(enkf_files) + # GEFS ensemble statistics files + ensstat_path = com_paths.get('COMIN_ATMOS_ENSSTAT_1p00', '') + + if ensstat_path and os.path.exists(ensstat_path): + FHMIN_GFS = self.task_config.get('FHMIN_GFS', 0) + FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) + FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) + + for fhr in range(FHMIN_GFS, FHMAX_GFS + FHOUT_GFS, FHOUT_GFS): + fhr_str = str(fhr).zfill(3) + source_file = f"{ensstat_path}/{head}mean.pres_.1p00.f{fhr_str}.grib2" + file_set.append([source_file, gefs_arch]) + else: + if not ensstat_path: + logger.warning("COMIN_ATMOS_ENSSTAT_1p00 not found in com_paths") + else: + logger.warning(f"COMIN_ATMOS_ENSSTAT_1p00 path does not exist: {ensstat_path}") + + return file_set + # Call nested helper functions return { - 'cycle_vars': cycle_vars, - 'com_paths': com_paths, - 'file_set': file_set, - 'mkdir_list': mkdir_list + 'mkdir_list': build_mkdir_list(), + 'file_set': build_file_set() } @logit(logger) @@ -481,42 +583,14 @@ def gefs_arcdir(self) -> Dict[str, Any]: base_dict = self._get_template_dict() com_paths = self._calculate_com_paths(base_dict) - # GEFS_ARCH is where GEFS ensemble statistics will be archived - # Corresponds to YAML: {% set GEFS_ARCH = ROTDIR ~ "/gefsarch" %} - gefs_arch = os.path.join(self.task_config.ROTDIR, "gefsarch") - - file_set = [] - mkdir_list = [gefs_arch] - - head = cycle_vars['head'] - - # GEFS ensemble statistics files - # Use COMIN_ATMOS_ENSSTAT_1p00 which includes MEMDIR='ensstat' - ensstat_path = com_paths.get('COMIN_ATMOS_ENSSTAT_1p00', '') - - if ensstat_path and os.path.exists(ensstat_path): - # Select ensemble statistics files to archive - # Corresponds to YAML: ensstat_files loop - FHMIN_GFS = self.task_config.get('FHMIN_GFS', 0) - FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) - FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) - - for fhr in range(FHMIN_GFS, FHMAX_GFS + FHOUT_GFS, FHOUT_GFS): - fhr_str = str(fhr).zfill(3) - # Corresponds to YAML: head ~ "mean.pres_." ~ "1p00" ~ ".f" ~ fhr ~ ".grib2" - source_file = f"{ensstat_path}/{head}mean.pres_.1p00.f{fhr_str}.grib2" - file_set.append([source_file, gefs_arch]) - else: - if not ensstat_path: - logger.warning("COMIN_ATMOS_ENSSTAT_1p00 not found in com_paths") - else: - logger.warning(f"COMIN_ATMOS_ENSSTAT_1p00 path does not exist: {ensstat_path}") + # Build mkdir list and file set using helper method with nested functions + lists = self._build_gefs_list(cycle_vars, com_paths) return { 'cycle_vars': cycle_vars, 'com_paths': com_paths, - 'file_set': file_set, - 'mkdir_list': mkdir_list + 'file_set': lists['file_set'], + 'mkdir_list': lists['mkdir_list'] } @logit(logger) From 662ef4a345b7a5dec0f948616d75df37a3fd5e17 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Tue, 2 Dec 2025 23:34:22 -0500 Subject: [PATCH 04/29] add missing key default --- ush/python/pygfs/task/archive_vars.py | 200 ++++++++++++++++---------- 1 file changed, 121 insertions(+), 79 deletions(-) diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index 33d194701d6..97ab46da2a6 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -254,6 +254,11 @@ def _calculate_com_paths(self, base_dict: Dict[str, str]) -> Dict[str, str]: """ com_paths = {} + # Helper function to get template variables with empty string default + def get_with_default(key): + """Return value from base_dict, or empty string if key not found.""" + return base_dict.get(key, '') + # Common paths (always needed) common_templates = [ ('COMIN_ATMOS_ANALYSIS', 'COM_ATMOS_ANALYSIS_TMPL'), @@ -270,7 +275,7 @@ def _calculate_com_paths(self, base_dict: Dict[str, str]) -> Dict[str, str]: template = self.task_config.get(template_key, '') if template: com_paths[com_key] = Template.substitute_string( - template, TemplateConstants.DOLLAR_CURLY_BRACE, base_dict.get) + template, TemplateConstants.DOLLAR_CURLY_BRACE, get_with_default) else: logger.warning(f"Template {template_key} not found for {com_key}") com_paths[com_key] = '' @@ -280,11 +285,16 @@ def _calculate_com_paths(self, base_dict: Dict[str, str]) -> Dict[str, str]: grid_dict = base_dict.copy() grid_dict['GRID'] = grid + # Helper function for grid_dict with empty string default + def get_grid_with_default(key): + """Return value from grid_dict, or empty string if key not found.""" + return grid_dict.get(key, '') + template = self.task_config.get('COM_ATMOS_GRIB_GRID_TMPL', '') if template: com_key = f"COMIN_ATMOS_GRIB_{grid}" com_paths[com_key] = Template.substitute_string( - template, TemplateConstants.DOLLAR_CURLY_BRACE, grid_dict.get) + template, TemplateConstants.DOLLAR_CURLY_BRACE, get_grid_with_default) else: logger.warning(f"COM_ATMOS_GRIB_GRID_TMPL not found for grid {grid}") @@ -295,10 +305,15 @@ def _calculate_com_paths(self, base_dict: Dict[str, str]) -> Dict[str, str]: ensstat_dict['GRID'] = '1p00' # MEMDIR is already in base_dict for GEFS (added by _get_template_dict) + # Helper function for ensstat_dict with empty string default + def get_ensstat_with_default(key): + """Return value from ensstat_dict, or empty string if key not found.""" + return ensstat_dict.get(key, '') + template = self.task_config.get('COM_ATMOS_GRIB_GRID_TMPL', '') if template: com_paths['COMIN_ATMOS_ENSSTAT_1p00'] = Template.substitute_string( - template, TemplateConstants.DOLLAR_CURLY_BRACE, ensstat_dict.get) + template, TemplateConstants.DOLLAR_CURLY_BRACE, get_ensstat_with_default) else: logger.warning("COM_ATMOS_GRIB_GRID_TMPL not found for COMIN_ATMOS_ENSSTAT_1p00") @@ -357,37 +372,43 @@ def build_file_set() -> list: # Deterministic files (not enkf) if "enkf" not in RUN: # Common deterministic files - det_files = [ - # Log files - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf000.txt", f"{arcdir}/{head}logf000.txt"], - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf001.txt", f"{arcdir}/{head}logf001.txt"], - - # Restart files - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.coupler.res", - f"{arcdir}/{cycle_YMDH}.coupler.res"], - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.fv_core.res.nc", - f"{arcdir}/{cycle_YMDH}.fv_core.res.nc"], - ] - file_set.extend(det_files) + if com_paths.get('COMIN_ATMOS_HISTORY'): + det_files = [ + # Log files + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf000.txt", f"{arcdir}/{head}logf000.txt"], + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf001.txt", f"{arcdir}/{head}logf001.txt"], + + # Restart files + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.coupler.res", + f"{arcdir}/{cycle_YMDH}.coupler.res"], + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.fv_core.res.nc", + f"{arcdir}/{cycle_YMDH}.fv_core.res.nc"], + ] + file_set.extend(det_files) + else: + logger.warning("COMIN_ATMOS_HISTORY path not available, skipping history/restart files") # Analysis files (cycled mode) if MODE == "cycled": - det_anl_files = [ - # Analysis files - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atmanl.nc", - f"{arcdir}/{head}atmanl.nc"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}sfcanl.nc", - f"{arcdir}/{head}sfcanl.nc"], - - # Radiance diagnostic files - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias", - f"{arcdir}/{head}abias"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_pc", - f"{arcdir}/{head}abias_pc"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_air", - f"{arcdir}/{head}abias_air"], - ] - file_set.extend(det_anl_files) + if com_paths.get('COMIN_ATMOS_ANALYSIS'): + det_anl_files = [ + # Analysis files + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atmanl.nc", + f"{arcdir}/{head}atmanl.nc"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}sfcanl.nc", + f"{arcdir}/{head}sfcanl.nc"], + + # Radiance diagnostic files + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias", + f"{arcdir}/{head}abias"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_pc", + f"{arcdir}/{head}abias_pc"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_air", + f"{arcdir}/{head}abias_air"], + ] + file_set.extend(det_anl_files) + else: + logger.warning("COMIN_ATMOS_ANALYSIS path not available for cycled mode, skipping analysis files") # GFS-specific files if RUN == "gfs": @@ -407,66 +428,87 @@ def build_file_set() -> list: # Genesis tracker files if self.task_config.get('DO_GENESIS', False): - file_set.extend([ - [f"{com_paths['COMIN_ATMOS_GENESIS']}/genesis.{cycle_YMDH}.dat", - f"{arcdir}/genesis.{cycle_YMDH}.dat"], - ]) + if com_paths.get('COMIN_ATMOS_GENESIS'): + file_set.extend([ + [f"{com_paths['COMIN_ATMOS_GENESIS']}/genesis.{cycle_YMDH}.dat", + f"{arcdir}/genesis.{cycle_YMDH}.dat"], + ]) + else: + logger.warning("DO_GENESIS enabled but COMIN_ATMOS_GENESIS path not available") # TC tracker files if self.task_config.get('DO_TRACKER', False): - file_set.extend([ - [f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunix.{cycle_YMDH}", - f"{arcdir}/atcfunix.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_TRACK']}/storms.{cycle_YMDH}", - f"{arcdir}/storms.{cycle_YMDH}"], - ]) + if com_paths.get('COMIN_ATMOS_TRACK'): + file_set.extend([ + [f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunix.{cycle_YMDH}", + f"{arcdir}/atcfunix.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_TRACK']}/storms.{cycle_YMDH}", + f"{arcdir}/storms.{cycle_YMDH}"], + ]) + else: + logger.warning("DO_TRACKER enabled but COMIN_ATMOS_TRACK path not available") # Fit2Obs files if self.task_config.get("DO_FIT2OBS", False): - vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") - fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) - - file_set.extend([ - [f"{com_paths['COMIN_OBS']}/prepbufr.{cycle_YMDH}", - f"{fit2obs_dir}/prepbufr.{cycle_YMDH}"], - [f"{com_paths['COMIN_OBS']}/prepbufr_acft.{cycle_YMDH}", - f"{fit2obs_dir}/prepbufr_acft.{cycle_YMDH}"], - ]) + if com_paths.get('COMIN_OBS'): + vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") + fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) + + file_set.extend([ + [f"{com_paths['COMIN_OBS']}/prepbufr.{cycle_YMDH}", + f"{fit2obs_dir}/prepbufr.{cycle_YMDH}"], + [f"{com_paths['COMIN_OBS']}/prepbufr_acft.{cycle_YMDH}", + f"{fit2obs_dir}/prepbufr_acft.{cycle_YMDH}"], + ]) + else: + logger.warning("DO_FIT2OBS enabled but COMIN_OBS path not available, skipping fit2obs files") # GDAS-specific files elif RUN == "gdas": - gdas_files = [ - # Analysis increment files - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atminc.nc", - f"{arcdir}/{head}atminc.nc"], - - # Observation files - [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr", - f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr"], - [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles", - f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles"], - ] + gdas_files = [] + + # Analysis increment files + if com_paths.get('COMIN_ATMOS_ANALYSIS'): + gdas_files.append([ + f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atminc.nc", + f"{arcdir}/{head}atminc.nc" + ]) + + # Observation files + if com_paths.get('COMIN_OBS'): + gdas_files.extend([ + [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr", + f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr"], + [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles", + f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles"], + ]) + else: + logger.warning("COMIN_OBS path not available for GDAS, skipping observation files") + file_set.extend(gdas_files) else: # Ensemble files (enkfgdas, enkfgfs) - # EnKF ensemble mean and spread files - enkf_files = [ - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}ensmean.nc", - f"{arcdir}/{head}ensmean.nc"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}enssprd.nc", - f"{arcdir}/{head}enssprd.nc"], - ] - - # Loop over ensemble members - NMEM_ENS = self.task_config.get('NMEM_ENS', 80) - for mem in range(1, NMEM_ENS + 1): - mem_str = str(mem).zfill(3) - enkf_files.append([ - f"{com_paths['COMIN_ATMOS_ANALYSIS']}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc", - f"{arcdir}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc" - ]) - - file_set.extend(enkf_files) + if com_paths.get('COMIN_ATMOS_ANALYSIS'): + # EnKF ensemble mean and spread files + enkf_files = [ + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}ensmean.nc", + f"{arcdir}/{head}ensmean.nc"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}enssprd.nc", + f"{arcdir}/{head}enssprd.nc"], + ] + + # Loop over ensemble members + NMEM_ENS = self.task_config.get('NMEM_ENS', 80) + for mem in range(1, NMEM_ENS + 1): + mem_str = str(mem).zfill(3) + enkf_files.append([ + f"{com_paths['COMIN_ATMOS_ANALYSIS']}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc", + f"{arcdir}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc" + ]) + + file_set.extend(enkf_files) + else: + logger.warning("COMIN_ATMOS_ANALYSIS path not available for EnKF, skipping ensemble files") return file_set From 45c23c6f387b3892328ccd0ad68af30b63b8a5d9 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Wed, 3 Dec 2025 00:14:44 -0500 Subject: [PATCH 05/29] update ush/python/pygfs/task/archive_vars.py --- ush/python/pygfs/task/archive_vars.py | 462 +++++++++++++++++++------- 1 file changed, 349 insertions(+), 113 deletions(-) diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index 97ab46da2a6..13a378946a2 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -317,6 +317,24 @@ def get_ensstat_with_default(key): else: logger.warning("COM_ATMOS_GRIB_GRID_TMPL not found for COMIN_ATMOS_ENSSTAT_1p00") + # EnKF-specific: Analysis ensemble statistics path + # Uses COM_ATMOS_ANALYSIS_TMPL with MEMDIR='ensstat' for enkfgdas/enkfgfs + if 'enkf' in self.task_config.RUN.lower(): + ensstat_anl_dict = base_dict.copy() + ensstat_anl_dict['MEMDIR'] = 'ensstat' + + # Helper function for ensstat_anl_dict with empty string default + def get_ensstat_anl_with_default(key): + """Return value from ensstat_anl_dict, or empty string if key not found.""" + return ensstat_anl_dict.get(key, '') + + template = self.task_config.get('COM_ATMOS_ANALYSIS_TMPL', '') + if template: + com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT'] = Template.substitute_string( + template, TemplateConstants.DOLLAR_CURLY_BRACE, get_ensstat_anl_with_default) + else: + logger.warning("COM_ATMOS_ANALYSIS_TMPL not found for COMIN_ATMOS_ANALYSIS_ENSSTAT") + return com_paths def _build_gfs_list(self, cycle_vars: Dict[str, Any], com_paths: Dict[str, str], @@ -371,144 +389,188 @@ def build_file_set() -> list: # Deterministic files (not enkf) if "enkf" not in RUN: - # Common deterministic files - if com_paths.get('COMIN_ATMOS_HISTORY'): - det_files = [ - # Log files - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf000.txt", f"{arcdir}/{head}logf000.txt"], - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{head}logf001.txt", f"{arcdir}/{head}logf001.txt"], - - # Restart files - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.coupler.res", - f"{arcdir}/{cycle_YMDH}.coupler.res"], - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{cycle_YMDH}.fv_core.res.nc", - f"{arcdir}/{cycle_YMDH}.fv_core.res.nc"], - ] - file_set.extend(det_files) - else: - logger.warning("COMIN_ATMOS_HISTORY path not available, skipping history/restart files") + # Common deterministic files - Cyclone tracking + det_files = [] + if com_paths.get('COMIN_ATMOS_TRACK'): + # TC tracker files (only if they exist) + atcfunix_file = f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunix.{RUN}.{cycle_YMDH}" + if os.path.exists(atcfunix_file): + det_files.extend([ + [atcfunix_file, f"{arcdir}/atcfunix.{RUN}.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunixp.{RUN}.{cycle_YMDH}", + f"{arcdir}/atcfunixp.{RUN}.{cycle_YMDH}"], + ]) + + # Basin tracking data + for basin in ["epac", "natl"]: + basin_dir = os.path.join(com_paths['COMIN_ATMOS_TRACK'], basin) + if os.path.exists(basin_dir): + det_files.append([basin_dir, f"{arcdir}/{basin}"]) + + file_set.extend(det_files) # Analysis files (cycled mode) if MODE == "cycled": + det_anl_files = [] + + # Analysis grib file + if com_paths.get('COMIN_ATMOS_GRIB_1p00'): + det_anl_files.append([ + f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.analysis.grib2", + f"{arcdir}/pgbanl.{RUN}.{cycle_YMDH}.grib2" + ]) + if com_paths.get('COMIN_ATMOS_ANALYSIS'): - det_anl_files = [ - # Analysis files - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atmanl.nc", - f"{arcdir}/{head}atmanl.nc"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}sfcanl.nc", - f"{arcdir}/{head}sfcanl.nc"], - - # Radiance diagnostic files - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias", - f"{arcdir}/{head}abias"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_pc", - f"{arcdir}/{head}abias_pc"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_air", - f"{arcdir}/{head}abias_air"], - ] - file_set.extend(det_anl_files) - else: - logger.warning("COMIN_ATMOS_ANALYSIS path not available for cycled mode, skipping analysis files") + # GSI or JEDI atmospheric statistics + if self.task_config.get('DO_JEDIATMVAR', False): + det_anl_files.append([ + f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}stat.atm.tar", + f"{arcdir}/atmstat.{RUN}.{cycle_YMDH}" + ]) + else: + det_anl_files.append([ + f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}gsistat.txt", + f"{arcdir}/gsistat.{RUN}.{cycle_YMDH}" + ]) + + # Snow DA statistics + if self.task_config.get('DO_JEDISNOWDA', False) and com_paths.get('COMIN_SNOW_ANALYSIS'): + det_anl_files.append([ + f"{com_paths['COMIN_SNOW_ANALYSIS']}/{head}snow_analysis.ioda_hofx.tar", + f"{arcdir}/snowstat.{RUN}.{cycle_YMDH}.tar" + ]) + + # Aerosol DA statistics + if self.task_config.get('DO_AERO_ANL', False) and com_paths.get('COMIN_CHEM_ANALYSIS'): + det_anl_files.append([ + f"{com_paths['COMIN_CHEM_ANALYSIS']}/{head}aerostat.tgz", + f"{arcdir}/aerostat.{RUN}.{cycle_YMDH}.tgz" + ]) + + # Aerosol observation files + if self.task_config.get('DO_PREP_OBS_AERO', False) and com_paths.get('COMIN_OBS'): + det_anl_files.extend([ + [f"{com_paths['COMIN_OBS']}/{head}aeroobs", + f"{arcdir}/aeroobs.{RUN}.{cycle_YMDH}"], + [f"{com_paths['COMIN_OBS']}/{head}aeroawobs", + f"{arcdir}/aeroawobs.{RUN}.{cycle_YMDH}"], + ]) + + file_set.extend(det_anl_files) # GFS-specific files if RUN == "gfs": - # GRIB2 files for multiple grids - for grid in ["0p25", "0p50", "1p00"]: - com_key = f"COMIN_ATMOS_GRIB_{grid}" - if com_key in com_paths: - FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) - FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) - - for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS): - fhr_str = str(fhr).zfill(3) - file_set.append([ - f"{com_paths[com_key]}/{head}pgrb2.{grid}.f{fhr_str}", - f"{arcdir}/{head}pgrb2.{grid}.f{fhr_str}" - ]) + gfs_files = [] + + # GRIB2 forecast files (only 1p00 grid for archive) + if com_paths.get('COMIN_ATMOS_GRIB_1p00'): + FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) + FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) + + for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS): + fhr_str = str(fhr).zfill(3) + fhr_archive = str(fhr).zfill(2) # Archive uses 2-digit format + gfs_files.append([ + f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.f{fhr_str}.grib2", + f"{arcdir}/pgbf{fhr_archive}.{RUN}.{cycle_YMDH}.grib2" + ]) - # Genesis tracker files - if self.task_config.get('DO_GENESIS', False): - if com_paths.get('COMIN_ATMOS_GENESIS'): - file_set.extend([ - [f"{com_paths['COMIN_ATMOS_GENESIS']}/genesis.{cycle_YMDH}.dat", - f"{arcdir}/genesis.{cycle_YMDH}.dat"], + # Cyclone genesis data (only if files exist) + if com_paths.get('COMIN_ATMOS_GENESIS'): + genesis_file = f"{com_paths['COMIN_ATMOS_GENESIS']}/storms.gfso.atcf_gen.{cycle_YMDH}" + if os.path.exists(genesis_file): + gfs_files.extend([ + [genesis_file, f"{arcdir}/storms.gfso.atcf_gen.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_GENESIS']}/storms.gfso.atcf_gen.altg.{cycle_YMDH}", + f"{arcdir}/storms.gfso.atcf_gen.altg.{cycle_YMDH}"], ]) - else: - logger.warning("DO_GENESIS enabled but COMIN_ATMOS_GENESIS path not available") - - # TC tracker files - if self.task_config.get('DO_TRACKER', False): - if com_paths.get('COMIN_ATMOS_TRACK'): - file_set.extend([ - [f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunix.{cycle_YMDH}", - f"{arcdir}/atcfunix.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_TRACK']}/storms.{cycle_YMDH}", - f"{arcdir}/storms.{cycle_YMDH}"], + + trak_file = f"{com_paths['COMIN_ATMOS_GENESIS']}/trak.gfso.atcfunix.{cycle_YMDH}" + if os.path.exists(trak_file): + gfs_files.extend([ + [trak_file, f"{arcdir}/trak.gfso.atcfunix.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_GENESIS']}/trak.gfso.atcfunix.altg.{cycle_YMDH}", + f"{arcdir}/trak.gfso.atcfunix.altg.{cycle_YMDH}"], ]) - else: - logger.warning("DO_TRACKER enabled but COMIN_ATMOS_TRACK path not available") - # Fit2Obs files + # Fit2Obs files (atm and sfc forecast history files) if self.task_config.get("DO_FIT2OBS", False): - if com_paths.get('COMIN_OBS'): + if com_paths.get('COMIN_ATMOS_HISTORY'): vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) - file_set.extend([ - [f"{com_paths['COMIN_OBS']}/prepbufr.{cycle_YMDH}", - f"{fit2obs_dir}/prepbufr.{cycle_YMDH}"], - [f"{com_paths['COMIN_OBS']}/prepbufr_acft.{cycle_YMDH}", - f"{fit2obs_dir}/prepbufr_acft.{cycle_YMDH}"], - ]) + FHMAX_FITS = self.task_config.get('FHMAX_FITS', 180) + for fhr in range(0, FHMAX_FITS + 1, 6): + fhr_str = str(fhr).zfill(3) + sfcfile = f"{head}sfc.f{fhr_str}.nc" + sigfile = f"{head}atm.f{fhr_str}.nc" + gfs_files.extend([ + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{sfcfile}", + f"{fit2obs_dir}/{sfcfile}"], + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{sigfile}", + f"{fit2obs_dir}/{sigfile}"], + ]) else: - logger.warning("DO_FIT2OBS enabled but COMIN_OBS path not available, skipping fit2obs files") + logger.warning("DO_FIT2OBS enabled but COMIN_ATMOS_HISTORY path not available") + + file_set.extend(gfs_files) # GDAS-specific files elif RUN == "gdas": gdas_files = [] - # Analysis increment files - if com_paths.get('COMIN_ATMOS_ANALYSIS'): - gdas_files.append([ - f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}atminc.nc", - f"{arcdir}/{head}atminc.nc" - ]) + # GRIB2 forecast files + if com_paths.get('COMIN_ATMOS_GRIB_1p00'): + FHMAX = self.task_config.get('FHMAX', 9) + FHOUT = self.task_config.get('FHOUT', 3) + + for fhr in range(0, FHMAX + 1, FHOUT): + fhr_str = str(fhr).zfill(3) + fhr_archive = str(fhr).zfill(2) # Archive uses 2-digit format + gdas_files.append([ + f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.f{fhr_str}.grib2", + f"{arcdir}/pgbf{fhr_archive}.{RUN}.{cycle_YMDH}.grib2" + ]) - # Observation files - if com_paths.get('COMIN_OBS'): + # Radiance bias correction files + if com_paths.get('COMIN_ATMOS_ANALYSIS'): gdas_files.extend([ - [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr", - f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr"], - [f"{com_paths['COMIN_OBS']}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles", - f"{arcdir}/{CDUMP}.t{cycle_HH}z.prepbufr.acft_profiles"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias.txt", + f"{arcdir}/abias.{RUN}.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_pc.txt", + f"{arcdir}/abias_pc.{RUN}.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_air.txt", + f"{arcdir}/abias_air.{RUN}.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_int.txt", + f"{arcdir}/abias_int.{RUN}.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}analysis.dtf.a006.nc", + f"{arcdir}/dtfanl.{RUN}.{cycle_YMDH}.nc"], ]) - else: - logger.warning("COMIN_OBS path not available for GDAS, skipping observation files") file_set.extend(gdas_files) - else: # Ensemble files (enkfgdas, enkfgfs) - if com_paths.get('COMIN_ATMOS_ANALYSIS'): - # EnKF ensemble mean and spread files - enkf_files = [ - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}ensmean.nc", - f"{arcdir}/{head}ensmean.nc"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}enssprd.nc", - f"{arcdir}/{head}enssprd.nc"], - ] - - # Loop over ensemble members - NMEM_ENS = self.task_config.get('NMEM_ENS', 80) - for mem in range(1, NMEM_ENS + 1): - mem_str = str(mem).zfill(3) + else: # Ensemble files (enkfgdas, enkfgfs) - only statistics archived + enkf_files = [] + + # EnKF ensemble statistics (from ensstat directory) + if com_paths.get('COMIN_ATMOS_ANALYSIS_ENSSTAT'): + if self.task_config.get('DO_JEDIATMENS', False): + # JEDI ensemble statistics enkf_files.append([ - f"{com_paths['COMIN_ATMOS_ANALYSIS']}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc", - f"{arcdir}/mem{mem_str}/{head}atmanl.mem{mem_str}.nc" + f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}stat.atm.tar", + f"{arcdir}/atmensstat.{RUN}.{cycle_YMDH}" + ]) + else: + # GSI EnKF statistics + enkf_files.extend([ + [f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}enkfstat.txt", + f"{arcdir}/enkfstat.{RUN}.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}gsistat.ensmean.txt", + f"{arcdir}/gsistat.{RUN}.{cycle_YMDH}.ensmean"], ]) - file_set.extend(enkf_files) else: - logger.warning("COMIN_ATMOS_ANALYSIS path not available for EnKF, skipping ensemble files") + logger.warning("COMIN_ATMOS_ANALYSIS_ENSSTAT path not available for EnKF, skipping ensemble statistics") return file_set @@ -635,12 +697,175 @@ def gefs_arcdir(self) -> Dict[str, Any]: 'mkdir_list': lists['mkdir_list'] } + def _build_gcafs_list(self, cycle_vars: Dict[str, Any], com_paths: Dict[str, str], + arcdir: str) -> Dict[str, list]: + """Build mkdir list and file set for GCAFS archiving. + + This method contains nested helper functions to build the directory list + and file set for GCAFS archiving. GCAFS is simpler than GFS - mainly + forecast files and optional aerosol files. + + Parameters + ---------- + cycle_vars : Dict[str, Any] + Cycle-specific variables + com_paths : Dict[str, str] + COM directory paths + arcdir : str + Archive directory path + + Returns + ------- + Dict[str, list] + Dictionary containing 'mkdir_list' and 'file_set' + """ + + def build_mkdir_list() -> list: + """Build list of directories to create for GCAFS archiving.""" + mkdir_list = [arcdir] + + # Add fit2obs directory if enabled + RUN = self.task_config.RUN + if self.task_config.get("DO_FIT2OBS", False): + vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") + cycle_YMD = cycle_vars['cycle_YMD'] + cycle_HH = cycle_vars['cycle_HH'] + fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) + mkdir_list.append(fit2obs_dir) + + return mkdir_list + + def build_file_set() -> list: + """Build list of files to archive for GCAFS.""" + file_set = [] + + head = cycle_vars['head'] + cycle_YMDH = cycle_vars['cycle_YMDH'] + cycle_YMD = cycle_vars['cycle_YMD'] + cycle_HH = cycle_vars['cycle_HH'] + + RUN = self.task_config.RUN + MODE = self.task_config.get('MODE', 'cycled') + + # Deterministic files (not enkf) + if "enkf" not in RUN: + # Analysis files (cycled mode) - only aerosol for GCAFS + if MODE == "cycled": + det_anl_files = [] + + # Aerosol DA statistics + if self.task_config.get('DO_AERO_ANL', False) and com_paths.get('COMIN_CHEM_ANALYSIS'): + det_anl_files.append([ + f"{com_paths['COMIN_CHEM_ANALYSIS']}/{head}aerostat.tgz", + f"{arcdir}/aerostat.{RUN}.{cycle_YMDH}.tgz" + ]) + + # Aerosol observation files + if self.task_config.get('DO_PREP_OBS_AERO', False) and com_paths.get('COMIN_OBS'): + det_anl_files.extend([ + [f"{com_paths['COMIN_OBS']}/{head}aeroobs", + f"{arcdir}/aeroobs.{RUN}.{cycle_YMDH}"], + [f"{com_paths['COMIN_OBS']}/{head}aeroawobs", + f"{arcdir}/aeroawobs.{RUN}.{cycle_YMDH}"], + ]) + + file_set.extend(det_anl_files) + + # GCAFS-specific forecast files + if RUN == "gcafs": + gcafs_files = [] + + # GRIB2 forecast files (only 1p00 grid for archive) + if com_paths.get('COMIN_ATMOS_GRIB_1p00'): + FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) + FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) + + for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS): + fhr_str = str(fhr).zfill(3) + fhr_archive = str(fhr).zfill(2) # Archive uses 2-digit format + gcafs_files.append([ + f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.f{fhr_str}.grib2", + f"{arcdir}/pgbf{fhr_archive}.{RUN}.{cycle_YMDH}.grib2" + ]) + + # Fit2Obs files (atm and sfc forecast history files) + if self.task_config.get("DO_FIT2OBS", False): + if com_paths.get('COMIN_ATMOS_HISTORY'): + vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") + fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) + + FHMAX_FITS = self.task_config.get('FHMAX_FITS', 180) + for fhr in range(0, FHMAX_FITS + 1, 6): + fhr_str = str(fhr).zfill(3) + sfcfile = f"{head}sfc.f{fhr_str}.nc" + sigfile = f"{head}atm.f{fhr_str}.nc" + gcafs_files.extend([ + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{sfcfile}", + f"{fit2obs_dir}/{sfcfile}"], + [f"{com_paths['COMIN_ATMOS_HISTORY']}/{sigfile}", + f"{fit2obs_dir}/{sigfile}"], + ]) + else: + logger.warning("DO_FIT2OBS enabled but COMIN_ATMOS_HISTORY path not available") + + file_set.extend(gcafs_files) + + # GCDAS-specific forecast files + elif RUN == "gcdas": + gcdas_files = [] + + # GRIB2 forecast files + if com_paths.get('COMIN_ATMOS_GRIB_1p00'): + FHMAX = self.task_config.get('FHMAX', 9) + FHOUT = self.task_config.get('FHOUT', 3) + + for fhr in range(0, FHMAX + 1, FHOUT): + fhr_str = str(fhr).zfill(3) + fhr_archive = str(fhr).zfill(2) # Archive uses 2-digit format + gcdas_files.append([ + f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.f{fhr_str}.grib2", + f"{arcdir}/pgbf{fhr_archive}.{RUN}.{cycle_YMDH}.grib2" + ]) + + file_set.extend(gcdas_files) + + else: # Ensemble files (enkfgcafs, enkfgcdas) - only statistics archived + enkf_files = [] + + # EnKF ensemble statistics (from ensstat directory) + if com_paths.get('COMIN_ATMOS_ANALYSIS_ENSSTAT'): + if self.task_config.get('DO_JEDIATMENS', False): + # JEDI ensemble statistics - NOTE: GCAFS uses different filename + enkf_files.append([ + f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}atmensstat", + f"{arcdir}/atmensstat.{RUN}.{cycle_YMDH}" + ]) + else: + # GSI EnKF statistics + enkf_files.extend([ + [f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}enkfstat.txt", + f"{arcdir}/enkfstat.{RUN}.{cycle_YMDH}"], + [f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}gsistat.ensmean.txt", + f"{arcdir}/gsistat.{RUN}.{cycle_YMDH}.ensmean"], + ]) + file_set.extend(enkf_files) + else: + logger.warning("COMIN_ATMOS_ANALYSIS_ENSSTAT path not available for EnKF, skipping ensemble statistics") + + return file_set + + # Call nested helper functions + return { + 'mkdir_list': build_mkdir_list(), + 'file_set': build_file_set() + } + @logit(logger) def gcafs_arcdir(self) -> Dict[str, Any]: """Build complete file set for GCAFS archiving (gcafs_arcdir.yaml.j2). - This method corresponds to gcafs_arcdir.yaml.j2. Currently delegates - to GFS archiving logic as GCAFS uses similar file structure. + This method corresponds to gcafs_arcdir.yaml.j2. GCAFS is simpler than GFS, + archiving mainly forecast files and optional aerosol files. Returns ------- @@ -651,7 +876,18 @@ def gcafs_arcdir(self) -> Dict[str, Any]: - file_set: List of [source, destination] file pairs - mkdir_list: List of directories to create """ - # GCAFS uses same archiving structure as GFS - # If GCAFS-specific logic is needed, implement here - logger.info("GCAFS archiving using GFS archiving logic") - return self.gfs_arcdir() + cycle_vars = self._get_cycle_vars() + base_dict = self._get_template_dict() + com_paths = self._calculate_com_paths(base_dict) + + arcdir = self.task_config.ARCDIR + + # Build mkdir list and file set using helper method with nested functions + lists = self._build_gcafs_list(cycle_vars, com_paths, arcdir) + + return { + 'cycle_vars': cycle_vars, + 'com_paths': com_paths, + 'file_set': lists['file_set'], + 'mkdir_list': lists['mkdir_list'] + } From 8c0bee54a3db0fee2b90f7cdc6c81568233f180c Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Thu, 4 Dec 2025 00:01:46 +0000 Subject: [PATCH 06/29] update all files --- parm/archive/gcafs_arcdir.yaml.j2 | 114 +++- parm/archive/gefs_arcdir.yaml.j2 | 35 +- parm/archive/gfs_arcdir.yaml.j2 | 164 ++++- scripts/exgdas_enkf_earc_vrfy.py | 2 +- ush/python/pygfs/task/archive_vars.py | 919 ++++++-------------------- 5 files changed, 462 insertions(+), 772 deletions(-) diff --git a/parm/archive/gcafs_arcdir.yaml.j2 b/parm/archive/gcafs_arcdir.yaml.j2 index c23649f6e08..5cb27e4d830 100644 --- a/parm/archive/gcafs_arcdir.yaml.j2 +++ b/parm/archive/gcafs_arcdir.yaml.j2 @@ -1,20 +1,104 @@ -# NOTE: Complex logic (cycle vars, COM paths, file set building) has been moved to -# ush/python/pygfs/task/archive_vars.py (ArchiveVrfy.gcafs_arcdir() method) -# This YAML now receives pre-calculated file_set and mkdir_list from Python +# Variables provided by archive_vars.py: +# - cycle_HH, cycle_YMDH, cycle_YMD, head +# - VFYARC +# - All COMIN_* paths -# The Python code calculates and returns: -# - file_set: List of [source, destination] pairs -# - mkdir_list: List of directories to create -# Note: Currently delegates to GFS archiving logic +# Select data to store in the ARCDIR and VFYARC from deterministic runs +# This file set will contain all source-destination pairs to send to the FileHandler for copying +{% set file_set = [] %} -# Directory creation +# Deterministic files +{% if "enkf" not in RUN %} + # Common files to be added to both the gcafs and gcdas keys below + {% set det_files = [] %} + + # Deterministic analysis files (generated for cycled experiments) + {% set det_anl_files = [] %} + + {% if DO_AERO_ANL %} + {% do det_anl_files.append([COMIN_CHEM_ANALYSIS ~ "/" ~ head ~ "aerostat.tgz", + ARCDIR ~ "/aerostat." ~ RUN ~ "." ~ cycle_YMDH ~ ".tgz"]) %} + {% endif %} + + {% if DO_PREP_OBS_AERO == True %} + {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroobs", + ARCDIR ~ "/aeroobs." ~ RUN ~ "." ~ cycle_YMDH]) %} + {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroawobs", + ARCDIR ~ "/aeroawobs." ~ RUN ~ "." ~ cycle_YMDH]) %} + {% endif %} + + # GCAFS-specific files + {% set gfs_files = [] %} + {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} + {% do gfs_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", + ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} + {% endfor %} + + # GCAFS Fit2Obs data + {% set fit2obs_files = [] %} + {% for fhr in range(0, FHMAX_FITS + 1, 6) %} + {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} + {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} + {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sfcfile, + VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sfcfile ]) %} + {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sigfile, + VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sigfile ]) %} + {% endfor %} + + # GCDAS-specific files + {% set gdas_files = [] %} + {% for fhr in range(0, FHMAX + 1, FHOUT) %} + {% do gdas_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", + ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} + {% endfor %} + + # Now append the necessary file pairs to file_set + # Common deterministic files + {% set file_set = file_set + det_files %} + {% if MODE == "cycled" %} + {% set file_set = file_set + det_anl_files %} + {% endif %} + + # Run-specific deterministic files + {% if RUN == "gcafs" %} + {% set file_set = file_set + gfs_files %} + # Fit2Obs files + {% if DO_FIT2OBS == True %} + {% set file_set = file_set + fit2obs_files %} + {% endif %} + {% elif RUN == "gcdas" %} + {% set file_set = file_set + gdas_files %} + {% endif %} + +{% else %} # End of deterministic files + + # Ensemble analysis files + {% set enkf_files = [] %} + {% if DO_JEDIATMENS == True %} + {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "atmensstat", + ARCDIR ~ "/atmensstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% else %} + {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "enkfstat.txt", + ARCDIR ~ "/enkfstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "gsistat.ensmean.txt", + ARCDIR ~ "/gsistat." ~ RUN ~ "." ~ cycle_YMDH ~ ".ensmean"]) %} + {% endif %} + + # Construct the final file set + {% set file_set = file_set + enkf_files %} + +{% endif %} + + +# Actually write the yaml mkdir: -{% for dir_path in mkdir_list %} - - "{{ dir_path }}" -{% endfor %} + - "{{ ARCDIR }}" + + {% if DO_FIT2OBS == True %} + - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" + {% endif %} -# File copying operations copy: -{% for source_dest_pair in file_set %} - - {{ source_dest_pair }} -{% endfor %} + {% for source_dest_pair in file_set %} + - {{ source_dest_pair }} + {% endfor %} diff --git a/parm/archive/gefs_arcdir.yaml.j2 b/parm/archive/gefs_arcdir.yaml.j2 index a40f9e61971..1d63e2b1954 100644 --- a/parm/archive/gefs_arcdir.yaml.j2 +++ b/parm/archive/gefs_arcdir.yaml.j2 @@ -1,19 +1,24 @@ -# NOTE: Complex logic (cycle vars, COM paths, file set building) has been moved to -# ush/python/pygfs/task/archive_vars.py (ArchiveVrfy.gefs_arcdir() method) -# This YAML now receives pre-calculated file_set and mkdir_list from Python +# Variables provided by archive_vars.py: +# - cycle_HH, cycle_YMDH, cycle_YMD, head +# - VFYARC (archive directory) +# - COMIN_ATMOS_ENSSTAT_1p00 (calculated in Python with MEMDIR='ensstat') -# The Python code calculates and returns: -# - file_set: List of [source, destination] pairs for GEFS ensstat files -# - mkdir_list: List of directories to create (GEFS_ARCH = ROTDIR/gefsarch) +{% set file_set = [] %} -# Directory creation +# Select ensstat files to copy to the arcdir +{% set ensstat_files = [] %} +{% if path_exists(COMIN_ATMOS_ENSSTAT_1p00) %} + {% for fhr in range(FHMIN_GFS, FHMAX_GFS + FHOUT_GFS, FHOUT_GFS) %} + {% do ensstat_files.append([COMIN_ATMOS_ENSSTAT_1p00 ~ "/" ~ head ~ "mean.pres_." ~ + "1p00" ~ ".f" ~ '%03d'|format(fhr) ~ ".grib2", + VFYARC]) %} + {% endfor %} +{% endif %} +{% set file_set = ensstat_files %} +# Actually write the yaml mkdir: -{% for dir_path in mkdir_list %} - - "{{ dir_path }}" -{% endfor %} - -# File copying operations + - "{{ VFYARC }}" copy: -{% for source_dest_pair in file_set %} - - {{ source_dest_pair }} -{% endfor %} + {% for source_dest_pair in file_set %} + - {{ source_dest_pair }} + {% endfor %} diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index 9260b734042..82dcde907a7 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -1,19 +1,151 @@ -# NOTE: Complex logic (cycle vars, COM paths, file set building) has been moved to -# ush/python/pygfs/task/archive_vars.py (ArchiveVrfy.gfs_arcdir() method) -# This YAML now receives pre-calculated file_set and mkdir_list from Python +# Variables provided by archive_vars.py: +# - cycle_HH, cycle_YMDH, cycle_YMD, head +# - VFYARC, GEFS_ARCH +# - All COMIN_* paths -# The Python code calculates and returns: -# - file_set: List of [source, destination] pairs -# - mkdir_list: List of directories to create +# Select data to store in the ARCDIR and VFYARC from deterministic runs +# This file set will contain all source-destination pairs to send to the FileHandler for copying +{% set file_set = [] %} -# Directory creation +# Deterministic files +{% if "enkf" not in RUN %} + # Common files to be added to both the gfs and gdas keys below + {% set det_files = [] %} + # Cyclone forecasts, produced for both gdas and gfs cycles + ## Only created if tracking is on and there were systems to track + {% if path_exists(COMIN_ATMOS_TRACK ~ "/atcfunix." ~ RUN ~ "." ~ cycle_YMDH) %} + {% do det_files.append([COMIN_ATMOS_TRACK ~ "/atcfunix." ~ RUN ~ "." ~ cycle_YMDH, + ARCDIR ~"/atcfunix." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% do det_files.append([COMIN_ATMOS_TRACK ~ "/atcfunixp." ~ RUN ~ "." ~ cycle_YMDH, + ARCDIR ~ "/atcfunixp." ~ RUN ~ "." ~ cycle_YMDH]) %} + {% endif %} + + # Cyclone tracking data + {% for basin in ["epac", "natl"] %} + {% if path_exists(COMIN_ATMOS_TRACK + "/" + basin) %} + {% do det_files.append([COMIN_ATMOS_TRACK ~ "/" ~ basin, + ARCDIR ~ "/" ~ basin ]) %} + {% endif %} + {% endfor %} + + # Deterministic analysis files (generated for cycled experiments) + {% set det_anl_files = [] %} + # Analysis data (if we are running in cycled mode) + {% do det_anl_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.analysis.grib2", + ARCDIR ~ "/pgbanl." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} + + {% if DO_JEDIATMVAR == True %} + {% do det_anl_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "stat.atm.tar", + ARCDIR ~ "/atmstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% else %} + {% do det_anl_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "gsistat.txt", + ARCDIR ~ "/gsistat." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% endif %} + + {% if DO_JEDISNOWDA == True %} + {% do det_anl_files.append([COMIN_SNOW_ANALYSIS ~ "/" ~ head ~ "snow_analysis.ioda_hofx.tar", + ARCDIR ~ "/snowstat." ~ RUN ~ "." ~ cycle_YMDH ~ ".tar"]) %} + {% endif %} + + {% if DO_AERO_ANL %} + {% do det_anl_files.append([COMIN_CHEM_ANALYSIS ~ "/" ~ head ~ "aerostat.tgz", + ARCDIR ~ "/aerostat." ~ RUN ~ "." ~ cycle_YMDH ~ ".tgz"]) %} + {% endif %} + + {% if DO_PREP_OBS_AERO == True %} + {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroobs", + ARCDIR ~ "/aeroobs." ~ RUN ~ "." ~ cycle_YMDH]) %} + {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroawobs", + ARCDIR ~ "/aeroawobs." ~ RUN ~ "." ~ cycle_YMDH]) %} + {% endif %} + + # GFS-specific files + {% set gfs_files = [] %} + {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} + {% do gfs_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", + ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} + {% endfor %} + + # Cyclone genesis data (only present if there are storms) + {% if path_exists(COMIN_ATMOS_GENESIS ~ "/storms.gfso.atcf_gen." ~ cycle_YMDH) %} + {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/storms.gfso.atcf_gen." ~ cycle_YMDH, + ARCDIR ~ "/storms.gfso.atcf_gen." ~ cycle_YMDH ]) %} + {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/storms.gfso.atcf_gen.altg." ~ cycle_YMDH, + ARCDIR ~ "/storms.gfso.atcf_gen.altg." ~ cycle_YMDH ]) %} + {% endif %} + + {% if path_exists(COMIN_ATMOS_GENESIS ~ "/trak.gfso.atcfunix." ~ cycle_YMDH) %} + {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/trak.gfso.atcfunix." ~ cycle_YMDH, + ARCDIR ~ "/trak.gfso.atcfunix." ~ cycle_YMDH ]) %} + {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/trak.gfso.atcfunix.altg." ~ cycle_YMDH, + ARCDIR ~ "/trak.gfso.atcfunix.altg." ~ cycle_YMDH ]) %} + {% endif %} + + # GFS Fit2Obs data + {% set fit2obs_files = [] %} + {% for fhr in range(0, FHMAX_FITS + 1, 6) %} + {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} + {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} + {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sfcfile, + VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sfcfile ]) %} + {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sigfile, + VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sigfile ]) %} + {% endfor %} + + # GDAS-specific files + {% set gdas_files = [] %} + {% for fhr in range(0, FHMAX + 1, FHOUT) %} + {% do gdas_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", + ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} + {% endfor %} + + # Now append the necessary file pairs to file_set + # Common deterministic files + {% set file_set = file_set + det_files %} + {% if MODE == "cycled" %} + {% set file_set = file_set + det_anl_files %} + {% endif %} + + # Run-specific deterministic files + {% if RUN == "gfs" %} + {% set file_set = file_set + gfs_files %} + # Fit2Obs files + {% if DO_FIT2OBS == True %} + {% set file_set = file_set + fit2obs_files %} + {% endif %} + {% elif RUN == "gdas" %} + {% set file_set = file_set + gdas_files %} + {% endif %} + +{% else %} # End of deterministic files + + # Ensemble analysis files + {% set enkf_files = [] %} + {% if DO_JEDIATMENS == True %} + {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "stat.atm.tar", + ARCDIR ~ "/atmensstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% else %} + {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "enkfstat.txt", + ARCDIR ~ "/enkfstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "gsistat.ensmean.txt", + ARCDIR ~ "/gsistat." ~ RUN ~ "." ~ cycle_YMDH ~ ".ensmean"]) %} + {% endif %} + + # Construct the final file set + {% set file_set = file_set + enkf_files %} + +{% endif %} + + +# Actually write the yaml mkdir: -{% for dir_path in mkdir_list %} - - "{{ dir_path }}" -{% endfor %} - -# File copying operations -copy_opt: -{% for source_dest_pair in file_set %} - - {{ source_dest_pair }} -{% endfor %} + - "{{ ARCDIR }}" + + {% if DO_FIT2OBS == True %} + - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" + {% endif %} + +copy: + {% for source_dest_pair in file_set %} + - {{ source_dest_pair }} + {% endfor %} diff --git a/scripts/exgdas_enkf_earc_vrfy.py b/scripts/exgdas_enkf_earc_vrfy.py index 878a8b3c592..77255faded9 100755 --- a/scripts/exgdas_enkf_earc_vrfy.py +++ b/scripts/exgdas_enkf_earc_vrfy.py @@ -18,7 +18,7 @@ def main(): # Instantiate the Archive object for execute_store_products archive = Archive(config) - # Instantiate the ArchiveVrfy object for variable and file set calculation + # Instantiate the ArchiveVrfy object to collect variables to render YAML archive_vars = ArchiveVrfy(config) with chdir(config.ROTDIR): diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index 13a378946a2..c64d3abfb94 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -4,34 +4,42 @@ Overview -------- -This module constructs cycle-specific COM directory path variables and file sets -required for archiving verification (vrfy) data for GFS, GEFS, and GCAFS systems. +This module provides variables needed by YAML templates for archiving verification +(vrfy) data for GFS, GEFS, and GCAFS systems. File set generation logic (loops, +conditionals, path construction) is handled by the YAML templates themselves. Architecture ------------ -- One method (_calculate_com_paths) for all COM variables with logic for grids, loops -- Separate method for each arcdir YAML that builds complete file sets: - * gfs_arcdir() - corresponds to gfs_arcdir.yaml.j2 - * gefs_arcdir() - corresponds to gefs_arcdir.yaml.j2 - * gcafs_arcdir() - corresponds to gcafs_arcdir.yaml.j2 +Python provides VARIABLES → YAML templates build FILE SETS + +Python Code Responsibilities: + - Compute cycle-specific variables (cycle_HH, cycle_YMDH, cycle_YMD, head) + - Calculate COM directory paths with grid loops (0p25, 0p50, 1p00) + - Extract configuration keys (RUN, DO_* flags, FHMAX*, etc.) + - Provide complete arch_dict to YAML templates + +YAML Template Responsibilities (parm/archive/*_arcdir.yaml.j2): + - Build file sets with source → destination mappings + - Handle loops (forecast hours, grids, basins) + - Apply conditionals (DO_* flags, MODE, RUN type) + - Create mkdir lists for directory creation Key Methods ----------- +get_all_yaml_vars(): + Main entry point - collects all variables for YAML templates + +add_config_vars(): + Extracts configuration keys and COM* template variables + _get_cycle_vars(): Computes cycle-specific variables (cycle_HH, cycle_YMDH, cycle_YMD, head) _calculate_com_paths(): - Generates all COM paths (ROTDIR-based) used across all arcdir YAMLs, - including grid loops (0p25, 0p50, 1p00) and conditional path logic - -gfs_arcdir(): - Complete file set generation for GFS archiving (gfs_arcdir.yaml.j2) + Generates all COM paths (ROTDIR-based) with grid loops and conditional logic -gefs_arcdir(): - Complete file set generation for GEFS archiving (gefs_arcdir.yaml.j2) - -gcafs_arcdir(): - Complete file set generation for GCAFS archiving (gcafs_arcdir.yaml.j2) +_get_template_dict(): + Creates base template substitution dictionary Logging ------- @@ -49,13 +57,14 @@ class ArchiveVrfy(Task): """ Task class for archive verification operations. - This class handles archiving for three systems: + This class provides variables for YAML templates that handle archiving + for three systems: - GFS: Global Forecast System - GEFS: Global Ensemble Forecast System - GCAFS: Global Climate Analysis Forecast System - Each system has a corresponding method that builds the complete file set - for archiving, corresponding to the respective arcdir YAML file. + The YAML templates (parm/archive/*_arcdir.yaml.j2) contain all file set + generation logic. This class only provides the variables they need. """ @logit(logger, name="ArchiveVrfy") @@ -70,85 +79,112 @@ def __init__(self, config: Dict[str, Any]) -> None: super().__init__(config) @logit(logger) - def add_general_vars(self) -> None: - """Format general variables for archive operations. + def get_all_yaml_vars(self) -> Dict[str, Any]: + """Collect all variables needed for YAML templates. - Updates resolution variables to be 3-digit formatted strings. - This ensures consistent naming conventions for ocean and ice resolution - specifications (e.g., 25 -> '025', 100 -> '100'). + This method provides only the VARIABLES needed by the YAML templates + (cycle vars, COM paths, config keys). The YAML templates handle all + file set generation logic (loops, conditionals, path construction). - Variables updated (if present in task_config): - - OCNRES: Ocean resolution - - ICERES: Ice resolution + Returns + ------- + Dict[str, Any] + Dictionary containing variables for Jinja2 templates: + - cycle_HH, cycle_YMDH, cycle_YMD, head: Cycle-specific variables + - COMIN_*, COMOUT_*, COM_*: All COM directory paths + - Config keys: RUN, PSLOT, ROTDIR, DO_* flags, FHMAX*, etc. + + Notes + ----- + File set generation (mkdir lists, copy operations) is handled entirely + by the YAML templates. This method only provides the variables they need. """ - # Update these keys to be 3 digits if they are part of task_config - for key in ['OCNRES', 'ICERES']: - try: - self.task_config[key] = f"{self.task_config[key]:03d}" - except KeyError: - logger.info(f"key ({key}) not found in task_config") + # Build arch_dict with variables for Jinja2 templates + arch_dict = {} + + # Add config variables (config keys, COM* variables) + arch_dict.update(self.add_config_vars()) + + # Add cycle-specific variables + arch_dict.update(self._get_cycle_vars()) + + # Add COM paths + base_dict = self._get_template_dict() + template_specs = self._get_com_template_specs() + arch_dict.update(self._construct_com_paths(base_dict, template_specs)) + + logger.info(f"Collected {len(arch_dict)} variables for YAML templates") + logger.debug(f"arch_dict keys: {list(arch_dict.keys())}") + + return arch_dict @logit(logger) - def get_all_yaml_vars(self) -> Dict[str, Any]: - """Collect all archive variables and build complete arch_dict for YAML templates. + def add_config_vars(self) -> Dict[str, Any]: + """Collect and format general variables for archive operations. This method: - 1. Formats general variables (OCNRES, ICERES) - 2. Determines system type (GFS, GEFS, GCAFS) from NET - 3. Dispatches to appropriate system-specific method - 4. Builds complete arch_dict with all task_config and archive variables + 1. Updates resolution variables to be 3-digit formatted strings (if present) + 2. Extracts all required configuration keys for archiving + 3. Collects all COM* directory and template variables + 4. Returns complete dictionary ready for arch_dict + + Variables updated (if present in task_config): + - OCNRES: Ocean resolution (formatted to 3 digits) + - ICERES: Ice resolution (formatted to 3 digits) + + Configuration keys extracted (if present): + - current_cycle, RUN, PSLOT, ROTDIR, PARMgfs, ARCDIR, MODE + - DO_JEDIATMENS, DO_FIT2OBS, DO_JEDIATMVAR, DO_JEDISNOWDA + - DO_AERO_ANL, DO_PREP_OBS_AERO, DO_GSISOILDA, DO_LAND_IAU + - NET, FHOUT_GFS, FHMAX_HF_GFS, FHMAX_FITS, FHMAX, FHOUT, FHMAX_GFS + - FHMIN_GFS (if present in task_config) + + COM variable prefixes collected: + - COM_, COMIN_, COMOUT_ Returns ------- Dict[str, Any] - Complete arch_dict ready for configure_vrfy() and Jinja2 templates, - containing all task_config variables plus: - - cycle_HH, cycle_YMDH, cycle_YMD, head: Cycle-specific variables - - COMIN_*: All COM directory paths - - file_set: List of [source, destination] file pairs for archiving - - mkdir_list: List of directories to create + Dictionary containing all general archive variables Notes ----- - The NET variable determines which archiving method is called: - - NET='gefs' → gefs_arcdir() - - NET='gcafs' → gcafs_arcdir() - - Otherwise → gfs_arcdir() (handles gfs, gdas, enkfgdas, enkfgfs) + Missing keys will be silently skipped (not added to general_dict). """ - # Format general variables (e.g., OCNRES, ICERES to 3-digit strings) - self.add_general_vars() + general_dict = {} - NET = self.task_config.get('NET', 'gfs') - RUN = self.task_config.RUN - - # Dispatch to appropriate system-specific method based on NET - if NET == 'gefs': - logger.info(f"Collecting GEFS archive variables for cycle {self.task_config.current_cycle}") - arcdir_result = self.gefs_arcdir() - elif NET == 'gcafs': - logger.info(f"Collecting GCAFS archive variables for cycle {self.task_config.current_cycle}") - arcdir_result = self.gcafs_arcdir() - else: # gfs, gdas, enkfgdas, enkfgfs (default) - logger.info(f"Collecting GFS/GDAS archive variables for RUN={RUN}, cycle {self.task_config.current_cycle}") - arcdir_result = self.gfs_arcdir() - - # Build complete arch_dict with all variables for configure_vrfy and Jinja2 templates - arch_dict = dict(self.task_config) - - # Add cycle-specific variables (cycle_HH, cycle_YMDH, cycle_YMD, head) - arch_dict.update(arcdir_result['cycle_vars']) + # Update resolution keys to be 3 digits if they are part of task_config + for key in ['OCNRES', 'ICERES']: + if key in self.task_config: + self.task_config[key] = f"{self.task_config[key]:03d}" - # Add COM paths (COMIN_ATMOS_ANALYSIS, COMIN_ATMOS_GRIB_*, etc.) - arch_dict.update(arcdir_result['com_paths']) + # Configuration keys to extract (if present) + config_keys = ['current_cycle', 'RUN', 'PSLOT', 'ROTDIR', 'PARMgfs', + 'ARCDIR', 'MODE', 'DO_JEDIATMENS', 'DO_FIT2OBS', 'DO_JEDIATMVAR', + 'DO_JEDISNOWDA', 'DO_AERO_ANL', 'DO_PREP_OBS_AERO', 'NET', + 'FHOUT_GFS', 'FHMAX_HF_GFS', 'FHMAX_FITS', 'FHMAX', 'FHOUT', + 'FHMAX_GFS', 'DO_GSISOILDA', 'DO_LAND_IAU'] + + # Add FHMIN_GFS only if NET does not contain 'enkf' + if 'enkf' not in self.task_config.get('NET', ''): + config_keys.append('FHMIN_GFS') + + # Extract keys if they exist in task_config + for key in config_keys: + if key in self.task_config: + general_dict[key] = self.task_config[key] + else: + logger.warning(f"Config key '{key}' not found in task_config; skipping.") - # Add file_set and mkdir_list for Jinja2 templates - arch_dict['file_set'] = arcdir_result['file_set'] - arch_dict['mkdir_list'] = arcdir_result['mkdir_list'] + # Import COM* directory and template variables + for key in self.task_config.keys(): + if key.startswith(("COM_", "COMIN_", "COMOUT_")): + general_dict[key] = self.task_config.get(key) - logger.info(f"Built arch_dict with {len(arch_dict['file_set'])} files to archive in {len(arch_dict['mkdir_list'])} directories") - logger.debug(f"arch_dict keys: {list(arch_dict.keys())}") + logger.info(f"Collected {len(general_dict)} general archive variables") + logger.debug(f"General variables: {list(general_dict.keys())}") - return arch_dict + return general_dict @logit(logger) def _get_cycle_vars(self) -> Dict[str, Any]: @@ -162,22 +198,25 @@ def _get_cycle_vars(self) -> Dict[str, Any]: - cycle_YMDH: Full cycle timestamp (YYYYMMDDHH) - cycle_YMD: Cycle date (YYYYMMDD) - head: System head designation (e.g., 'gfs.t00z.') + - VFYARC: Verification archive directory (ROTDIR/vrfyarch) """ current_cycle = self.task_config.current_cycle - cycle_HH = current_cycle.strftime("%H") cycle_YMDH = to_YMDH(current_cycle) cycle_YMD = to_YMD(current_cycle) # Build head string (e.g., 'gfs.t00z.') - RUN = self.task_config.RUN - head = f"{RUN}.t{cycle_HH}z." + head = f"{self.task_config.RUN}.t{cycle_HH}z." + + # Archive directory (used by all systems) + VFYARC = os.path.join(self.task_config.ROTDIR, "vrfyarch") return { 'cycle_HH': cycle_HH, 'cycle_YMDH': cycle_YMDH, 'cycle_YMD': cycle_YMD, - 'head': head + 'head': head, + 'VFYARC': VFYARC } @logit(logger) @@ -186,6 +225,7 @@ def _get_template_dict(self) -> Dict[str, str]: This method builds the base dictionary used for template variable substitution. For GEFS, it includes MEMDIR: 'ensstat' to support ensemble statistics paths. + All values default to empty string if not found. Returns ------- @@ -209,685 +249,114 @@ def _get_template_dict(self) -> Dict[str, str]: """ cycle_vars = self._get_cycle_vars() - # Base template substitution dictionary + # Base template substitution dictionary with empty string defaults base_dict = { - 'ROTDIR': self.task_config.ROTDIR, - 'RUN': self.task_config.RUN, - 'YMD': cycle_vars['cycle_YMD'], - 'HH': cycle_vars['cycle_HH'], - 'PDY': cycle_vars['cycle_YMD'], - 'cyc': cycle_vars['cycle_HH'] + 'ROTDIR': self.task_config.get('ROTDIR', ''), + 'RUN': self.task_config.get('RUN', ''), + 'YMD': cycle_vars.get('cycle_YMD', ''), + 'HH': cycle_vars.get('cycle_HH', ''), + 'PDY': cycle_vars.get('cycle_YMD', ''), + 'cyc': cycle_vars.get('cycle_HH', '') } # GEFS-specific: Add MEMDIR for ensemble statistics # Corresponds to YAML: '${MEMDIR}': 'ensstat' - if 'gefs' in self.task_config.RUN.lower(): + if 'gefs' in self.task_config.get('RUN', '').lower(): base_dict['MEMDIR'] = 'ensstat' return base_dict - @logit(logger) - def _calculate_com_paths(self, base_dict: Dict[str, str]) -> Dict[str, str]: - """Calculate all COM paths used across arcdir YAMLs. - - This method generates all ROTDIR-based COM paths with logic for: - - Multiple grids (0p25, 0p50, 1p00) for GRIB files - - GEFS ensemble statistics (COMIN_ATMOS_ENSSTAT_1p00) - - Conditional paths based on RUN, MODE, DO_* flags - - All paths used by gfs_arcdir, gefs_arcdir, gcafs_arcdir YAMLs + def _get_com_template_specs(self) -> list: + """Collect COM template specifications. - Parameters - ---------- - base_dict : Dict[str, str] - Base template substitution dictionary from _get_template_dict() + This method defines which COM variables need to be generated from which + templates, along with any additional template variables required. Returns ------- - Dict[str, str] - Dictionary mapping COM variable names to resolved paths. - Examples: - - COMIN_ATMOS_ANALYSIS: /path/to/analysis - - COMIN_ATMOS_GRIB_0p25: /path/to/grib/0p25 - - COMIN_ATMOS_GRIB_0p50: /path/to/grib/0p50 - - COMIN_ATMOS_GRIB_1p00: /path/to/grib/1p00 - - COMIN_ATMOS_ENSSTAT_1p00: /path/to/ensstat (GEFS only) + list of tuples + Each tuple contains (com_key, template_key, extra_vars): + - com_key: Output variable name (e.g., 'COMIN_ATMOS_ANALYSIS') + - template_key: Template key in task_config (e.g., 'COM_ATMOS_ANALYSIS_TMPL') + - extra_vars: Dict of additional template variables (e.g., {'GRID': '0p25'}) + Empty dict {} if no additional variables needed """ - com_paths = {} - - # Helper function to get template variables with empty string default - def get_with_default(key): - """Return value from base_dict, or empty string if key not found.""" - return base_dict.get(key, '') + template_specs = [] - # Common paths (always needed) + # Common paths (no extra variables needed) common_templates = [ - ('COMIN_ATMOS_ANALYSIS', 'COM_ATMOS_ANALYSIS_TMPL'), - ('COMIN_ATMOS_GENESIS', 'COM_ATMOS_GENESIS_TMPL'), - ('COMIN_ATMOS_HISTORY', 'COM_ATMOS_HISTORY_TMPL'), - ('COMIN_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL'), - ('COMIN_CHEM_ANALYSIS', 'COM_CHEM_ANALYSIS_TMPL'), - ('COMIN_SNOW_ANALYSIS', 'COM_SNOW_ANALYSIS_TMPL'), - ('COMIN_OBS', 'COM_OBS_TMPL'), - ('COMOUT_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL'), + ('COMIN_ATMOS_ANALYSIS', 'COM_ATMOS_ANALYSIS_TMPL', {}), + ('COMIN_ATMOS_GENESIS', 'COM_ATMOS_GENESIS_TMPL', {}), + ('COMIN_ATMOS_HISTORY', 'COM_ATMOS_HISTORY_TMPL', {}), + ('COMIN_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL', {}), + ('COMIN_CHEM_ANALYSIS', 'COM_CHEM_ANALYSIS_TMPL', {}), + ('COMIN_SNOW_ANALYSIS', 'COM_SNOW_ANALYSIS_TMPL', {}), + ('COMIN_OBS', 'COM_OBS_TMPL', {}), + ('COMOUT_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL', {}), ] + template_specs.extend(common_templates) - for com_key, template_key in common_templates: - template = self.task_config.get(template_key, '') - if template: - com_paths[com_key] = Template.substitute_string( - template, TemplateConstants.DOLLAR_CURLY_BRACE, get_with_default) - else: - logger.warning(f"Template {template_key} not found for {com_key}") - com_paths[com_key] = '' - - # Grid-specific paths (loop over grids: 0p25, 0p50, 1p00) + # Grid-specific paths for grid in ["0p25", "0p50", "1p00"]: - grid_dict = base_dict.copy() - grid_dict['GRID'] = grid - - # Helper function for grid_dict with empty string default - def get_grid_with_default(key): - """Return value from grid_dict, or empty string if key not found.""" - return grid_dict.get(key, '') - - template = self.task_config.get('COM_ATMOS_GRIB_GRID_TMPL', '') - if template: - com_key = f"COMIN_ATMOS_GRIB_{grid}" - com_paths[com_key] = Template.substitute_string( - template, TemplateConstants.DOLLAR_CURLY_BRACE, get_grid_with_default) - else: - logger.warning(f"COM_ATMOS_GRIB_GRID_TMPL not found for grid {grid}") + com_key = f"COMIN_ATMOS_GRIB_{grid}" + template_specs.append((com_key, 'COM_ATMOS_GRIB_GRID_TMPL', {'GRID': grid})) # GEFS-specific: Ensemble statistics path - # Corresponds to YAML: COMIN_ATMOS_ENSSTAT_1p00 with MEMDIR='ensstat' if 'gefs' in self.task_config.RUN.lower(): - ensstat_dict = base_dict.copy() - ensstat_dict['GRID'] = '1p00' - # MEMDIR is already in base_dict for GEFS (added by _get_template_dict) - - # Helper function for ensstat_dict with empty string default - def get_ensstat_with_default(key): - """Return value from ensstat_dict, or empty string if key not found.""" - return ensstat_dict.get(key, '') - - template = self.task_config.get('COM_ATMOS_GRIB_GRID_TMPL', '') - if template: - com_paths['COMIN_ATMOS_ENSSTAT_1p00'] = Template.substitute_string( - template, TemplateConstants.DOLLAR_CURLY_BRACE, get_ensstat_with_default) - else: - logger.warning("COM_ATMOS_GRIB_GRID_TMPL not found for COMIN_ATMOS_ENSSTAT_1p00") + template_specs.append(('COMIN_ATMOS_ENSSTAT_1p00', 'COM_ATMOS_GRIB_GRID_TMPL', {'GRID': '1p00'})) # EnKF-specific: Analysis ensemble statistics path - # Uses COM_ATMOS_ANALYSIS_TMPL with MEMDIR='ensstat' for enkfgdas/enkfgfs if 'enkf' in self.task_config.RUN.lower(): - ensstat_anl_dict = base_dict.copy() - ensstat_anl_dict['MEMDIR'] = 'ensstat' - - # Helper function for ensstat_anl_dict with empty string default - def get_ensstat_anl_with_default(key): - """Return value from ensstat_anl_dict, or empty string if key not found.""" - return ensstat_anl_dict.get(key, '') - - template = self.task_config.get('COM_ATMOS_ANALYSIS_TMPL', '') - if template: - com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT'] = Template.substitute_string( - template, TemplateConstants.DOLLAR_CURLY_BRACE, get_ensstat_anl_with_default) - else: - logger.warning("COM_ATMOS_ANALYSIS_TMPL not found for COMIN_ATMOS_ANALYSIS_ENSSTAT") - - return com_paths - - def _build_gfs_list(self, cycle_vars: Dict[str, Any], com_paths: Dict[str, str], - arcdir: str) -> Dict[str, list]: - """Build mkdir list and file set for GFS archiving. - - This method contains nested helper functions to build the directory list - and file set for GFS archiving. - - Parameters - ---------- - cycle_vars : Dict[str, Any] - Cycle-specific variables - com_paths : Dict[str, str] - COM directory paths - arcdir : str - Archive directory path - - Returns - ------- - Dict[str, list] - Dictionary containing 'mkdir_list' and 'file_set' - """ - - def build_mkdir_list() -> list: - """Build list of directories to create for GFS archiving.""" - mkdir_list = [arcdir] - - # Add fit2obs directory if enabled - RUN = self.task_config.RUN - if RUN == "gfs" and self.task_config.get("DO_FIT2OBS", False): - vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") - cycle_YMD = cycle_vars['cycle_YMD'] - cycle_HH = cycle_vars['cycle_HH'] - fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) - mkdir_list.append(fit2obs_dir) - - return mkdir_list - - def build_file_set() -> list: - """Build list of files to archive for GFS.""" - file_set = [] - - head = cycle_vars['head'] - cycle_YMDH = cycle_vars['cycle_YMDH'] - cycle_YMD = cycle_vars['cycle_YMD'] - cycle_HH = cycle_vars['cycle_HH'] - - RUN = self.task_config.RUN - MODE = self.task_config.get('MODE', 'cycled') - CDUMP = self.task_config.get('CDUMP', RUN) - - # Deterministic files (not enkf) - if "enkf" not in RUN: - # Common deterministic files - Cyclone tracking - det_files = [] - if com_paths.get('COMIN_ATMOS_TRACK'): - # TC tracker files (only if they exist) - atcfunix_file = f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunix.{RUN}.{cycle_YMDH}" - if os.path.exists(atcfunix_file): - det_files.extend([ - [atcfunix_file, f"{arcdir}/atcfunix.{RUN}.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_TRACK']}/atcfunixp.{RUN}.{cycle_YMDH}", - f"{arcdir}/atcfunixp.{RUN}.{cycle_YMDH}"], - ]) - - # Basin tracking data - for basin in ["epac", "natl"]: - basin_dir = os.path.join(com_paths['COMIN_ATMOS_TRACK'], basin) - if os.path.exists(basin_dir): - det_files.append([basin_dir, f"{arcdir}/{basin}"]) - - file_set.extend(det_files) - - # Analysis files (cycled mode) - if MODE == "cycled": - det_anl_files = [] - - # Analysis grib file - if com_paths.get('COMIN_ATMOS_GRIB_1p00'): - det_anl_files.append([ - f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.analysis.grib2", - f"{arcdir}/pgbanl.{RUN}.{cycle_YMDH}.grib2" - ]) - - if com_paths.get('COMIN_ATMOS_ANALYSIS'): - # GSI or JEDI atmospheric statistics - if self.task_config.get('DO_JEDIATMVAR', False): - det_anl_files.append([ - f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}stat.atm.tar", - f"{arcdir}/atmstat.{RUN}.{cycle_YMDH}" - ]) - else: - det_anl_files.append([ - f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}gsistat.txt", - f"{arcdir}/gsistat.{RUN}.{cycle_YMDH}" - ]) - - # Snow DA statistics - if self.task_config.get('DO_JEDISNOWDA', False) and com_paths.get('COMIN_SNOW_ANALYSIS'): - det_anl_files.append([ - f"{com_paths['COMIN_SNOW_ANALYSIS']}/{head}snow_analysis.ioda_hofx.tar", - f"{arcdir}/snowstat.{RUN}.{cycle_YMDH}.tar" - ]) - - # Aerosol DA statistics - if self.task_config.get('DO_AERO_ANL', False) and com_paths.get('COMIN_CHEM_ANALYSIS'): - det_anl_files.append([ - f"{com_paths['COMIN_CHEM_ANALYSIS']}/{head}aerostat.tgz", - f"{arcdir}/aerostat.{RUN}.{cycle_YMDH}.tgz" - ]) - - # Aerosol observation files - if self.task_config.get('DO_PREP_OBS_AERO', False) and com_paths.get('COMIN_OBS'): - det_anl_files.extend([ - [f"{com_paths['COMIN_OBS']}/{head}aeroobs", - f"{arcdir}/aeroobs.{RUN}.{cycle_YMDH}"], - [f"{com_paths['COMIN_OBS']}/{head}aeroawobs", - f"{arcdir}/aeroawobs.{RUN}.{cycle_YMDH}"], - ]) - - file_set.extend(det_anl_files) - - # GFS-specific files - if RUN == "gfs": - gfs_files = [] - - # GRIB2 forecast files (only 1p00 grid for archive) - if com_paths.get('COMIN_ATMOS_GRIB_1p00'): - FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) - FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) - - for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS): - fhr_str = str(fhr).zfill(3) - fhr_archive = str(fhr).zfill(2) # Archive uses 2-digit format - gfs_files.append([ - f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.f{fhr_str}.grib2", - f"{arcdir}/pgbf{fhr_archive}.{RUN}.{cycle_YMDH}.grib2" - ]) - - # Cyclone genesis data (only if files exist) - if com_paths.get('COMIN_ATMOS_GENESIS'): - genesis_file = f"{com_paths['COMIN_ATMOS_GENESIS']}/storms.gfso.atcf_gen.{cycle_YMDH}" - if os.path.exists(genesis_file): - gfs_files.extend([ - [genesis_file, f"{arcdir}/storms.gfso.atcf_gen.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_GENESIS']}/storms.gfso.atcf_gen.altg.{cycle_YMDH}", - f"{arcdir}/storms.gfso.atcf_gen.altg.{cycle_YMDH}"], - ]) - - trak_file = f"{com_paths['COMIN_ATMOS_GENESIS']}/trak.gfso.atcfunix.{cycle_YMDH}" - if os.path.exists(trak_file): - gfs_files.extend([ - [trak_file, f"{arcdir}/trak.gfso.atcfunix.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_GENESIS']}/trak.gfso.atcfunix.altg.{cycle_YMDH}", - f"{arcdir}/trak.gfso.atcfunix.altg.{cycle_YMDH}"], - ]) - - # Fit2Obs files (atm and sfc forecast history files) - if self.task_config.get("DO_FIT2OBS", False): - if com_paths.get('COMIN_ATMOS_HISTORY'): - vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") - fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) - - FHMAX_FITS = self.task_config.get('FHMAX_FITS', 180) - for fhr in range(0, FHMAX_FITS + 1, 6): - fhr_str = str(fhr).zfill(3) - sfcfile = f"{head}sfc.f{fhr_str}.nc" - sigfile = f"{head}atm.f{fhr_str}.nc" - gfs_files.extend([ - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{sfcfile}", - f"{fit2obs_dir}/{sfcfile}"], - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{sigfile}", - f"{fit2obs_dir}/{sigfile}"], - ]) - else: - logger.warning("DO_FIT2OBS enabled but COMIN_ATMOS_HISTORY path not available") - - file_set.extend(gfs_files) - - # GDAS-specific files - elif RUN == "gdas": - gdas_files = [] - - # GRIB2 forecast files - if com_paths.get('COMIN_ATMOS_GRIB_1p00'): - FHMAX = self.task_config.get('FHMAX', 9) - FHOUT = self.task_config.get('FHOUT', 3) - - for fhr in range(0, FHMAX + 1, FHOUT): - fhr_str = str(fhr).zfill(3) - fhr_archive = str(fhr).zfill(2) # Archive uses 2-digit format - gdas_files.append([ - f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.f{fhr_str}.grib2", - f"{arcdir}/pgbf{fhr_archive}.{RUN}.{cycle_YMDH}.grib2" - ]) - - # Radiance bias correction files - if com_paths.get('COMIN_ATMOS_ANALYSIS'): - gdas_files.extend([ - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias.txt", - f"{arcdir}/abias.{RUN}.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_pc.txt", - f"{arcdir}/abias_pc.{RUN}.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_air.txt", - f"{arcdir}/abias_air.{RUN}.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}abias_int.txt", - f"{arcdir}/abias_int.{RUN}.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS']}/{head}analysis.dtf.a006.nc", - f"{arcdir}/dtfanl.{RUN}.{cycle_YMDH}.nc"], - ]) - - file_set.extend(gdas_files) - - else: # Ensemble files (enkfgdas, enkfgfs) - only statistics archived - enkf_files = [] - - # EnKF ensemble statistics (from ensstat directory) - if com_paths.get('COMIN_ATMOS_ANALYSIS_ENSSTAT'): - if self.task_config.get('DO_JEDIATMENS', False): - # JEDI ensemble statistics - enkf_files.append([ - f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}stat.atm.tar", - f"{arcdir}/atmensstat.{RUN}.{cycle_YMDH}" - ]) - else: - # GSI EnKF statistics - enkf_files.extend([ - [f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}enkfstat.txt", - f"{arcdir}/enkfstat.{RUN}.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}gsistat.ensmean.txt", - f"{arcdir}/gsistat.{RUN}.{cycle_YMDH}.ensmean"], - ]) - file_set.extend(enkf_files) - else: - logger.warning("COMIN_ATMOS_ANALYSIS_ENSSTAT path not available for EnKF, skipping ensemble statistics") - - return file_set - - # Call nested helper functions - return { - 'mkdir_list': build_mkdir_list(), - 'file_set': build_file_set() - } - - @logit(logger) - def gfs_arcdir(self) -> Dict[str, Any]: - """Build complete file set for GFS archiving (gfs_arcdir.yaml.j2). - - This method corresponds to gfs_arcdir.yaml.j2 and builds the complete - file set with all logic, loops, and conditionals for GFS archiving. - - Returns - ------- - Dict[str, Any] - Dictionary containing: - - cycle_vars: Cycle-specific variables - - com_paths: All COM paths - - file_set: List of [source, destination] file pairs - - mkdir_list: List of directories to create - """ - cycle_vars = self._get_cycle_vars() - base_dict = self._get_template_dict() - com_paths = self._calculate_com_paths(base_dict) - - arcdir = self.task_config.ARCDIR - - # Build mkdir list and file set using helper method with nested functions - lists = self._build_gfs_list(cycle_vars, com_paths, arcdir) - - return { - 'cycle_vars': cycle_vars, - 'com_paths': com_paths, - 'file_set': lists['file_set'], - 'mkdir_list': lists['mkdir_list'] - } - - def _build_gefs_list(self, cycle_vars: Dict[str, Any], com_paths: Dict[str, str]) -> Dict[str, list]: - """Build mkdir list and file set for GEFS archiving. - - This method contains nested helper functions to build the directory list - and file set for GEFS archiving. - - Parameters - ---------- - cycle_vars : Dict[str, Any] - Cycle-specific variables - com_paths : Dict[str, str] - COM directory paths - - Returns - ------- - Dict[str, list] - Dictionary containing 'mkdir_list' and 'file_set' - """ - gefs_arch = os.path.join(self.task_config.ROTDIR, "gefsarch") - - def build_mkdir_list() -> list: - """Build list of directories to create for GEFS archiving.""" - return [gefs_arch] - - def build_file_set() -> list: - """Build list of files to archive for GEFS.""" - file_set = [] - head = cycle_vars['head'] - - # GEFS ensemble statistics files - ensstat_path = com_paths.get('COMIN_ATMOS_ENSSTAT_1p00', '') - - if ensstat_path and os.path.exists(ensstat_path): - FHMIN_GFS = self.task_config.get('FHMIN_GFS', 0) - FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) - FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) - - for fhr in range(FHMIN_GFS, FHMAX_GFS + FHOUT_GFS, FHOUT_GFS): - fhr_str = str(fhr).zfill(3) - source_file = f"{ensstat_path}/{head}mean.pres_.1p00.f{fhr_str}.grib2" - file_set.append([source_file, gefs_arch]) - else: - if not ensstat_path: - logger.warning("COMIN_ATMOS_ENSSTAT_1p00 not found in com_paths") - else: - logger.warning(f"COMIN_ATMOS_ENSSTAT_1p00 path does not exist: {ensstat_path}") - - return file_set - - # Call nested helper functions - return { - 'mkdir_list': build_mkdir_list(), - 'file_set': build_file_set() - } - - @logit(logger) - def gefs_arcdir(self) -> Dict[str, Any]: - """Build complete file set for GEFS archiving (gefs_arcdir.yaml.j2). - - This method corresponds to gefs_arcdir.yaml.j2 and builds the complete - file set for GEFS ensemble forecast archiving. - - Returns - ------- - Dict[str, Any] - Dictionary containing: - - cycle_vars: Cycle-specific variables - - com_paths: All COM paths (includes COMIN_ATMOS_ENSSTAT_1p00) - - file_set: List of [source, destination] file pairs - - mkdir_list: List of directories to create - """ - cycle_vars = self._get_cycle_vars() - base_dict = self._get_template_dict() - com_paths = self._calculate_com_paths(base_dict) + template_specs.append(('COMIN_ATMOS_ANALYSIS_ENSSTAT', 'COM_ATMOS_ANALYSIS_TMPL', {'MEMDIR': 'ensstat'})) - # Build mkdir list and file set using helper method with nested functions - lists = self._build_gefs_list(cycle_vars, com_paths) + return template_specs - return { - 'cycle_vars': cycle_vars, - 'com_paths': com_paths, - 'file_set': lists['file_set'], - 'mkdir_list': lists['mkdir_list'] - } + def _construct_com_paths(self, base_dict: Dict[str, str], template_specs: list) -> Dict[str, str]: + """Construct COM paths from template specifications. - def _build_gcafs_list(self, cycle_vars: Dict[str, Any], com_paths: Dict[str, str], - arcdir: str) -> Dict[str, list]: - """Build mkdir list and file set for GCAFS archiving. - - This method contains nested helper functions to build the directory list - and file set for GCAFS archiving. GCAFS is simpler than GFS - mainly - forecast files and optional aerosol files. + This method takes template specifications and constructs the actual paths + by substituting template variables using base_dict updated with extra_vars. Parameters ---------- - cycle_vars : Dict[str, Any] - Cycle-specific variables - com_paths : Dict[str, str] - COM directory paths - arcdir : str - Archive directory path + base_dict : Dict[str, str] + Base template substitution dictionary from _get_template_dict() + template_specs : list of tuples + List from _get_com_template_specs() containing specifications Returns ------- - Dict[str, list] - Dictionary containing 'mkdir_list' and 'file_set' + Dict[str, str] + Dictionary mapping COM variable names to resolved paths """ + com_paths = {} - def build_mkdir_list() -> list: - """Build list of directories to create for GCAFS archiving.""" - mkdir_list = [arcdir] - - # Add fit2obs directory if enabled - RUN = self.task_config.RUN - if self.task_config.get("DO_FIT2OBS", False): - vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") - cycle_YMD = cycle_vars['cycle_YMD'] - cycle_HH = cycle_vars['cycle_HH'] - fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) - mkdir_list.append(fit2obs_dir) - - return mkdir_list - - def build_file_set() -> list: - """Build list of files to archive for GCAFS.""" - file_set = [] - - head = cycle_vars['head'] - cycle_YMDH = cycle_vars['cycle_YMDH'] - cycle_YMD = cycle_vars['cycle_YMD'] - cycle_HH = cycle_vars['cycle_HH'] - - RUN = self.task_config.RUN - MODE = self.task_config.get('MODE', 'cycled') - - # Deterministic files (not enkf) - if "enkf" not in RUN: - # Analysis files (cycled mode) - only aerosol for GCAFS - if MODE == "cycled": - det_anl_files = [] - - # Aerosol DA statistics - if self.task_config.get('DO_AERO_ANL', False) and com_paths.get('COMIN_CHEM_ANALYSIS'): - det_anl_files.append([ - f"{com_paths['COMIN_CHEM_ANALYSIS']}/{head}aerostat.tgz", - f"{arcdir}/aerostat.{RUN}.{cycle_YMDH}.tgz" - ]) - - # Aerosol observation files - if self.task_config.get('DO_PREP_OBS_AERO', False) and com_paths.get('COMIN_OBS'): - det_anl_files.extend([ - [f"{com_paths['COMIN_OBS']}/{head}aeroobs", - f"{arcdir}/aeroobs.{RUN}.{cycle_YMDH}"], - [f"{com_paths['COMIN_OBS']}/{head}aeroawobs", - f"{arcdir}/aeroawobs.{RUN}.{cycle_YMDH}"], - ]) - - file_set.extend(det_anl_files) - - # GCAFS-specific forecast files - if RUN == "gcafs": - gcafs_files = [] - - # GRIB2 forecast files (only 1p00 grid for archive) - if com_paths.get('COMIN_ATMOS_GRIB_1p00'): - FHMAX_GFS = self.task_config.get('FHMAX_GFS', 384) - FHOUT_GFS = self.task_config.get('FHOUT_GFS', 3) - - for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS): - fhr_str = str(fhr).zfill(3) - fhr_archive = str(fhr).zfill(2) # Archive uses 2-digit format - gcafs_files.append([ - f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.f{fhr_str}.grib2", - f"{arcdir}/pgbf{fhr_archive}.{RUN}.{cycle_YMDH}.grib2" - ]) - - # Fit2Obs files (atm and sfc forecast history files) - if self.task_config.get("DO_FIT2OBS", False): - if com_paths.get('COMIN_ATMOS_HISTORY'): - vfyarc = os.path.join(self.task_config.ROTDIR, "vrfyarch") - fit2obs_dir = os.path.join(vfyarc, f"{RUN}.{cycle_YMD}", cycle_HH) - - FHMAX_FITS = self.task_config.get('FHMAX_FITS', 180) - for fhr in range(0, FHMAX_FITS + 1, 6): - fhr_str = str(fhr).zfill(3) - sfcfile = f"{head}sfc.f{fhr_str}.nc" - sigfile = f"{head}atm.f{fhr_str}.nc" - gcafs_files.extend([ - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{sfcfile}", - f"{fit2obs_dir}/{sfcfile}"], - [f"{com_paths['COMIN_ATMOS_HISTORY']}/{sigfile}", - f"{fit2obs_dir}/{sigfile}"], - ]) - else: - logger.warning("DO_FIT2OBS enabled but COMIN_ATMOS_HISTORY path not available") - - file_set.extend(gcafs_files) - - # GCDAS-specific forecast files - elif RUN == "gcdas": - gcdas_files = [] - - # GRIB2 forecast files - if com_paths.get('COMIN_ATMOS_GRIB_1p00'): - FHMAX = self.task_config.get('FHMAX', 9) - FHOUT = self.task_config.get('FHOUT', 3) - - for fhr in range(0, FHMAX + 1, FHOUT): - fhr_str = str(fhr).zfill(3) - fhr_archive = str(fhr).zfill(2) # Archive uses 2-digit format - gcdas_files.append([ - f"{com_paths['COMIN_ATMOS_GRIB_1p00']}/{head}pres_a.1p00.f{fhr_str}.grib2", - f"{arcdir}/pgbf{fhr_archive}.{RUN}.{cycle_YMDH}.grib2" - ]) - - file_set.extend(gcdas_files) - - else: # Ensemble files (enkfgcafs, enkfgcdas) - only statistics archived - enkf_files = [] - - # EnKF ensemble statistics (from ensstat directory) - if com_paths.get('COMIN_ATMOS_ANALYSIS_ENSSTAT'): - if self.task_config.get('DO_JEDIATMENS', False): - # JEDI ensemble statistics - NOTE: GCAFS uses different filename - enkf_files.append([ - f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}atmensstat", - f"{arcdir}/atmensstat.{RUN}.{cycle_YMDH}" - ]) - else: - # GSI EnKF statistics - enkf_files.extend([ - [f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}enkfstat.txt", - f"{arcdir}/enkfstat.{RUN}.{cycle_YMDH}"], - [f"{com_paths['COMIN_ATMOS_ANALYSIS_ENSSTAT']}/{head}gsistat.ensmean.txt", - f"{arcdir}/gsistat.{RUN}.{cycle_YMDH}.ensmean"], - ]) - file_set.extend(enkf_files) - else: - logger.warning("COMIN_ATMOS_ANALYSIS_ENSSTAT path not available for EnKF, skipping ensemble statistics") - - return file_set - - # Call nested helper functions - return { - 'mkdir_list': build_mkdir_list(), - 'file_set': build_file_set() - } - - @logit(logger) - def gcafs_arcdir(self) -> Dict[str, Any]: - """Build complete file set for GCAFS archiving (gcafs_arcdir.yaml.j2). - - This method corresponds to gcafs_arcdir.yaml.j2. GCAFS is simpler than GFS, - archiving mainly forecast files and optional aerosol files. + for com_key, template_key, extra_vars in template_specs: + # Use base_dict directly, updated with any extra variables + tmpl_dict = {**base_dict, **extra_vars} - Returns - ------- - Dict[str, Any] - Dictionary containing: - - cycle_vars: Cycle-specific variables - - com_paths: All COM paths - - file_set: List of [source, destination] file pairs - - mkdir_list: List of directories to create - """ - cycle_vars = self._get_cycle_vars() - base_dict = self._get_template_dict() - com_paths = self._calculate_com_paths(base_dict) + template = self.task_config.get(template_key, '') + com_paths[com_key] = Template.substitute_string( + template, TemplateConstants.DOLLAR_CURLY_BRACE, + lambda key: tmpl_dict.get(key, '')) if template else '' - arcdir = self.task_config.ARCDIR + return com_paths - # Build mkdir list and file set using helper method with nested functions - lists = self._build_gcafs_list(cycle_vars, com_paths, arcdir) - return { - 'cycle_vars': cycle_vars, - 'com_paths': com_paths, - 'file_set': lists['file_set'], - 'mkdir_list': lists['mkdir_list'] - } +# ============================================================================ +# FILE SET GENERATION NOW HANDLED BY YAML TEMPLATES +# ============================================================================ +# The following methods have been removed and their logic moved to YAML templates: +# - _build_gfs_list() +# - gfs_arcdir() +# - _build_gefs_list() +# - gefs_arcdir() +# - _build_gcafs_list() +# - gcafs_arcdir() +# +# The YAML templates (parm/archive/*_arcdir.yaml.j2) now contain all file set +# generation logic (loops, conditionals, file path construction). +# +# The Python code only provides VARIABLES (cycle vars, COM paths, config vars) +# that the YAML templates need via get_all_yaml_vars(). +# ============================================================================ From b727e5257dbf9021d887bfbee4d416cfac686d88 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Thu, 4 Dec 2025 01:34:33 +0000 Subject: [PATCH 07/29] update yamls and arch.py --- parm/archive/gfs_arcdir.yaml.j2 | 12 +++++- ush/python/pygfs/task/archive_vars.py | 59 +++++++++++++++------------ 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index 82dcde907a7..19103e3319a 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -98,6 +98,16 @@ {% do gdas_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} {% endfor %} + {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias.txt", + ARCDIR ~ "/abias." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias_pc.txt", + ARCDIR ~ "/abias_pc." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias_air.txt", + ARCDIR ~ "/abias_air." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias_int.txt", + ARCDIR ~ "/abias_int." ~ RUN ~ "." ~ cycle_YMDH ]) %} + {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "analysis.dtf.a006.nc", + ARCDIR ~ "/dtfanl." ~ RUN ~ "." ~ cycle_YMDH ~ ".nc"]) %} # Now append the necessary file pairs to file_set # Common deterministic files @@ -145,7 +155,7 @@ mkdir: - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" {% endif %} -copy: +copy_opt: {% for source_dest_pair in file_set %} - {{ source_dest_pair }} {% endfor %} diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index c64d3abfb94..0a9f81e997f 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -281,33 +281,38 @@ def _get_com_template_specs(self) -> list: - extra_vars: Dict of additional template variables (e.g., {'GRID': '0p25'}) Empty dict {} if no additional variables needed """ - template_specs = [] - - # Common paths (no extra variables needed) - common_templates = [ - ('COMIN_ATMOS_ANALYSIS', 'COM_ATMOS_ANALYSIS_TMPL', {}), - ('COMIN_ATMOS_GENESIS', 'COM_ATMOS_GENESIS_TMPL', {}), - ('COMIN_ATMOS_HISTORY', 'COM_ATMOS_HISTORY_TMPL', {}), - ('COMIN_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL', {}), - ('COMIN_CHEM_ANALYSIS', 'COM_CHEM_ANALYSIS_TMPL', {}), - ('COMIN_SNOW_ANALYSIS', 'COM_SNOW_ANALYSIS_TMPL', {}), - ('COMIN_OBS', 'COM_OBS_TMPL', {}), - ('COMOUT_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL', {}), - ] - template_specs.extend(common_templates) - - # Grid-specific paths - for grid in ["0p25", "0p50", "1p00"]: - com_key = f"COMIN_ATMOS_GRIB_{grid}" - template_specs.append((com_key, 'COM_ATMOS_GRIB_GRID_TMPL', {'GRID': grid})) - - # GEFS-specific: Ensemble statistics path - if 'gefs' in self.task_config.RUN.lower(): - template_specs.append(('COMIN_ATMOS_ENSSTAT_1p00', 'COM_ATMOS_GRIB_GRID_TMPL', {'GRID': '1p00'})) - - # EnKF-specific: Analysis ensemble statistics path - if 'enkf' in self.task_config.RUN.lower(): - template_specs.append(('COMIN_ATMOS_ANALYSIS_ENSSTAT', 'COM_ATMOS_ANALYSIS_TMPL', {'MEMDIR': 'ensstat'})) + # EnKF-specific: Only these 3 ENSSTAT paths with MEMDIR='ensstat' + if 'enkf' in self.task_config.RUN: + template_specs = [ + ('COMIN_ATMOS_ANALYSIS_ENSSTAT', 'COM_ATMOS_ANALYSIS_TMPL', {'MEMDIR': 'ensstat'}), + ('COMIN_ATMOS_HISTORY_ENSSTAT', 'COM_ATMOS_HISTORY_TMPL', {'MEMDIR': 'ensstat'}), + ('COMIN_SNOW_ANALYSIS_ENSSTAT', 'COM_SNOW_ANALYSIS_TMPL', {'MEMDIR': 'ensstat'}) + ] + else: + # All other systems (GFS, GEFS, GCAFS) get common + grid-specific paths + template_specs = [] + + # Common paths (no extra variables needed) + common_templates = [ + ('COMIN_ATMOS_ANALYSIS', 'COM_ATMOS_ANALYSIS_TMPL', {}), + ('COMIN_ATMOS_GENESIS', 'COM_ATMOS_GENESIS_TMPL', {}), + ('COMIN_ATMOS_HISTORY', 'COM_ATMOS_HISTORY_TMPL', {}), + ('COMIN_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL', {}), + ('COMIN_CHEM_ANALYSIS', 'COM_CHEM_ANALYSIS_TMPL', {}), + ('COMIN_SNOW_ANALYSIS', 'COM_SNOW_ANALYSIS_TMPL', {}), + ('COMIN_OBS', 'COM_OBS_TMPL', {}), + ('COMOUT_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL', {}), + ] + template_specs.extend(common_templates) + + # Grid-specific paths + for grid in ["0p25", "0p50", "1p00"]: + com_key = f"COMIN_ATMOS_GRIB_{grid}" + template_specs.append((com_key, 'COM_ATMOS_GRIB_GRID_TMPL', {'GRID': grid})) + + # GEFS-specific: Ensemble statistics path + if 'gefs' in self.task_config.RUN: + template_specs.append(('COMIN_ATMOS_ENSSTAT_1p00', 'COM_ATMOS_GRIB_GRID_TMPL', {'GRID': '1p00'})) return template_specs From bade5e5e0028e94fb859ff87ae04938f99076601 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Thu, 4 Dec 2025 01:38:02 +0000 Subject: [PATCH 08/29] update ush/python/pygfs/task/archive_vars.py --- ush/python/pygfs/task/archive_vars.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index 0a9f81e997f..bc3a7b1cee9 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -290,10 +290,7 @@ def _get_com_template_specs(self) -> list: ] else: # All other systems (GFS, GEFS, GCAFS) get common + grid-specific paths - template_specs = [] - - # Common paths (no extra variables needed) - common_templates = [ + template_specs = [ ('COMIN_ATMOS_ANALYSIS', 'COM_ATMOS_ANALYSIS_TMPL', {}), ('COMIN_ATMOS_GENESIS', 'COM_ATMOS_GENESIS_TMPL', {}), ('COMIN_ATMOS_HISTORY', 'COM_ATMOS_HISTORY_TMPL', {}), @@ -303,8 +300,6 @@ def _get_com_template_specs(self) -> list: ('COMIN_OBS', 'COM_OBS_TMPL', {}), ('COMOUT_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL', {}), ] - template_specs.extend(common_templates) - # Grid-specific paths for grid in ["0p25", "0p50", "1p00"]: com_key = f"COMIN_ATMOS_GRIB_{grid}" From 8b2f5dea94c23719d237afc8c8d562ca0ef5c074 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Thu, 4 Dec 2025 16:54:29 +0000 Subject: [PATCH 09/29] add filehandler logic --- parm/archive/gcafs_arcdir.yaml.j2 | 120 +++++++---------- parm/archive/gefs_arcdir.yaml.j2 | 25 ++-- parm/archive/gfs_arcdir.yaml.j2 | 205 +++++++++++------------------- 3 files changed, 125 insertions(+), 225 deletions(-) diff --git a/parm/archive/gcafs_arcdir.yaml.j2 b/parm/archive/gcafs_arcdir.yaml.j2 index 5cb27e4d830..77f90bd2774 100644 --- a/parm/archive/gcafs_arcdir.yaml.j2 +++ b/parm/archive/gcafs_arcdir.yaml.j2 @@ -3,102 +3,70 @@ # - VFYARC # - All COMIN_* paths -# Select data to store in the ARCDIR and VFYARC from deterministic runs -# This file set will contain all source-destination pairs to send to the FileHandler for copying -{% set file_set = [] %} - -# Deterministic files -{% if "enkf" not in RUN %} - # Common files to be added to both the gcafs and gcdas keys below - {% set det_files = [] %} - - # Deterministic analysis files (generated for cycled experiments) - {% set det_anl_files = [] %} - - {% if DO_AERO_ANL %} - {% do det_anl_files.append([COMIN_CHEM_ANALYSIS ~ "/" ~ head ~ "aerostat.tgz", - ARCDIR ~ "/aerostat." ~ RUN ~ "." ~ cycle_YMDH ~ ".tgz"]) %} +mkdir: + - "{{ ARCDIR }}" + {% if DO_FIT2OBS == True %} + - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" {% endif %} - {% if DO_PREP_OBS_AERO == True %} - {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroobs", - ARCDIR ~ "/aeroobs." ~ RUN ~ "." ~ cycle_YMDH]) %} - {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroawobs", - ARCDIR ~ "/aeroawobs." ~ RUN ~ "." ~ cycle_YMDH]) %} - {% endif %} +{% if "enkf" not in RUN %} +# Deterministic runs (gcafs, gcdas) - # GCAFS-specific files - {% set gfs_files = [] %} +copy_req: + # GCAFS forecast files - REQUIRED + {% if RUN == "gcafs" %} {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} - {% do gfs_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", - ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} + - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", + "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] {% endfor %} - # GCAFS Fit2Obs data - {% set fit2obs_files = [] %} + # GCAFS Fit2Obs data - REQUIRED if DO_FIT2OBS is enabled + {% if DO_FIT2OBS == True %} {% for fhr in range(0, FHMAX_FITS + 1, 6) %} - {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} - {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} - {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sfcfile, - VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sfcfile ]) %} - {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sigfile, - VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sigfile ]) %} + - ["{{ COMIN_ATMOS_HISTORY }}/{{ head }}sfc.f{{ '%03d'|format(fhr) }}.nc", + "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}/{{ head }}sfc.f{{ '%03d'|format(fhr) }}.nc"] + - ["{{ COMIN_ATMOS_HISTORY }}/{{ head }}atm.f{{ '%03d'|format(fhr) }}.nc", + "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}/{{ head }}atm.f{{ '%03d'|format(fhr) }}.nc"] {% endfor %} + {% endif %} + {% endif %} - # GCDAS-specific files - {% set gdas_files = [] %} + # GCDAS forecast files - REQUIRED + {% if RUN == "gcdas" %} {% for fhr in range(0, FHMAX + 1, FHOUT) %} - {% do gdas_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", - ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} + - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", + "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] {% endfor %} + {% endif %} - # Now append the necessary file pairs to file_set - # Common deterministic files - {% set file_set = file_set + det_files %} + # Deterministic analysis files (cycled mode only) - REQUIRED {% if MODE == "cycled" %} - {% set file_set = file_set + det_anl_files %} + {% if DO_AERO_ANL %} + - ["{{ COMIN_CHEM_ANALYSIS }}/{{ head }}aerostat.tgz", + "{{ ARCDIR }}/aerostat.{{ RUN }}.{{ cycle_YMDH }}.tgz"] {% endif %} - # Run-specific deterministic files - {% if RUN == "gcafs" %} - {% set file_set = file_set + gfs_files %} - # Fit2Obs files - {% if DO_FIT2OBS == True %} - {% set file_set = file_set + fit2obs_files %} - {% endif %} - {% elif RUN == "gcdas" %} - {% set file_set = file_set + gdas_files %} + {% if DO_PREP_OBS_AERO == True %} + - ["{{ COMIN_OBS }}/{{ head }}aeroobs", + "{{ ARCDIR }}/aeroobs.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_OBS }}/{{ head }}aeroawobs", + "{{ ARCDIR }}/aeroawobs.{{ RUN }}.{{ cycle_YMDH }}"] + {% endif %} {% endif %} -{% else %} # End of deterministic files +{% else %} +# Ensemble runs (enkfgcafs, enkfgcdas) - # Ensemble analysis files - {% set enkf_files = [] %} +copy_req: + # Ensemble analysis statistics - REQUIRED {% if DO_JEDIATMENS == True %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "atmensstat", - ARCDIR ~ "/atmensstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}atmensstat", + "{{ ARCDIR }}/atmensstat.{{ RUN }}.{{ cycle_YMDH }}"] {% else %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "enkfstat.txt", - ARCDIR ~ "/enkfstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "gsistat.ensmean.txt", - ARCDIR ~ "/gsistat." ~ RUN ~ "." ~ cycle_YMDH ~ ".ensmean"]) %} + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}enkfstat.txt", + "{{ ARCDIR }}/enkfstat.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}gsistat.ensmean.txt", + "{{ ARCDIR }}/gsistat.{{ RUN }}.{{ cycle_YMDH }}.ensmean"] {% endif %} - # Construct the final file set - {% set file_set = file_set + enkf_files %} - {% endif %} - - -# Actually write the yaml -mkdir: - - "{{ ARCDIR }}" - - {% if DO_FIT2OBS == True %} - - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" - {% endif %} - -copy: - {% for source_dest_pair in file_set %} - - {{ source_dest_pair }} - {% endfor %} diff --git a/parm/archive/gefs_arcdir.yaml.j2 b/parm/archive/gefs_arcdir.yaml.j2 index 1d63e2b1954..2cc1f973523 100644 --- a/parm/archive/gefs_arcdir.yaml.j2 +++ b/parm/archive/gefs_arcdir.yaml.j2 @@ -3,22 +3,15 @@ # - VFYARC (archive directory) # - COMIN_ATMOS_ENSSTAT_1p00 (calculated in Python with MEMDIR='ensstat') -{% set file_set = [] %} +# Create directories first +mkdir: + - "{{ VFYARC }}" -# Select ensstat files to copy to the arcdir -{% set ensstat_files = [] %} -{% if path_exists(COMIN_ATMOS_ENSSTAT_1p00) %} +# Define all source-destination pairs for archiving +# Use copy_req for files that MUST exist (raise error if missing) +copy_req: + # GEFS ensemble mean forecast files - REQUIRED {% for fhr in range(FHMIN_GFS, FHMAX_GFS + FHOUT_GFS, FHOUT_GFS) %} - {% do ensstat_files.append([COMIN_ATMOS_ENSSTAT_1p00 ~ "/" ~ head ~ "mean.pres_." ~ - "1p00" ~ ".f" ~ '%03d'|format(fhr) ~ ".grib2", - VFYARC]) %} + - ["{{ COMIN_ATMOS_ENSSTAT_1p00 }}/{{ head }}mean.pres_.1p00.f{{ '%03d'|format(fhr) }}.grib2", + "{{ VFYARC }}/{{ head }}mean.pres_.1p00.f{{ '%03d'|format(fhr) }}.grib2"] {% endfor %} -{% endif %} -{% set file_set = ensstat_files %} -# Actually write the yaml -mkdir: - - "{{ VFYARC }}" -copy: - {% for source_dest_pair in file_set %} - - {{ source_dest_pair }} - {% endfor %} diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index 19103e3319a..4fbe07e1724 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -3,159 +3,98 @@ # - VFYARC, GEFS_ARCH # - All COMIN_* paths -# Select data to store in the ARCDIR and VFYARC from deterministic runs -# This file set will contain all source-destination pairs to send to the FileHandler for copying -{% set file_set = [] %} - -# Deterministic files -{% if "enkf" not in RUN %} - # Common files to be added to both the gfs and gdas keys below - {% set det_files = [] %} - # Cyclone forecasts, produced for both gdas and gfs cycles - ## Only created if tracking is on and there were systems to track - {% if path_exists(COMIN_ATMOS_TRACK ~ "/atcfunix." ~ RUN ~ "." ~ cycle_YMDH) %} - {% do det_files.append([COMIN_ATMOS_TRACK ~ "/atcfunix." ~ RUN ~ "." ~ cycle_YMDH, - ARCDIR ~"/atcfunix." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do det_files.append([COMIN_ATMOS_TRACK ~ "/atcfunixp." ~ RUN ~ "." ~ cycle_YMDH, - ARCDIR ~ "/atcfunixp." ~ RUN ~ "." ~ cycle_YMDH]) %} +# Create directories first +mkdir: + - "{{ ARCDIR }}" + {% if DO_FIT2OBS == True %} + - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" {% endif %} - # Cyclone tracking data - {% for basin in ["epac", "natl"] %} - {% if path_exists(COMIN_ATMOS_TRACK + "/" + basin) %} - {% do det_files.append([COMIN_ATMOS_TRACK ~ "/" ~ basin, - ARCDIR ~ "/" ~ basin ]) %} - {% endif %} - {% endfor %} +# Define all source-destination pairs for archiving +# Use copy_req for files that MUST exist (raise error if missing) +# Use copy_opt for files that MAY exist (warning if missing, no error) - # Deterministic analysis files (generated for cycled experiments) - {% set det_anl_files = [] %} - # Analysis data (if we are running in cycled mode) - {% do det_anl_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.analysis.grib2", - ARCDIR ~ "/pgbanl." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} +{% if "enkf" not in RUN %} +copy_req: + {% if MODE == "cycled" %} + # Deterministic analysis files (cycled mode only) - REQUIRED + - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.analysis.grib2", "{{ ARCDIR }}/pgbanl.{{ RUN }}.{{ cycle_YMDH }}.grib2"] - {% if DO_JEDIATMVAR == True %} - {% do det_anl_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "stat.atm.tar", - ARCDIR ~ "/atmstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% else %} - {% do det_anl_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "gsistat.txt", - ARCDIR ~ "/gsistat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% endif %} + {% if DO_JEDIATMVAR == True %} + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}stat.atm.tar", "{{ ARCDIR }}/atmstat.{{ RUN }}.{{ cycle_YMDH }}"] + {% else %} + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}gsistat.txt", "{{ ARCDIR }}/gsistat.{{ RUN }}.{{ cycle_YMDH }}"] + {% endif %} - {% if DO_JEDISNOWDA == True %} - {% do det_anl_files.append([COMIN_SNOW_ANALYSIS ~ "/" ~ head ~ "snow_analysis.ioda_hofx.tar", - ARCDIR ~ "/snowstat." ~ RUN ~ "." ~ cycle_YMDH ~ ".tar"]) %} - {% endif %} + {% if DO_JEDISNOWDA == True %} + - ["{{ COMIN_SNOW_ANALYSIS }}/{{ head }}snow_analysis.ioda_hofx.tar", "{{ ARCDIR }}/snowstat.{{ RUN }}.{{ cycle_YMDH }}.tar"] + {% endif %} - {% if DO_AERO_ANL %} - {% do det_anl_files.append([COMIN_CHEM_ANALYSIS ~ "/" ~ head ~ "aerostat.tgz", - ARCDIR ~ "/aerostat." ~ RUN ~ "." ~ cycle_YMDH ~ ".tgz"]) %} - {% endif %} + {% if DO_AERO_ANL %} + - ["{{ COMIN_CHEM_ANALYSIS }}/{{ head }}aerostat.tgz", "{{ ARCDIR }}/aerostat.{{ RUN }}.{{ cycle_YMDH }}.tgz"] + {% endif %} - {% if DO_PREP_OBS_AERO == True %} - {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroobs", - ARCDIR ~ "/aeroobs." ~ RUN ~ "." ~ cycle_YMDH]) %} - {% do det_anl_files.append([COMIN_OBS ~ "/" ~ head ~ "aeroawobs", - ARCDIR ~ "/aeroawobs." ~ RUN ~ "." ~ cycle_YMDH]) %} + {% if DO_PREP_OBS_AERO == True %} + - ["{{ COMIN_OBS }}/{{ head }}aeroobs", "{{ ARCDIR }}/aeroobs.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_OBS }}/{{ head }}aeroawobs", "{{ ARCDIR }}/aeroawobs.{{ RUN }}.{{ cycle_YMDH }}"] + {% endif %} {% endif %} - # GFS-specific files - {% set gfs_files = [] %} - {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} - {% do gfs_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", - ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} - {% endfor %} + {% if RUN == "gfs" %} + # GFS forecast files - REQUIRED + {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} + - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] + {% endfor %} - # Cyclone genesis data (only present if there are storms) - {% if path_exists(COMIN_ATMOS_GENESIS ~ "/storms.gfso.atcf_gen." ~ cycle_YMDH) %} - {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/storms.gfso.atcf_gen." ~ cycle_YMDH, - ARCDIR ~ "/storms.gfso.atcf_gen." ~ cycle_YMDH ]) %} - {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/storms.gfso.atcf_gen.altg." ~ cycle_YMDH, - ARCDIR ~ "/storms.gfso.atcf_gen.altg." ~ cycle_YMDH ]) %} - {% endif %} + {% if DO_FIT2OBS == True %} + # GFS Fit2Obs data - REQUIRED + {% for fhr in range(0, FHMAX_FITS + 1, 6) %} + {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} + {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} + - ["{{ COMIN_ATMOS_HISTORY }}{{ sfcfile }}", "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sfcfile }}"] + - ["{{ COMIN_ATMOS_HISTORY }}{{ sigfile }}", "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sigfile }}"] + {% endfor %} + {% endif %} - {% if path_exists(COMIN_ATMOS_GENESIS ~ "/trak.gfso.atcfunix." ~ cycle_YMDH) %} - {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/trak.gfso.atcfunix." ~ cycle_YMDH, - ARCDIR ~ "/trak.gfso.atcfunix." ~ cycle_YMDH ]) %} - {% do gfs_files.append([COMIN_ATMOS_GENESIS ~ "/trak.gfso.atcfunix.altg." ~ cycle_YMDH, - ARCDIR ~ "/trak.gfso.atcfunix.altg." ~ cycle_YMDH ]) %} + {% elif RUN == "gdas" %} + # GDAS forecast files - REQUIRED + {% for fhr in range(0, FHMAX + 1, FHOUT) %} + - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] + {% endfor %} + + # GDAS bias correction files - REQUIRED + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias.txt", "{{ ARCDIR }}/abias.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_pc.txt", "{{ ARCDIR }}/abias_pc.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_air.txt", "{{ ARCDIR }}/abias_air.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_int.txt", "{{ ARCDIR }}/abias_int.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}analysis.dtf.a006.nc", "{{ ARCDIR }}/dtfanl.{{ RUN }}.{{ cycle_YMDH }}.nc"] {% endif %} - # GFS Fit2Obs data - {% set fit2obs_files = [] %} - {% for fhr in range(0, FHMAX_FITS + 1, 6) %} - {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} - {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} - {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sfcfile, - VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sfcfile ]) %} - {% do fit2obs_files.append([COMIN_ATMOS_HISTORY ~ "/" ~ sigfile, - VFYARC ~ "/" ~ RUN ~ "." ~ cycle_YMD ~ "/" ~ cycle_HH ~ "/" ~ sigfile ]) %} - {% endfor %} +copy_opt: + # Cyclone tracking files (optional - only exist when storms are tracked) + - ["{{ COMIN_ATMOS_TRACK }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}", "{{ ARCDIR }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_TRACK }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}", "{{ ARCDIR }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}"] - # GDAS-specific files - {% set gdas_files = [] %} - {% for fhr in range(0, FHMAX + 1, FHOUT) %} - {% do gdas_files.append([COMIN_ATMOS_GRIB_1p00 ~ "/" ~ head ~ "pres_a.1p00.f" ~ '%03d'|format(fhr) ~ ".grib2", - ARCDIR ~ "/pgbf" ~ '%02d'|format(fhr) ~ "." ~ RUN ~ "." ~ cycle_YMDH ~ ".grib2"]) %} + # Cyclone tracking data by basin (optional directories) + {% for basin in ["epac", "natl"] %} + - ["{{ COMIN_ATMOS_TRACK }}/{{ basin }}", "{{ ARCDIR }}/{{ basin }}"] {% endfor %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias.txt", - ARCDIR ~ "/abias." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias_pc.txt", - ARCDIR ~ "/abias_pc." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias_air.txt", - ARCDIR ~ "/abias_air." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "abias_int.txt", - ARCDIR ~ "/abias_int." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do gdas_files.append([COMIN_ATMOS_ANALYSIS ~ "/" ~ head ~ "analysis.dtf.a006.nc", - ARCDIR ~ "/dtfanl." ~ RUN ~ "." ~ cycle_YMDH ~ ".nc"]) %} - - # Now append the necessary file pairs to file_set - # Common deterministic files - {% set file_set = file_set + det_files %} - {% if MODE == "cycled" %} - {% set file_set = file_set + det_anl_files %} - {% endif %} - # Run-specific deterministic files {% if RUN == "gfs" %} - {% set file_set = file_set + gfs_files %} - # Fit2Obs files - {% if DO_FIT2OBS == True %} - {% set file_set = file_set + fit2obs_files %} - {% endif %} - {% elif RUN == "gdas" %} - {% set file_set = file_set + gdas_files %} + # GFS cyclone genesis data (optional - only exist when storms are tracked) + - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}", "{{ ARCDIR }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}", "{{ ARCDIR }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.{{ cycle_YMDH }}", "{{ ARCDIR }}/trak.gfso.atcfunix.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}", "{{ ARCDIR }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}"] {% endif %} -{% else %} # End of deterministic files - - # Ensemble analysis files - {% set enkf_files = [] %} +{% else %} +copy_req: + # Ensemble (EnKF) analysis files - REQUIRED {% if DO_JEDIATMENS == True %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "stat.atm.tar", - ARCDIR ~ "/atmensstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}stat.atm.tar", "{{ ARCDIR }}/atmensstat.{{ RUN }}.{{ cycle_YMDH }}"] {% else %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "enkfstat.txt", - ARCDIR ~ "/enkfstat." ~ RUN ~ "." ~ cycle_YMDH ]) %} - {% do enkf_files.append([COMIN_ATMOS_ANALYSIS_ENSSTAT ~ "/" ~ head ~ "gsistat.ensmean.txt", - ARCDIR ~ "/gsistat." ~ RUN ~ "." ~ cycle_YMDH ~ ".ensmean"]) %} + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}enkfstat.txt", "{{ ARCDIR }}/enkfstat.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}gsistat.ensmean.txt", "{{ ARCDIR }}/gsistat.{{ RUN }}.{{ cycle_YMDH }}.ensmean"] {% endif %} - - # Construct the final file set - {% set file_set = file_set + enkf_files %} - {% endif %} - - -# Actually write the yaml -mkdir: - - "{{ ARCDIR }}" - - {% if DO_FIT2OBS == True %} - - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" - {% endif %} - -copy_opt: - {% for source_dest_pair in file_set %} - - {{ source_dest_pair }} - {% endfor %} From 83a86b3d565ef66029b6bbd5370947dd6b648890 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Thu, 4 Dec 2025 17:19:29 +0000 Subject: [PATCH 10/29] edits yamls to look more clean --- parm/archive/gfs_arcdir.yaml.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index 4fbe07e1724..28d7a305606 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -59,7 +59,8 @@ copy_req: {% elif RUN == "gdas" %} # GDAS forecast files - REQUIRED {% for fhr in range(0, FHMAX + 1, FHOUT) %} - - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] + - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", + "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] {% endfor %} # GDAS bias correction files - REQUIRED From 4469b5a8ce0458b43354f1dd30f5df8f7d10d4d8 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Thu, 4 Dec 2025 17:19:49 +0000 Subject: [PATCH 11/29] further edits to yamls --- parm/archive/enkf_arcdir.yaml.j2 | 22 ++++++++++++++++++++++ parm/archive/gcafs_arcdir.yaml.j2 | 23 +++-------------------- parm/archive/gfs_arcdir.yaml.j2 | 21 +++------------------ ush/python/pygfs/task/archive.py | 6 +++++- 4 files changed, 33 insertions(+), 39 deletions(-) create mode 100644 parm/archive/enkf_arcdir.yaml.j2 diff --git a/parm/archive/enkf_arcdir.yaml.j2 b/parm/archive/enkf_arcdir.yaml.j2 new file mode 100644 index 00000000000..1c5bf8cd0a2 --- /dev/null +++ b/parm/archive/enkf_arcdir.yaml.j2 @@ -0,0 +1,22 @@ +# Variables provided by archive_vars.py: +# - cycle_HH, cycle_YMDH, cycle_YMD, head +# - ARCDIR +# - All COMIN_* paths + +# Ensemble (EnKF) archiving template +# Used for: enkfgdas, enkfgfs, enkfgcafs, enkfgcdas + +mkdir: + - "{{ ARCDIR }}" + +copy_req: + # Ensemble analysis statistics - REQUIRED + {% if DO_JEDIATMENS == True %} + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}atmensstat", + "{{ ARCDIR }}/atmensstat.{{ RUN }}.{{ cycle_YMDH }}"] + {% else %} + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}enkfstat.txt", + "{{ ARCDIR }}/enkfstat.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}gsistat.ensmean.txt", + "{{ ARCDIR }}/gsistat.{{ RUN }}.{{ cycle_YMDH }}.ensmean"] + {% endif %} diff --git a/parm/archive/gcafs_arcdir.yaml.j2 b/parm/archive/gcafs_arcdir.yaml.j2 index 77f90bd2774..6d99f358703 100644 --- a/parm/archive/gcafs_arcdir.yaml.j2 +++ b/parm/archive/gcafs_arcdir.yaml.j2 @@ -3,15 +3,15 @@ # - VFYARC # - All COMIN_* paths +# Deterministic GCAFS/GCDAS archiving template +# Used for: gcafs, gcdas + mkdir: - "{{ ARCDIR }}" {% if DO_FIT2OBS == True %} - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" {% endif %} -{% if "enkf" not in RUN %} -# Deterministic runs (gcafs, gcdas) - copy_req: # GCAFS forecast files - REQUIRED {% if RUN == "gcafs" %} @@ -53,20 +53,3 @@ copy_req: "{{ ARCDIR }}/aeroawobs.{{ RUN }}.{{ cycle_YMDH }}"] {% endif %} {% endif %} - -{% else %} -# Ensemble runs (enkfgcafs, enkfgcdas) - -copy_req: - # Ensemble analysis statistics - REQUIRED - {% if DO_JEDIATMENS == True %} - - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}atmensstat", - "{{ ARCDIR }}/atmensstat.{{ RUN }}.{{ cycle_YMDH }}"] - {% else %} - - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}enkfstat.txt", - "{{ ARCDIR }}/enkfstat.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}gsistat.ensmean.txt", - "{{ ARCDIR }}/gsistat.{{ RUN }}.{{ cycle_YMDH }}.ensmean"] - {% endif %} - -{% endif %} diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index 28d7a305606..a837d48fd79 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -3,18 +3,14 @@ # - VFYARC, GEFS_ARCH # - All COMIN_* paths -# Create directories first +# Deterministic GFS/GDAS archiving template +# Used for: gfs, gdas + mkdir: - "{{ ARCDIR }}" {% if DO_FIT2OBS == True %} - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" {% endif %} - -# Define all source-destination pairs for archiving -# Use copy_req for files that MUST exist (raise error if missing) -# Use copy_opt for files that MAY exist (warning if missing, no error) - -{% if "enkf" not in RUN %} copy_req: {% if MODE == "cycled" %} # Deterministic analysis files (cycled mode only) - REQUIRED @@ -88,14 +84,3 @@ copy_opt: - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.{{ cycle_YMDH }}", "{{ ARCDIR }}/trak.gfso.atcfunix.{{ cycle_YMDH }}"] - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}", "{{ ARCDIR }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}"] {% endif %} - -{% else %} -copy_req: - # Ensemble (EnKF) analysis files - REQUIRED - {% if DO_JEDIATMENS == True %} - - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}stat.atm.tar", "{{ ARCDIR }}/atmensstat.{{ RUN }}.{{ cycle_YMDH }}"] - {% else %} - - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}enkfstat.txt", "{{ ARCDIR }}/enkfstat.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}gsistat.ensmean.txt", "{{ ARCDIR }}/gsistat.{{ RUN }}.{{ cycle_YMDH }}.ensmean"] - {% endif %} -{% endif %} diff --git a/ush/python/pygfs/task/archive.py b/ush/python/pygfs/task/archive.py index d6a60364041..389075472e2 100644 --- a/ush/python/pygfs/task/archive.py +++ b/ush/python/pygfs/task/archive.py @@ -72,7 +72,11 @@ def configure_vrfy(self, arch_dict: Dict[str, Any]) -> (Dict[str, Any]): archive_parm = os.path.join(arch_dict.PARMgfs, "archive") # Collect the dataset to archive locally - arcdir_j2yaml = os.path.join(archive_parm, f"{arch_dict.NET}_arcdir.yaml.j2") + # Select template based on RUN type: ensemble (enkf) or deterministic (NET) + if "enkf" in arch_dict.RUN: + arcdir_j2yaml = os.path.join(archive_parm, "enkf_arcdir.yaml.j2") + else: + arcdir_j2yaml = os.path.join(archive_parm, f"{arch_dict.NET}_arcdir.yaml.j2") # Add the glob.glob function for capturing log filenames arch_dict['glob'] = glob.glob From dab508cf9cf4531484b0813427ce446415222136 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Thu, 4 Dec 2025 17:31:05 +0000 Subject: [PATCH 12/29] tabbed jinja --- parm/archive/gcafs_arcdir.yaml.j2 | 22 +++--- parm/archive/gfs_arcdir.yaml.j2 | 125 ++++++++++++++++++------------ 2 files changed, 85 insertions(+), 62 deletions(-) diff --git a/parm/archive/gcafs_arcdir.yaml.j2 b/parm/archive/gcafs_arcdir.yaml.j2 index 6d99f358703..3268882bfda 100644 --- a/parm/archive/gcafs_arcdir.yaml.j2 +++ b/parm/archive/gcafs_arcdir.yaml.j2 @@ -8,39 +8,39 @@ mkdir: - "{{ ARCDIR }}" - {% if DO_FIT2OBS == True %} +{% if DO_FIT2OBS == True %} - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" - {% endif %} +{% endif %} copy_req: +{% if RUN == "gcafs" %} # GCAFS forecast files - REQUIRED - {% if RUN == "gcafs" %} {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] {% endfor %} - # GCAFS Fit2Obs data - REQUIRED if DO_FIT2OBS is enabled {% if DO_FIT2OBS == True %} - {% for fhr in range(0, FHMAX_FITS + 1, 6) %} + # GCAFS Fit2Obs data - REQUIRED if DO_FIT2OBS is enabled + {% for fhr in range(0, FHMAX_FITS + 1, 6) %} - ["{{ COMIN_ATMOS_HISTORY }}/{{ head }}sfc.f{{ '%03d'|format(fhr) }}.nc", "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}/{{ head }}sfc.f{{ '%03d'|format(fhr) }}.nc"] - ["{{ COMIN_ATMOS_HISTORY }}/{{ head }}atm.f{{ '%03d'|format(fhr) }}.nc", "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}/{{ head }}atm.f{{ '%03d'|format(fhr) }}.nc"] - {% endfor %} - {% endif %} + {% endfor %} {% endif %} +{% endif %} +{% if RUN == "gcdas" %} # GCDAS forecast files - REQUIRED - {% if RUN == "gcdas" %} {% for fhr in range(0, FHMAX + 1, FHOUT) %} - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] {% endfor %} - {% endif %} +{% endif %} +{% if MODE == "cycled" %} # Deterministic analysis files (cycled mode only) - REQUIRED - {% if MODE == "cycled" %} {% if DO_AERO_ANL %} - ["{{ COMIN_CHEM_ANALYSIS }}/{{ head }}aerostat.tgz", "{{ ARCDIR }}/aerostat.{{ RUN }}.{{ cycle_YMDH }}.tgz"] @@ -52,4 +52,4 @@ copy_req: - ["{{ COMIN_OBS }}/{{ head }}aeroawobs", "{{ ARCDIR }}/aeroawobs.{{ RUN }}.{{ cycle_YMDH }}"] {% endif %} - {% endif %} +{% endif %} diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index a837d48fd79..a562b2bbef1 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -8,79 +8,102 @@ mkdir: - "{{ ARCDIR }}" - {% if DO_FIT2OBS == True %} +{% if DO_FIT2OBS == True %} - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" - {% endif %} +{% endif %} + copy_req: - {% if MODE == "cycled" %} +{% if MODE == "cycled" %} # Deterministic analysis files (cycled mode only) - REQUIRED - - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.analysis.grib2", "{{ ARCDIR }}/pgbanl.{{ RUN }}.{{ cycle_YMDH }}.grib2"] + - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.analysis.grib2", + "{{ ARCDIR }}/pgbanl.{{ RUN }}.{{ cycle_YMDH }}.grib2"] - {% if DO_JEDIATMVAR == True %} - - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}stat.atm.tar", "{{ ARCDIR }}/atmstat.{{ RUN }}.{{ cycle_YMDH }}"] - {% else %} - - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}gsistat.txt", "{{ ARCDIR }}/gsistat.{{ RUN }}.{{ cycle_YMDH }}"] - {% endif %} + {% if DO_JEDIATMVAR == True %} + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}stat.atm.tar", + "{{ ARCDIR }}/atmstat.{{ RUN }}.{{ cycle_YMDH }}"] + {% else %} + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}gsistat.txt", + "{{ ARCDIR }}/gsistat.{{ RUN }}.{{ cycle_YMDH }}"] + {% endif %} - {% if DO_JEDISNOWDA == True %} - - ["{{ COMIN_SNOW_ANALYSIS }}/{{ head }}snow_analysis.ioda_hofx.tar", "{{ ARCDIR }}/snowstat.{{ RUN }}.{{ cycle_YMDH }}.tar"] - {% endif %} + {% if DO_JEDISNOWDA == True %} + - ["{{ COMIN_SNOW_ANALYSIS }}/{{ head }}snow_analysis.ioda_hofx.tar", + "{{ ARCDIR }}/snowstat.{{ RUN }}.{{ cycle_YMDH }}.tar"] + {% endif %} - {% if DO_AERO_ANL %} - - ["{{ COMIN_CHEM_ANALYSIS }}/{{ head }}aerostat.tgz", "{{ ARCDIR }}/aerostat.{{ RUN }}.{{ cycle_YMDH }}.tgz"] - {% endif %} + {% if DO_AERO_ANL %} + - ["{{ COMIN_CHEM_ANALYSIS }}/{{ head }}aerostat.tgz", + "{{ ARCDIR }}/aerostat.{{ RUN }}.{{ cycle_YMDH }}.tgz"] + {% endif %} - {% if DO_PREP_OBS_AERO == True %} - - ["{{ COMIN_OBS }}/{{ head }}aeroobs", "{{ ARCDIR }}/aeroobs.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_OBS }}/{{ head }}aeroawobs", "{{ ARCDIR }}/aeroawobs.{{ RUN }}.{{ cycle_YMDH }}"] - {% endif %} + {% if DO_PREP_OBS_AERO == True %} + - ["{{ COMIN_OBS }}/{{ head }}aeroobs", + "{{ ARCDIR }}/aeroobs.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_OBS }}/{{ head }}aeroawobs", + "{{ ARCDIR }}/aeroawobs.{{ RUN }}.{{ cycle_YMDH }}"] {% endif %} +{% endif %} - {% if RUN == "gfs" %} +{% if RUN == "gfs" %} # GFS forecast files - REQUIRED - {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} - - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] - {% endfor %} + {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} + - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", + "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] + {% endfor %} - {% if DO_FIT2OBS == True %} + {% if DO_FIT2OBS == True %} # GFS Fit2Obs data - REQUIRED - {% for fhr in range(0, FHMAX_FITS + 1, 6) %} - {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} - {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} - - ["{{ COMIN_ATMOS_HISTORY }}{{ sfcfile }}", "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sfcfile }}"] - - ["{{ COMIN_ATMOS_HISTORY }}{{ sigfile }}", "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sigfile }}"] - {% endfor %} - {% endif %} + {% for fhr in range(0, FHMAX_FITS + 1, 6) %} + {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} + {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} + - ["{{ COMIN_ATMOS_HISTORY }}{{ sfcfile }}", + "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sfcfile }}"] + - ["{{ COMIN_ATMOS_HISTORY }}{{ sigfile }}", + "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sigfile }}"] + {% endfor %} + {% endif %} - {% elif RUN == "gdas" %} +{% elif RUN == "gdas" %} # GDAS forecast files - REQUIRED - {% for fhr in range(0, FHMAX + 1, FHOUT) %} + {% for fhr in range(0, FHMAX + 1, FHOUT) %} - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] - {% endfor %} + {% endfor %} # GDAS bias correction files - REQUIRED - - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias.txt", "{{ ARCDIR }}/abias.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_pc.txt", "{{ ARCDIR }}/abias_pc.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_air.txt", "{{ ARCDIR }}/abias_air.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_int.txt", "{{ ARCDIR }}/abias_int.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}analysis.dtf.a006.nc", "{{ ARCDIR }}/dtfanl.{{ RUN }}.{{ cycle_YMDH }}.nc"] - {% endif %} + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias.txt", + "{{ ARCDIR }}/abias.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_pc.txt", + "{{ ARCDIR }}/abias_pc.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_air.txt", + "{{ ARCDIR }}/abias_air.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_int.txt", + "{{ ARCDIR }}/abias_int.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}analysis.dtf.a006.nc", + "{{ ARCDIR }}/dtfanl.{{ RUN }}.{{ cycle_YMDH }}.nc"] +{% endif %} copy_opt: # Cyclone tracking files (optional - only exist when storms are tracked) - - ["{{ COMIN_ATMOS_TRACK }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}", "{{ ARCDIR }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_TRACK }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}", "{{ ARCDIR }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_TRACK }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}", + "{{ ARCDIR }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_TRACK }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}", + "{{ ARCDIR }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}"] # Cyclone tracking data by basin (optional directories) - {% for basin in ["epac", "natl"] %} - - ["{{ COMIN_ATMOS_TRACK }}/{{ basin }}", "{{ ARCDIR }}/{{ basin }}"] - {% endfor %} +{% for basin in ["epac", "natl"] %} + - ["{{ COMIN_ATMOS_TRACK }}/{{ basin }}", + "{{ ARCDIR }}/{{ basin }}"] +{% endfor %} - {% if RUN == "gfs" %} +{% if RUN == "gfs" %} # GFS cyclone genesis data (optional - only exist when storms are tracked) - - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}", "{{ ARCDIR }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}", "{{ ARCDIR }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.{{ cycle_YMDH }}", "{{ ARCDIR }}/trak.gfso.atcfunix.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}", "{{ ARCDIR }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}"] - {% endif %} + - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}", + "{{ ARCDIR }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}", + "{{ ARCDIR }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.{{ cycle_YMDH }}", + "{{ ARCDIR }}/trak.gfso.atcfunix.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}", + "{{ ARCDIR }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}"] +{% endif %} From c6472053fde5f4e3cd50b180b745caeade803e17 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Thu, 4 Dec 2025 18:45:44 +0000 Subject: [PATCH 13/29] resolve conflicts --- parm/archive/enkf_arcdir.yaml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/archive/enkf_arcdir.yaml.j2 b/parm/archive/enkf_arcdir.yaml.j2 index 1c5bf8cd0a2..d65bba8c514 100644 --- a/parm/archive/enkf_arcdir.yaml.j2 +++ b/parm/archive/enkf_arcdir.yaml.j2 @@ -17,6 +17,6 @@ copy_req: {% else %} - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}enkfstat.txt", "{{ ARCDIR }}/enkfstat.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}gsistat.ensmean.txt", + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}gsistat_ensmean.txt", "{{ ARCDIR }}/gsistat.{{ RUN }}.{{ cycle_YMDH }}.ensmean"] {% endif %} From 6cee909e92b61d8805a106a6238bd804b32b62f8 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Fri, 5 Dec 2025 17:26:25 +0000 Subject: [PATCH 14/29] typo --- parm/archive/enkf_arcdir.yaml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/archive/enkf_arcdir.yaml.j2 b/parm/archive/enkf_arcdir.yaml.j2 index d65bba8c514..43e92930912 100644 --- a/parm/archive/enkf_arcdir.yaml.j2 +++ b/parm/archive/enkf_arcdir.yaml.j2 @@ -12,7 +12,7 @@ mkdir: copy_req: # Ensemble analysis statistics - REQUIRED {% if DO_JEDIATMENS == True %} - - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}atmensstat", + - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}stat.atm.tar", "{{ ARCDIR }}/atmensstat.{{ RUN }}.{{ cycle_YMDH }}"] {% else %} - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}enkfstat.txt", From 4c31622881a7d2b9a59b7e04b161810077e99740 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Fri, 5 Dec 2025 18:53:49 +0000 Subject: [PATCH 15/29] addressed suggestions --- jobs/JGLOBAL_ARCHIVE_VRFY | 29 ++++- jobs/JGLOBAL_ENKF_ARCHIVE_VRFY | 15 ++- ush/python/pygfs/task/archive_vars.py | 167 ++------------------------ 3 files changed, 51 insertions(+), 160 deletions(-) diff --git a/jobs/JGLOBAL_ARCHIVE_VRFY b/jobs/JGLOBAL_ARCHIVE_VRFY index 207fad02bb5..2e6e9a42d37 100755 --- a/jobs/JGLOBAL_ARCHIVE_VRFY +++ b/jobs/JGLOBAL_ARCHIVE_VRFY @@ -2,9 +2,32 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "arch_vrfy" -c "base arch_vrfy" +############################################## +# Set variables used in the script +############################################## +YMD=${PDY} HH=${cyc} declare_from_tmpl -rx \ + COMIN_ATMOS_ANALYSIS:COM_ATMOS_ANALYSIS_TMPL \ + COMIN_ATMOS_GENESIS:COM_ATMOS_GENESIS_TMPL \ + COMIN_ATMOS_HISTORY:COM_ATMOS_HISTORY_TMPL \ + COMIN_ATMOS_TRACK:COM_ATMOS_TRACK_TMPL \ + COMIN_CHEM_ANALYSIS:COM_CHEM_ANALYSIS_TMPL \ + COMIN_SNOW_ANALYSIS:COM_SNOW_ANALYSIS_TMPL \ + COMIN_OBS:COM_OBS_TMPL \ + COMOUT_ATMOS_TRACK:COM_ATMOS_TRACK_TMPL + +for grid in "0p25" "0p50" "1p00"; do + YMD=${PDY} HH=${cyc} GRID=${grid} declare_from_tmpl -rx \ + "COMIN_ATMOS_GRIB_${grid}:COM_ATMOS_GRIB_GRID_TMPL" +done + +# GEFS-specific: Ensemble statistics path +if [[ "${RUN}" == "gefs" ]]; then + MEMDIR="ensstat" YMD=${PDY} HH=${cyc} GRID="1p00" declare_from_tmpl -rx \ + COMIN_ATMOS_ENSSTAT_1p00:COM_ATMOS_GRIB_GRID_TMPL +fi + ############################################################### # Run archive script -# Note: COM paths are calculated within the Python script ############################################################### ${GLOBALARCHIVESH:-${SCRgfs}/exglobal_archive_vrfy.py} @@ -13,6 +36,10 @@ if [[ ${err} -ne 0 ]]; then exit "${err}" fi +############################################## +# End JOB SPECIFIC work +############################################## + ############################################## # Final processing ############################################## diff --git a/jobs/JGLOBAL_ENKF_ARCHIVE_VRFY b/jobs/JGLOBAL_ENKF_ARCHIVE_VRFY index 2ebbb58df57..77c3d1973fc 100755 --- a/jobs/JGLOBAL_ENKF_ARCHIVE_VRFY +++ b/jobs/JGLOBAL_ENKF_ARCHIVE_VRFY @@ -2,9 +2,16 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "earc_vrfy" -c "base earc_vrfy" +############################################## +# Set variables used in the script +############################################## +MEMDIR="ensstat" YMD=${PDY} HH=${cyc} declare_from_tmpl -rx \ + COMIN_ATMOS_ANALYSIS_ENSSTAT:COM_ATMOS_ANALYSIS_TMPL \ + COMIN_ATMOS_HISTORY_ENSSTAT:COM_ATMOS_HISTORY_TMPL \ + COMIN_SNOW_ANALYSIS_ENSSTAT:COM_SNOW_ANALYSIS_TMPL + ############################################################### # Run archive script -# Note: COM paths are calculated within the Python script ############################################################### "${SCRgfs}/exglobal_enkf_earc_vrfy.py" @@ -13,6 +20,12 @@ if [[ ${err} -ne 0 ]]; then exit "${err}" fi +############################################################### + +############################################## +# End JOB SPECIFIC work +############################################## + ############################################## # Final processing ############################################## diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index bc3a7b1cee9..ea0782822a2 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -30,17 +30,11 @@ Main entry point - collects all variables for YAML templates add_config_vars(): - Extracts configuration keys and COM* template variables + Extracts configuration keys and COM* variables (created in job scripts) _get_cycle_vars(): Computes cycle-specific variables (cycle_HH, cycle_YMDH, cycle_YMD, head) -_calculate_com_paths(): - Generates all COM paths (ROTDIR-based) with grid loops and conditional logic - -_get_template_dict(): - Creates base template substitution dictionary - Logging ------- All public operational methods are decorated with @logit(logger). @@ -48,7 +42,7 @@ import os from logging import getLogger from typing import Any, Dict -from wxflow import Task, logit, to_YMD, to_YMDH, Template, TemplateConstants +from wxflow import Task, logit, to_YMD, to_YMDH logger = getLogger(__name__.split('.')[-1]) @@ -91,28 +85,25 @@ def get_all_yaml_vars(self) -> Dict[str, Any]: Dict[str, Any] Dictionary containing variables for Jinja2 templates: - cycle_HH, cycle_YMDH, cycle_YMD, head: Cycle-specific variables - - COMIN_*, COMOUT_*, COM_*: All COM directory paths + - COMIN_*, COMOUT_*, COM_*: All COM directory paths (from job scripts) - Config keys: RUN, PSLOT, ROTDIR, DO_* flags, FHMAX*, etc. Notes ----- File set generation (mkdir lists, copy operations) is handled entirely by the YAML templates. This method only provides the variables they need. + COM paths are created in the job scripts (JGLOBAL_ARCHIVE_VRFY and + JGLOBAL_ENKF_ARCHIVE_VRFY) and passed through task_config. """ # Build arch_dict with variables for Jinja2 templates arch_dict = {} - # Add config variables (config keys, COM* variables) + # Add config variables (config keys, COM* variables from job scripts) arch_dict.update(self.add_config_vars()) # Add cycle-specific variables arch_dict.update(self._get_cycle_vars()) - # Add COM paths - base_dict = self._get_template_dict() - template_specs = self._get_com_template_specs() - arch_dict.update(self._construct_com_paths(base_dict, template_specs)) - logger.info(f"Collected {len(arch_dict)} variables for YAML templates") logger.debug(f"arch_dict keys: {list(arch_dict.keys())}") @@ -176,7 +167,9 @@ def add_config_vars(self) -> Dict[str, Any]: else: logger.warning(f"Config key '{key}' not found in task_config; skipping.") - # Import COM* directory and template variables + # Import COM* directory and template variables created by job scripts + # Job scripts use declare_from_tmpl -rx which exports variables to environment + # Python reads os.environ, so these COM variables are in task_config for key in self.task_config.keys(): if key.startswith(("COM_", "COMIN_", "COMOUT_")): general_dict[key] = self.task_config.get(key) @@ -218,145 +211,3 @@ def _get_cycle_vars(self) -> Dict[str, Any]: 'head': head, 'VFYARC': VFYARC } - - @logit(logger) - def _get_template_dict(self) -> Dict[str, str]: - """Create template substitution dictionary for COM path generation. - - This method builds the base dictionary used for template variable substitution. - For GEFS, it includes MEMDIR: 'ensstat' to support ensemble statistics paths. - All values default to empty string if not found. - - Returns - ------- - Dict[str, str] - Template substitution dictionary with keys: - - ROTDIR: Rotating directory path - - RUN: Run type (gfs, gdas, gefs, etc.) - - YMD/PDY: Cycle date (YYYYMMDD) - - HH/cyc: Cycle hour (HH) - - GRID: Grid resolution (added per-call for grid-specific paths) - - MEMDIR: 'ensstat' (GEFS only, for ensemble statistics) - - Examples - -------- - GFS/GDAS: - {'ROTDIR': '/path', 'RUN': 'gfs', 'YMD': '20240101', 'HH': '00', ...} - - GEFS: - {'ROTDIR': '/path', 'RUN': 'gefs', 'YMD': '20240101', 'HH': '00', - 'MEMDIR': 'ensstat', ...} - """ - cycle_vars = self._get_cycle_vars() - - # Base template substitution dictionary with empty string defaults - base_dict = { - 'ROTDIR': self.task_config.get('ROTDIR', ''), - 'RUN': self.task_config.get('RUN', ''), - 'YMD': cycle_vars.get('cycle_YMD', ''), - 'HH': cycle_vars.get('cycle_HH', ''), - 'PDY': cycle_vars.get('cycle_YMD', ''), - 'cyc': cycle_vars.get('cycle_HH', '') - } - - # GEFS-specific: Add MEMDIR for ensemble statistics - # Corresponds to YAML: '${MEMDIR}': 'ensstat' - if 'gefs' in self.task_config.get('RUN', '').lower(): - base_dict['MEMDIR'] = 'ensstat' - - return base_dict - - def _get_com_template_specs(self) -> list: - """Collect COM template specifications. - - This method defines which COM variables need to be generated from which - templates, along with any additional template variables required. - - Returns - ------- - list of tuples - Each tuple contains (com_key, template_key, extra_vars): - - com_key: Output variable name (e.g., 'COMIN_ATMOS_ANALYSIS') - - template_key: Template key in task_config (e.g., 'COM_ATMOS_ANALYSIS_TMPL') - - extra_vars: Dict of additional template variables (e.g., {'GRID': '0p25'}) - Empty dict {} if no additional variables needed - """ - # EnKF-specific: Only these 3 ENSSTAT paths with MEMDIR='ensstat' - if 'enkf' in self.task_config.RUN: - template_specs = [ - ('COMIN_ATMOS_ANALYSIS_ENSSTAT', 'COM_ATMOS_ANALYSIS_TMPL', {'MEMDIR': 'ensstat'}), - ('COMIN_ATMOS_HISTORY_ENSSTAT', 'COM_ATMOS_HISTORY_TMPL', {'MEMDIR': 'ensstat'}), - ('COMIN_SNOW_ANALYSIS_ENSSTAT', 'COM_SNOW_ANALYSIS_TMPL', {'MEMDIR': 'ensstat'}) - ] - else: - # All other systems (GFS, GEFS, GCAFS) get common + grid-specific paths - template_specs = [ - ('COMIN_ATMOS_ANALYSIS', 'COM_ATMOS_ANALYSIS_TMPL', {}), - ('COMIN_ATMOS_GENESIS', 'COM_ATMOS_GENESIS_TMPL', {}), - ('COMIN_ATMOS_HISTORY', 'COM_ATMOS_HISTORY_TMPL', {}), - ('COMIN_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL', {}), - ('COMIN_CHEM_ANALYSIS', 'COM_CHEM_ANALYSIS_TMPL', {}), - ('COMIN_SNOW_ANALYSIS', 'COM_SNOW_ANALYSIS_TMPL', {}), - ('COMIN_OBS', 'COM_OBS_TMPL', {}), - ('COMOUT_ATMOS_TRACK', 'COM_ATMOS_TRACK_TMPL', {}), - ] - # Grid-specific paths - for grid in ["0p25", "0p50", "1p00"]: - com_key = f"COMIN_ATMOS_GRIB_{grid}" - template_specs.append((com_key, 'COM_ATMOS_GRIB_GRID_TMPL', {'GRID': grid})) - - # GEFS-specific: Ensemble statistics path - if 'gefs' in self.task_config.RUN: - template_specs.append(('COMIN_ATMOS_ENSSTAT_1p00', 'COM_ATMOS_GRIB_GRID_TMPL', {'GRID': '1p00'})) - - return template_specs - - def _construct_com_paths(self, base_dict: Dict[str, str], template_specs: list) -> Dict[str, str]: - """Construct COM paths from template specifications. - - This method takes template specifications and constructs the actual paths - by substituting template variables using base_dict updated with extra_vars. - - Parameters - ---------- - base_dict : Dict[str, str] - Base template substitution dictionary from _get_template_dict() - template_specs : list of tuples - List from _get_com_template_specs() containing specifications - - Returns - ------- - Dict[str, str] - Dictionary mapping COM variable names to resolved paths - """ - com_paths = {} - - for com_key, template_key, extra_vars in template_specs: - # Use base_dict directly, updated with any extra variables - tmpl_dict = {**base_dict, **extra_vars} - - template = self.task_config.get(template_key, '') - com_paths[com_key] = Template.substitute_string( - template, TemplateConstants.DOLLAR_CURLY_BRACE, - lambda key: tmpl_dict.get(key, '')) if template else '' - - return com_paths - - -# ============================================================================ -# FILE SET GENERATION NOW HANDLED BY YAML TEMPLATES -# ============================================================================ -# The following methods have been removed and their logic moved to YAML templates: -# - _build_gfs_list() -# - gfs_arcdir() -# - _build_gefs_list() -# - gefs_arcdir() -# - _build_gcafs_list() -# - gcafs_arcdir() -# -# The YAML templates (parm/archive/*_arcdir.yaml.j2) now contain all file set -# generation logic (loops, conditionals, file path construction). -# -# The Python code only provides VARIABLES (cycle vars, COM paths, config vars) -# that the YAML templates need via get_all_yaml_vars(). -# ============================================================================ From 3bd29eb1c09a897ad0ee9b93baa6db2aa45f0385 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Fri, 5 Dec 2025 19:13:16 +0000 Subject: [PATCH 16/29] typo --- ush/python/pygfs/task/archive_vars.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index ea0782822a2..d1876db599c 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -156,8 +156,8 @@ def add_config_vars(self) -> Dict[str, Any]: 'FHOUT_GFS', 'FHMAX_HF_GFS', 'FHMAX_FITS', 'FHMAX', 'FHOUT', 'FHMAX_GFS', 'DO_GSISOILDA', 'DO_LAND_IAU'] - # Add FHMIN_GFS only if NET does not contain 'enkf' - if 'enkf' not in self.task_config.get('NET', ''): + # Add FHMIN_GFS only if RUN does not contain 'enkf' + if 'enkf' not in self.task_config.get('RUN', ''): config_keys.append('FHMIN_GFS') # Extract keys if they exist in task_config From 00271245fa4a6e56a7f3cb0332d24b96b848c2bd Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Fri, 5 Dec 2025 22:15:23 +0000 Subject: [PATCH 17/29] update class attributes --- scripts/exglobal_archive_vrfy.py | 9 +-- ush/python/pygfs/task/archive_vars.py | 105 +++++++++++++++----------- 2 files changed, 62 insertions(+), 52 deletions(-) diff --git a/scripts/exglobal_archive_vrfy.py b/scripts/exglobal_archive_vrfy.py index 73d146a6d47..be31a44cc5c 100755 --- a/scripts/exglobal_archive_vrfy.py +++ b/scripts/exglobal_archive_vrfy.py @@ -15,17 +15,14 @@ def main(): config = cast_strdict_as_dtypedict(os.environ) - # Instantiate the Archive object for execute_store_products + # Instantiate the Archive task object archive = Archive(config) - # Instantiate the ArchiveVrfy object for variable and file set calculation - archive_vars = ArchiveVrfy(config) - with chdir(config.ROTDIR): # Collect all archive variables in complete arch_dict for YAML templates - # This method handles everything: general vars, NET dispatch, cycle_vars, com_paths, file_set, mkdir_list - arch_dict = archive_vars.get_all_yaml_vars() + # Use static utility methods from ArchiveVrfy (not a Task instance) + arch_dict = ArchiveVrfy.get_all_yaml_vars(archive.task_config) # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index d1876db599c..559f4f414c3 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 """ -Archive Variables Task +Archive Variables Utility Module Overview -------- -This module provides variables needed by YAML templates for archiving verification -(vrfy) data for GFS, GEFS, and GCAFS systems. File set generation logic (loops, -conditionals, path construction) is handled by the YAML templates themselves. +This module provides utility functions to collect variables needed by YAML templates +for archiving verification (vrfy) data for GFS, GEFS, and GCAFS systems. File set +generation logic (loops, conditionals, path construction) is handled by the YAML +templates themselves. Architecture ------------ @@ -24,32 +25,37 @@ - Apply conditionals (DO_* flags, MODE, RUN type) - Create mkdir lists for directory creation -Key Methods ------------ -get_all_yaml_vars(): +Key Functions +------------- +get_all_yaml_vars(config_dict): Main entry point - collects all variables for YAML templates -add_config_vars(): +add_config_vars(config_dict): Extracts configuration keys and COM* variables (created in job scripts) -_get_cycle_vars(): +_get_cycle_vars(config_dict): Computes cycle-specific variables (cycle_HH, cycle_YMDH, cycle_YMD, head) +Design Note +----------- +This is NOT a Task class - it's a utility module with functions that operate on +config_dict dictionaries. This avoids duplicate Task instantiation in archive workflows. + Logging ------- -All public operational methods are decorated with @logit(logger). +All public operational functions are decorated with @logit(logger). """ import os from logging import getLogger from typing import Any, Dict -from wxflow import Task, logit, to_YMD, to_YMDH +from wxflow import AttrDict, logit, to_YMD, to_YMDH logger = getLogger(__name__.split('.')[-1]) -class ArchiveVrfy(Task): +class ArchiveVrfy: """ - Task class for archive verification operations. + Utility class for collecting archive verification variables. This class provides variables for YAML templates that handle archiving for three systems: @@ -61,25 +67,20 @@ class ArchiveVrfy(Task): generation logic. This class only provides the variables they need. """ - @logit(logger, name="ArchiveVrfy") - def __init__(self, config: Dict[str, Any]) -> None: - """Constructor for the ArchiveVrfy task - - Parameters - ---------- - config : Dict[str, Any] - Incoming configuration for the task from the environment - """ - super().__init__(config) - + @staticmethod @logit(logger) - def get_all_yaml_vars(self) -> Dict[str, Any]: + def get_all_yaml_vars(config_dict: AttrDict) -> Dict[str, Any]: """Collect all variables needed for YAML templates. This method provides only the VARIABLES needed by the YAML templates (cycle vars, COM paths, config keys). The YAML templates handle all file set generation logic (loops, conditionals, path construction). + Parameters + ---------- + config_dict : AttrDict + Configuration dictionary from Archive.task_config + Returns ------- Dict[str, Any] @@ -93,24 +94,25 @@ def get_all_yaml_vars(self) -> Dict[str, Any]: File set generation (mkdir lists, copy operations) is handled entirely by the YAML templates. This method only provides the variables they need. COM paths are created in the job scripts (JGLOBAL_ARCHIVE_VRFY and - JGLOBAL_ENKF_ARCHIVE_VRFY) and passed through task_config. + JGLOBAL_ENKF_ARCHIVE_VRFY) and passed through config_dict. """ # Build arch_dict with variables for Jinja2 templates arch_dict = {} # Add config variables (config keys, COM* variables from job scripts) - arch_dict.update(self.add_config_vars()) + arch_dict.update(ArchiveVrfy.add_config_vars(config_dict)) # Add cycle-specific variables - arch_dict.update(self._get_cycle_vars()) + arch_dict.update(ArchiveVrfy._get_cycle_vars(config_dict)) logger.info(f"Collected {len(arch_dict)} variables for YAML templates") logger.debug(f"arch_dict keys: {list(arch_dict.keys())}") return arch_dict + @staticmethod @logit(logger) - def add_config_vars(self) -> Dict[str, Any]: + def add_config_vars(config_dict: AttrDict) -> Dict[str, Any]: """Collect and format general variables for archive operations. This method: @@ -119,7 +121,12 @@ def add_config_vars(self) -> Dict[str, Any]: 3. Collects all COM* directory and template variables 4. Returns complete dictionary ready for arch_dict - Variables updated (if present in task_config): + Parameters + ---------- + config_dict : AttrDict + Configuration dictionary from Archive.task_config + + Variables updated (if present in config_dict): - OCNRES: Ocean resolution (formatted to 3 digits) - ICERES: Ice resolution (formatted to 3 digits) @@ -128,7 +135,7 @@ def add_config_vars(self) -> Dict[str, Any]: - DO_JEDIATMENS, DO_FIT2OBS, DO_JEDIATMVAR, DO_JEDISNOWDA - DO_AERO_ANL, DO_PREP_OBS_AERO, DO_GSISOILDA, DO_LAND_IAU - NET, FHOUT_GFS, FHMAX_HF_GFS, FHMAX_FITS, FHMAX, FHOUT, FHMAX_GFS - - FHMIN_GFS (if present in task_config) + - FHMIN_GFS (if present in config_dict) COM variable prefixes collected: - COM_, COMIN_, COMOUT_ @@ -144,10 +151,10 @@ def add_config_vars(self) -> Dict[str, Any]: """ general_dict = {} - # Update resolution keys to be 3 digits if they are part of task_config + # Update resolution keys to be 3 digits if they are part of config_dict for key in ['OCNRES', 'ICERES']: - if key in self.task_config: - self.task_config[key] = f"{self.task_config[key]:03d}" + if key in config_dict: + config_dict[key] = f"{config_dict[key]:03d}" # Configuration keys to extract (if present) config_keys = ['current_cycle', 'RUN', 'PSLOT', 'ROTDIR', 'PARMgfs', @@ -157,32 +164,38 @@ def add_config_vars(self) -> Dict[str, Any]: 'FHMAX_GFS', 'DO_GSISOILDA', 'DO_LAND_IAU'] # Add FHMIN_GFS only if RUN does not contain 'enkf' - if 'enkf' not in self.task_config.get('RUN', ''): + if 'enkf' not in config_dict.get('RUN', ''): config_keys.append('FHMIN_GFS') - # Extract keys if they exist in task_config + # Extract keys if they exist in config_dict for key in config_keys: - if key in self.task_config: - general_dict[key] = self.task_config[key] + if key in config_dict: + general_dict[key] = config_dict[key] else: - logger.warning(f"Config key '{key}' not found in task_config; skipping.") + logger.warning(f"Config key '{key}' not found in config_dict; skipping.") # Import COM* directory and template variables created by job scripts # Job scripts use declare_from_tmpl -rx which exports variables to environment - # Python reads os.environ, so these COM variables are in task_config - for key in self.task_config.keys(): + # Python reads os.environ, so these COM variables are in config_dict + for key in config_dict.keys(): if key.startswith(("COM_", "COMIN_", "COMOUT_")): - general_dict[key] = self.task_config.get(key) + general_dict[key] = config_dict.get(key) logger.info(f"Collected {len(general_dict)} general archive variables") logger.debug(f"General variables: {list(general_dict.keys())}") return general_dict + @staticmethod @logit(logger) - def _get_cycle_vars(self) -> Dict[str, Any]: + def _get_cycle_vars(config_dict: AttrDict) -> Dict[str, Any]: """Calculate cycle-specific variables using wxflow timetools. + Parameters + ---------- + config_dict : AttrDict + Configuration dictionary from Archive.task_config + Returns ------- Dict[str, Any] @@ -193,16 +206,16 @@ def _get_cycle_vars(self) -> Dict[str, Any]: - head: System head designation (e.g., 'gfs.t00z.') - VFYARC: Verification archive directory (ROTDIR/vrfyarch) """ - current_cycle = self.task_config.current_cycle + current_cycle = config_dict.current_cycle cycle_HH = current_cycle.strftime("%H") cycle_YMDH = to_YMDH(current_cycle) cycle_YMD = to_YMD(current_cycle) # Build head string (e.g., 'gfs.t00z.') - head = f"{self.task_config.RUN}.t{cycle_HH}z." + head = f"{config_dict.RUN}.t{cycle_HH}z." # Archive directory (used by all systems) - VFYARC = os.path.join(self.task_config.ROTDIR, "vrfyarch") + VFYARC = os.path.join(config_dict.ROTDIR, "vrfyarch") return { 'cycle_HH': cycle_HH, From 977915ae6b3e332e4097198d2dfd0e8e83d97cbc Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Sat, 6 Dec 2025 01:59:23 +0000 Subject: [PATCH 18/29] update scripts/exglobal_enkf_earc_vrfy.py --- scripts/exglobal_enkf_earc_vrfy.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/scripts/exglobal_enkf_earc_vrfy.py b/scripts/exglobal_enkf_earc_vrfy.py index 77255faded9..cb094f3aecd 100755 --- a/scripts/exglobal_enkf_earc_vrfy.py +++ b/scripts/exglobal_enkf_earc_vrfy.py @@ -15,17 +15,14 @@ def main(): config = cast_strdict_as_dtypedict(os.environ) - # Instantiate the Archive object for execute_store_products + # Instantiate the Archive task object archive = Archive(config) - # Instantiate the ArchiveVrfy object to collect variables to render YAML - archive_vars = ArchiveVrfy(config) - with chdir(config.ROTDIR): # Collect all archive variables in complete arch_dict for YAML templates - # This method handles everything: general vars, NET dispatch, cycle_vars, com_paths, file_set, mkdir_list - arch_dict = archive_vars.get_all_yaml_vars() + # Use static utility methods from ArchiveVrfy (not a Task instance) + arch_dict = ArchiveVrfy.get_all_yaml_vars(archive.task_config) # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) From 166dca95de698c936ab2a9d3061308383f7aebaa Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Mon, 8 Dec 2025 16:22:58 +0000 Subject: [PATCH 19/29] changed location head string --- parm/archive/enkf_arcdir.yaml.j2 | 3 +++ parm/archive/gcafs_arcdir.yaml.j2 | 3 +++ parm/archive/gefs_arcdir.yaml.j2 | 3 +++ parm/archive/gfs_arcdir.yaml.j2 | 3 +++ ush/python/pygfs/task/archive_vars.py | 4 ---- 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/parm/archive/enkf_arcdir.yaml.j2 b/parm/archive/enkf_arcdir.yaml.j2 index 43e92930912..8e05e52a103 100644 --- a/parm/archive/enkf_arcdir.yaml.j2 +++ b/parm/archive/enkf_arcdir.yaml.j2 @@ -6,6 +6,9 @@ # Ensemble (EnKF) archiving template # Used for: enkfgdas, enkfgfs, enkfgcafs, enkfgcdas +# Build head string (e.g., 'gfs.t00z.') +{% set head = RUN + ".t" + cycle_HH + "z." %} + mkdir: - "{{ ARCDIR }}" diff --git a/parm/archive/gcafs_arcdir.yaml.j2 b/parm/archive/gcafs_arcdir.yaml.j2 index 3268882bfda..c7b3d8dff65 100644 --- a/parm/archive/gcafs_arcdir.yaml.j2 +++ b/parm/archive/gcafs_arcdir.yaml.j2 @@ -6,6 +6,9 @@ # Deterministic GCAFS/GCDAS archiving template # Used for: gcafs, gcdas +# Build head string (e.g., 'gfs.t00z.') +{% set head = RUN + ".t" + cycle_HH + "z." %} + mkdir: - "{{ ARCDIR }}" {% if DO_FIT2OBS == True %} diff --git a/parm/archive/gefs_arcdir.yaml.j2 b/parm/archive/gefs_arcdir.yaml.j2 index 2cc1f973523..8be329861ec 100644 --- a/parm/archive/gefs_arcdir.yaml.j2 +++ b/parm/archive/gefs_arcdir.yaml.j2 @@ -3,6 +3,9 @@ # - VFYARC (archive directory) # - COMIN_ATMOS_ENSSTAT_1p00 (calculated in Python with MEMDIR='ensstat') +# Build head string (e.g., 'gfs.t00z.') +{% set head = RUN + ".t" + cycle_HH + "z." %} + # Create directories first mkdir: - "{{ VFYARC }}" diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index a562b2bbef1..453a34ed31c 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -6,6 +6,9 @@ # Deterministic GFS/GDAS archiving template # Used for: gfs, gdas +# Build head string (e.g., 'gfs.t00z.') +{% set head = RUN + ".t" + cycle_HH + "z." %} + mkdir: - "{{ ARCDIR }}" {% if DO_FIT2OBS == True %} diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/task/archive_vars.py index 559f4f414c3..150b671fdd7 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/task/archive_vars.py @@ -211,9 +211,6 @@ def _get_cycle_vars(config_dict: AttrDict) -> Dict[str, Any]: cycle_YMDH = to_YMDH(current_cycle) cycle_YMD = to_YMD(current_cycle) - # Build head string (e.g., 'gfs.t00z.') - head = f"{config_dict.RUN}.t{cycle_HH}z." - # Archive directory (used by all systems) VFYARC = os.path.join(config_dict.ROTDIR, "vrfyarch") @@ -221,6 +218,5 @@ def _get_cycle_vars(config_dict: AttrDict) -> Dict[str, Any]: 'cycle_HH': cycle_HH, 'cycle_YMDH': cycle_YMDH, 'cycle_YMD': cycle_YMD, - 'head': head, 'VFYARC': VFYARC } From 57508ab7c3acc301d3bcbd0b28e5737e38b1b3ff Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Tue, 9 Dec 2025 15:34:08 +0000 Subject: [PATCH 20/29] added suggestions --- dev/scripts/exglobal_archive_vrfy.py | 14 +++++++------- dev/scripts/exglobal_enkf_earc_vrfy.py | 14 +++++++------- ush/python/pygfs/{task => utils}/archive_vars.py | 12 +++--------- 3 files changed, 17 insertions(+), 23 deletions(-) rename ush/python/pygfs/{task => utils}/archive_vars.py (94%) diff --git a/dev/scripts/exglobal_archive_vrfy.py b/dev/scripts/exglobal_archive_vrfy.py index be31a44cc5c..b9929c030db 100755 --- a/dev/scripts/exglobal_archive_vrfy.py +++ b/dev/scripts/exglobal_archive_vrfy.py @@ -3,7 +3,7 @@ import os from pygfs.task.archive import Archive -from pygfs.task.archive_vars import ArchiveVrfy +from pygfs.utils.archive_vars import ArchiveVrfyVars from wxflow import AttrDict, Logger, cast_strdict_as_dtypedict, logit, chdir # initialize root logger @@ -18,14 +18,14 @@ def main(): # Instantiate the Archive task object archive = Archive(config) - with chdir(config.ROTDIR): + # Collect all archive variables in complete arch_dict for YAML templates + # Use static utility methods from ArchiveVrfyVars (not a Task instance) + arch_dict = ArchiveVrfyVars.get_all_yaml_vars(archive.task_config) - # Collect all archive variables in complete arch_dict for YAML templates - # Use static utility methods from ArchiveVrfy (not a Task instance) - arch_dict = ArchiveVrfy.get_all_yaml_vars(archive.task_config) + # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML + arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) - # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML - arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) + with chdir(config.ROTDIR): # Populate the product archive (ARCDIR) archive.execute_store_products(arcdir_set) diff --git a/dev/scripts/exglobal_enkf_earc_vrfy.py b/dev/scripts/exglobal_enkf_earc_vrfy.py index cb094f3aecd..bda385d039c 100755 --- a/dev/scripts/exglobal_enkf_earc_vrfy.py +++ b/dev/scripts/exglobal_enkf_earc_vrfy.py @@ -3,7 +3,7 @@ import os from pygfs.task.archive import Archive -from pygfs.task.archive_vars import ArchiveVrfy +from pygfs.task.archive_vars import ArchiveVrfyVars from wxflow import AttrDict, Logger, cast_strdict_as_dtypedict, chdir, logit # initialize root logger @@ -18,14 +18,14 @@ def main(): # Instantiate the Archive task object archive = Archive(config) - with chdir(config.ROTDIR): + # Collect all archive variables in complete arch_dict for YAML templates + # Use static utility methods from ArchiveVrfyVars (not a Task instance) + arch_dict = ArchiveVrfyVars.get_all_yaml_vars(archive.task_config) - # Collect all archive variables in complete arch_dict for YAML templates - # Use static utility methods from ArchiveVrfy (not a Task instance) - arch_dict = ArchiveVrfy.get_all_yaml_vars(archive.task_config) + # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML + arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) - # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML - arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) + with chdir(config.ROTDIR): # Populate the product archive (ARCDIR) archive.execute_store_products(arcdir_set) diff --git a/ush/python/pygfs/task/archive_vars.py b/ush/python/pygfs/utils/archive_vars.py similarity index 94% rename from ush/python/pygfs/task/archive_vars.py rename to ush/python/pygfs/utils/archive_vars.py index 150b671fdd7..faced830b5d 100644 --- a/ush/python/pygfs/task/archive_vars.py +++ b/ush/python/pygfs/utils/archive_vars.py @@ -36,11 +36,6 @@ _get_cycle_vars(config_dict): Computes cycle-specific variables (cycle_HH, cycle_YMDH, cycle_YMD, head) -Design Note ------------ -This is NOT a Task class - it's a utility module with functions that operate on -config_dict dictionaries. This avoids duplicate Task instantiation in archive workflows. - Logging ------- All public operational functions are decorated with @logit(logger). @@ -53,7 +48,7 @@ logger = getLogger(__name__.split('.')[-1]) -class ArchiveVrfy: +class ArchiveVrfyVars: """ Utility class for collecting archive verification variables. @@ -100,10 +95,10 @@ def get_all_yaml_vars(config_dict: AttrDict) -> Dict[str, Any]: arch_dict = {} # Add config variables (config keys, COM* variables from job scripts) - arch_dict.update(ArchiveVrfy.add_config_vars(config_dict)) + arch_dict.update(ArchiveVrfyVars.add_config_vars(config_dict)) # Add cycle-specific variables - arch_dict.update(ArchiveVrfy._get_cycle_vars(config_dict)) + arch_dict.update(ArchiveVrfyVars._get_cycle_vars(config_dict)) logger.info(f"Collected {len(arch_dict)} variables for YAML templates") logger.debug(f"arch_dict keys: {list(arch_dict.keys())}") @@ -176,7 +171,6 @@ def add_config_vars(config_dict: AttrDict) -> Dict[str, Any]: # Import COM* directory and template variables created by job scripts # Job scripts use declare_from_tmpl -rx which exports variables to environment - # Python reads os.environ, so these COM variables are in config_dict for key in config_dict.keys(): if key.startswith(("COM_", "COMIN_", "COMOUT_")): general_dict[key] = config_dict.get(key) From dabb70f3f413e20bde459023f403d59dadeedcdf Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Tue, 9 Dec 2025 15:34:56 +0000 Subject: [PATCH 21/29] corrected a typo --- dev/scripts/exglobal_enkf_earc_vrfy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/scripts/exglobal_enkf_earc_vrfy.py b/dev/scripts/exglobal_enkf_earc_vrfy.py index bda385d039c..0c40244f602 100755 --- a/dev/scripts/exglobal_enkf_earc_vrfy.py +++ b/dev/scripts/exglobal_enkf_earc_vrfy.py @@ -3,7 +3,7 @@ import os from pygfs.task.archive import Archive -from pygfs.task.archive_vars import ArchiveVrfyVars +from pygfs.utils.archive_vars import ArchiveVrfyVars from wxflow import AttrDict, Logger, cast_strdict_as_dtypedict, chdir, logit # initialize root logger From a0e6b2c80c351570d7480929f4ccdc90df462dc6 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Tue, 9 Dec 2025 16:49:05 +0000 Subject: [PATCH 22/29] update comments --- dev/scripts/exglobal_archive_vrfy.py | 2 +- dev/scripts/exglobal_enkf_earc_vrfy.py | 2 +- ush/python/pygfs/utils/archive_vars.py | 42 ++++---------------------- 3 files changed, 8 insertions(+), 38 deletions(-) diff --git a/dev/scripts/exglobal_archive_vrfy.py b/dev/scripts/exglobal_archive_vrfy.py index b9929c030db..a334e5bf717 100755 --- a/dev/scripts/exglobal_archive_vrfy.py +++ b/dev/scripts/exglobal_archive_vrfy.py @@ -19,7 +19,7 @@ def main(): archive = Archive(config) # Collect all archive variables in complete arch_dict for YAML templates - # Use static utility methods from ArchiveVrfyVars (not a Task instance) + # Use static utility methods from ArchiveVrfyVars arch_dict = ArchiveVrfyVars.get_all_yaml_vars(archive.task_config) # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML diff --git a/dev/scripts/exglobal_enkf_earc_vrfy.py b/dev/scripts/exglobal_enkf_earc_vrfy.py index 0c40244f602..8fc95e0f011 100755 --- a/dev/scripts/exglobal_enkf_earc_vrfy.py +++ b/dev/scripts/exglobal_enkf_earc_vrfy.py @@ -19,7 +19,7 @@ def main(): archive = Archive(config) # Collect all archive variables in complete arch_dict for YAML templates - # Use static utility methods from ArchiveVrfyVars (not a Task instance) + # Use static utility methods from ArchiveVrfyVars arch_dict = ArchiveVrfyVars.get_all_yaml_vars(archive.task_config) # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML diff --git a/ush/python/pygfs/utils/archive_vars.py b/ush/python/pygfs/utils/archive_vars.py index faced830b5d..5d3a3cba563 100644 --- a/ush/python/pygfs/utils/archive_vars.py +++ b/ush/python/pygfs/utils/archive_vars.py @@ -17,7 +17,6 @@ - Compute cycle-specific variables (cycle_HH, cycle_YMDH, cycle_YMD, head) - Calculate COM directory paths with grid loops (0p25, 0p50, 1p00) - Extract configuration keys (RUN, DO_* flags, FHMAX*, etc.) - - Provide complete arch_dict to YAML templates YAML Template Responsibilities (parm/archive/*_arcdir.yaml.j2): - Build file sets with source → destination mappings @@ -68,8 +67,7 @@ def get_all_yaml_vars(config_dict: AttrDict) -> Dict[str, Any]: """Collect all variables needed for YAML templates. This method provides only the VARIABLES needed by the YAML templates - (cycle vars, COM paths, config keys). The YAML templates handle all - file set generation logic (loops, conditionals, path construction). + (cycle vars, COM paths, config keys). Parameters ---------- @@ -83,13 +81,6 @@ def get_all_yaml_vars(config_dict: AttrDict) -> Dict[str, Any]: - cycle_HH, cycle_YMDH, cycle_YMD, head: Cycle-specific variables - COMIN_*, COMOUT_*, COM_*: All COM directory paths (from job scripts) - Config keys: RUN, PSLOT, ROTDIR, DO_* flags, FHMAX*, etc. - - Notes - ----- - File set generation (mkdir lists, copy operations) is handled entirely - by the YAML templates. This method only provides the variables they need. - COM paths are created in the job scripts (JGLOBAL_ARCHIVE_VRFY and - JGLOBAL_ENKF_ARCHIVE_VRFY) and passed through config_dict. """ # Build arch_dict with variables for Jinja2 templates arch_dict = {} @@ -108,41 +99,20 @@ def get_all_yaml_vars(config_dict: AttrDict) -> Dict[str, Any]: @staticmethod @logit(logger) def add_config_vars(config_dict: AttrDict) -> Dict[str, Any]: - """Collect and format general variables for archive operations. + """Collect configuration keys and COM* variables for archive operations. - This method: - 1. Updates resolution variables to be 3-digit formatted strings (if present) - 2. Extracts all required configuration keys for archiving - 3. Collects all COM* directory and template variables - 4. Returns complete dictionary ready for arch_dict + Formats resolution variables (OCNRES, ICERES) to 3 digits and extracts + all configuration keys and COM* directory paths needed for archiving. Parameters ---------- config_dict : AttrDict Configuration dictionary from Archive.task_config - Variables updated (if present in config_dict): - - OCNRES: Ocean resolution (formatted to 3 digits) - - ICERES: Ice resolution (formatted to 3 digits) - - Configuration keys extracted (if present): - - current_cycle, RUN, PSLOT, ROTDIR, PARMgfs, ARCDIR, MODE - - DO_JEDIATMENS, DO_FIT2OBS, DO_JEDIATMVAR, DO_JEDISNOWDA - - DO_AERO_ANL, DO_PREP_OBS_AERO, DO_GSISOILDA, DO_LAND_IAU - - NET, FHOUT_GFS, FHMAX_HF_GFS, FHMAX_FITS, FHMAX, FHOUT, FHMAX_GFS - - FHMIN_GFS (if present in config_dict) - - COM variable prefixes collected: - - COM_, COMIN_, COMOUT_ - Returns ------- Dict[str, Any] - Dictionary containing all general archive variables - - Notes - ----- - Missing keys will be silently skipped (not added to general_dict). + Dictionary with config keys and all COM_*, COMIN_*, COMOUT_* variables """ general_dict = {} @@ -183,7 +153,7 @@ def add_config_vars(config_dict: AttrDict) -> Dict[str, Any]: @staticmethod @logit(logger) def _get_cycle_vars(config_dict: AttrDict) -> Dict[str, Any]: - """Calculate cycle-specific variables using wxflow timetools. + """Calculate cycle-specific variables. Parameters ---------- From 934acd6e75a26213374eec973f495e9e9d7265e4 Mon Sep 17 00:00:00 2001 From: AntonMFernando-NOAA <167725623+AntonMFernando-NOAA@users.noreply.github.com> Date: Tue, 9 Dec 2025 14:32:08 -0500 Subject: [PATCH 23/29] Update dev/scripts/exglobal_archive_vrfy.py Co-authored-by: Rahul Mahajan --- dev/scripts/exglobal_archive_vrfy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/scripts/exglobal_archive_vrfy.py b/dev/scripts/exglobal_archive_vrfy.py index a334e5bf717..634c1c1db34 100755 --- a/dev/scripts/exglobal_archive_vrfy.py +++ b/dev/scripts/exglobal_archive_vrfy.py @@ -23,7 +23,7 @@ def main(): arch_dict = ArchiveVrfyVars.get_all_yaml_vars(archive.task_config) # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML - arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) + arcdir_set = archive.configure_vrfy(arch_dict) with chdir(config.ROTDIR): From 35130e643d099e0fad50caa15e0fc7762ac8aefb Mon Sep 17 00:00:00 2001 From: AntonMFernando-NOAA <167725623+AntonMFernando-NOAA@users.noreply.github.com> Date: Tue, 9 Dec 2025 14:32:22 -0500 Subject: [PATCH 24/29] Update dev/scripts/exglobal_enkf_earc_vrfy.py Co-authored-by: Rahul Mahajan --- dev/scripts/exglobal_enkf_earc_vrfy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/scripts/exglobal_enkf_earc_vrfy.py b/dev/scripts/exglobal_enkf_earc_vrfy.py index 8fc95e0f011..4315cf0ca69 100755 --- a/dev/scripts/exglobal_enkf_earc_vrfy.py +++ b/dev/scripts/exglobal_enkf_earc_vrfy.py @@ -23,7 +23,7 @@ def main(): arch_dict = ArchiveVrfyVars.get_all_yaml_vars(archive.task_config) # Pass arch_dict to configure_vrfy which will render the Jinja2 YAML - arcdir_set = archive.configure_vrfy(AttrDict(arch_dict)) + arcdir_set = archive.configure_vrfy(arch_dict) with chdir(config.ROTDIR): From d9ce4cca47fdca535204e1658291c819fc6f861c Mon Sep 17 00:00:00 2001 From: AntonMFernando-NOAA <167725623+AntonMFernando-NOAA@users.noreply.github.com> Date: Tue, 9 Dec 2025 14:32:29 -0500 Subject: [PATCH 25/29] Update ush/python/pygfs/utils/archive_vars.py Co-authored-by: Rahul Mahajan --- ush/python/pygfs/utils/archive_vars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/python/pygfs/utils/archive_vars.py b/ush/python/pygfs/utils/archive_vars.py index 5d3a3cba563..182d918e996 100644 --- a/ush/python/pygfs/utils/archive_vars.py +++ b/ush/python/pygfs/utils/archive_vars.py @@ -83,7 +83,7 @@ def get_all_yaml_vars(config_dict: AttrDict) -> Dict[str, Any]: - Config keys: RUN, PSLOT, ROTDIR, DO_* flags, FHMAX*, etc. """ # Build arch_dict with variables for Jinja2 templates - arch_dict = {} + arch_dict = AttrDict() # Add config variables (config keys, COM* variables from job scripts) arch_dict.update(ArchiveVrfyVars.add_config_vars(config_dict)) From b383e1dbb675bb6eebad9b30e88d45f5dace6cd2 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Tue, 9 Dec 2025 19:49:44 +0000 Subject: [PATCH 26/29] change copy_req to copy_opt --- parm/archive/enkf_arcdir.yaml.j2 | 2 +- parm/archive/gfs_arcdir.yaml.j2 | 48 +++++++++++++++----------------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/parm/archive/enkf_arcdir.yaml.j2 b/parm/archive/enkf_arcdir.yaml.j2 index 8e05e52a103..87505719bd0 100644 --- a/parm/archive/enkf_arcdir.yaml.j2 +++ b/parm/archive/enkf_arcdir.yaml.j2 @@ -12,7 +12,7 @@ mkdir: - "{{ ARCDIR }}" -copy_req: +copy_opt: # Ensemble analysis statistics - REQUIRED {% if DO_JEDIATMENS == True %} - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}stat.atm.tar", diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index 453a34ed31c..6836dcc5fae 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -15,7 +15,19 @@ mkdir: - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}" {% endif %} -copy_req: +copy_opt: +# Cyclone tracking files (optional - only exist when storms are tracked) + - ["{{ COMIN_ATMOS_TRACK }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}", + "{{ ARCDIR }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_TRACK }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}", + "{{ ARCDIR }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}"] + +# Cyclone tracking data by basin +{% for basin in ["epac", "natl"] %} + - ["{{ COMIN_ATMOS_TRACK }}/{{ basin }}", + "{{ ARCDIR }}/{{ basin }}"] +{% endfor %} + {% if MODE == "cycled" %} # Deterministic analysis files (cycled mode only) - REQUIRED - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.analysis.grib2", @@ -54,6 +66,15 @@ copy_req: "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] {% endfor %} + - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}", + "{{ ARCDIR }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}", + "{{ ARCDIR }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.{{ cycle_YMDH }}", + "{{ ARCDIR }}/trak.gfso.atcfunix.{{ cycle_YMDH }}"] + - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}", + "{{ ARCDIR }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}"] + {% if DO_FIT2OBS == True %} # GFS Fit2Obs data - REQUIRED {% for fhr in range(0, FHMAX_FITS + 1, 6) %} @@ -85,28 +106,3 @@ copy_req: - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}analysis.dtf.a006.nc", "{{ ARCDIR }}/dtfanl.{{ RUN }}.{{ cycle_YMDH }}.nc"] {% endif %} - -copy_opt: - # Cyclone tracking files (optional - only exist when storms are tracked) - - ["{{ COMIN_ATMOS_TRACK }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}", - "{{ ARCDIR }}/atcfunix.{{ RUN }}.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_TRACK }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}", - "{{ ARCDIR }}/atcfunixp.{{ RUN }}.{{ cycle_YMDH }}"] - - # Cyclone tracking data by basin (optional directories) -{% for basin in ["epac", "natl"] %} - - ["{{ COMIN_ATMOS_TRACK }}/{{ basin }}", - "{{ ARCDIR }}/{{ basin }}"] -{% endfor %} - -{% if RUN == "gfs" %} - # GFS cyclone genesis data (optional - only exist when storms are tracked) - - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}", - "{{ ARCDIR }}/storms.gfso.atcf_gen.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_GENESIS }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}", - "{{ ARCDIR }}/storms.gfso.atcf_gen.altg.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.{{ cycle_YMDH }}", - "{{ ARCDIR }}/trak.gfso.atcfunix.{{ cycle_YMDH }}"] - - ["{{ COMIN_ATMOS_GENESIS }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}", - "{{ ARCDIR }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}"] -{% endif %} From e8ed3b9355f30c9b32db0d6e4ec4b2416da580e9 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Tue, 9 Dec 2025 20:20:03 +0000 Subject: [PATCH 27/29] typo --- parm/archive/gfs_arcdir.yaml.j2 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index 6836dcc5fae..a991658e1a1 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -29,7 +29,7 @@ copy_opt: {% endfor %} {% if MODE == "cycled" %} - # Deterministic analysis files (cycled mode only) - REQUIRED + # Deterministic analysis files (cycled mode only) - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.analysis.grib2", "{{ ARCDIR }}/pgbanl.{{ RUN }}.{{ cycle_YMDH }}.grib2"] @@ -60,7 +60,7 @@ copy_opt: {% endif %} {% if RUN == "gfs" %} - # GFS forecast files - REQUIRED + # GFS forecast files {% for fhr in range(0, FHMAX_GFS + 1, FHOUT_GFS) %} - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] @@ -76,7 +76,7 @@ copy_opt: "{{ ARCDIR }}/trak.gfso.atcfunix.altg.{{ cycle_YMDH }}"] {% if DO_FIT2OBS == True %} - # GFS Fit2Obs data - REQUIRED + # GFS Fit2Obs data {% for fhr in range(0, FHMAX_FITS + 1, 6) %} {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} @@ -88,13 +88,13 @@ copy_opt: {% endif %} {% elif RUN == "gdas" %} - # GDAS forecast files - REQUIRED + # GDAS forecast files {% for fhr in range(0, FHMAX + 1, FHOUT) %} - ["{{ COMIN_ATMOS_GRIB_1p00 }}/{{ head }}pres_a.1p00.f{{ '%03d'|format(fhr) }}.grib2", "{{ ARCDIR }}/pgbf{{ '%02d'|format(fhr) }}.{{ RUN }}.{{ cycle_YMDH }}.grib2"] {% endfor %} - # GDAS bias correction files - REQUIRED + # GDAS bias correction files - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias.txt", "{{ ARCDIR }}/abias.{{ RUN }}.{{ cycle_YMDH }}"] - ["{{ COMIN_ATMOS_ANALYSIS }}/{{ head }}abias_pc.txt", From f5299f3f125a276baeb6454feacce8995b4bf616 Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Wed, 10 Dec 2025 14:41:20 +0000 Subject: [PATCH 28/29] suggestions --- parm/archive/enkf_arcdir.yaml.j2 | 2 +- parm/archive/gfs_arcdir.yaml.j2 | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/parm/archive/enkf_arcdir.yaml.j2 b/parm/archive/enkf_arcdir.yaml.j2 index 87505719bd0..5219f035610 100644 --- a/parm/archive/enkf_arcdir.yaml.j2 +++ b/parm/archive/enkf_arcdir.yaml.j2 @@ -13,7 +13,7 @@ mkdir: - "{{ ARCDIR }}" copy_opt: - # Ensemble analysis statistics - REQUIRED + # Ensemble analysis statistics {% if DO_JEDIATMENS == True %} - ["{{ COMIN_ATMOS_ANALYSIS_ENSSTAT }}/{{ head }}stat.atm.tar", "{{ ARCDIR }}/atmensstat.{{ RUN }}.{{ cycle_YMDH }}"] diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index a991658e1a1..a533584354c 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -78,11 +78,11 @@ copy_opt: {% if DO_FIT2OBS == True %} # GFS Fit2Obs data {% for fhr in range(0, FHMAX_FITS + 1, 6) %} - {% set sfcfile = "/" + head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} - {% set sigfile = "/" + head + "atm.f" + '%03d'|format(fhr) + ".nc" %} - - ["{{ COMIN_ATMOS_HISTORY }}{{ sfcfile }}", + {% set sfcfile = head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} + {% set sigfile = head + "atm.f" + '%03d'|format(fhr) + ".nc" %} + - ["{{ COMIN_ATMOS_HISTORY }}/{{ sfcfile }}", "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sfcfile }}"] - - ["{{ COMIN_ATMOS_HISTORY }}{{ sigfile }}", + - ["{{ COMIN_ATMOS_HISTORY }}/{{ sigfile }}", "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sigfile }}"] {% endfor %} {% endif %} From 733b35e47aaed33ec8501705217a06b73bbeba3b Mon Sep 17 00:00:00 2001 From: Anton Fernando Date: Wed, 10 Dec 2025 16:30:48 +0000 Subject: [PATCH 29/29] typo --- parm/archive/gfs_arcdir.yaml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/archive/gfs_arcdir.yaml.j2 b/parm/archive/gfs_arcdir.yaml.j2 index a533584354c..9fffe5a1b7e 100644 --- a/parm/archive/gfs_arcdir.yaml.j2 +++ b/parm/archive/gfs_arcdir.yaml.j2 @@ -81,9 +81,9 @@ copy_opt: {% set sfcfile = head + "sfc.f" + '%03d'|format(fhr) + ".nc" %} {% set sigfile = head + "atm.f" + '%03d'|format(fhr) + ".nc" %} - ["{{ COMIN_ATMOS_HISTORY }}/{{ sfcfile }}", - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sfcfile }}"] + "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}/{{ sfcfile }}"] - ["{{ COMIN_ATMOS_HISTORY }}/{{ sigfile }}", - "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}{{ sigfile }}"] + "{{ VFYARC }}/{{ RUN }}.{{ cycle_YMD }}/{{ cycle_HH }}/{{ sigfile }}"] {% endfor %} {% endif %}