diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 69c6f7499cc..57b8abe50a5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -208,7 +208,8 @@ ush/python/pygfs/jedi/jedi.py @DavidNew-NOAA ush/python/pygfs/task/__init__.py @aerorahul ush/python/pygfs/task/aero_analysis.py @DavidNew-NOAA @CoryMartin-NOAA ush/python/pygfs/task/aero_bmatrix.py @DavidNew-NOAA @CoryMartin-NOAA -ush/python/pygfs/task/aero_emissions.py @bbakernoaa +ush/python/pygfs/task/chem_fire_emission.py @bbakernoaa +ush/python/pygfs/task/nexus_emission.py @bbakernoaa ush/python/pygfs/task/analysis.py @DavidNew-NOAA @RussTreadon-NOAA ush/python/pygfs/task/analysis_stats.py @CoryMartin-NOAA @DavidNew-NOAA ush/python/pygfs/task/archive.py @DavidHuber-NOAA diff --git a/.gitmodules b/.gitmodules index e608c11ae23..fc87e5f3655 100644 --- a/.gitmodules +++ b/.gitmodules @@ -27,3 +27,7 @@ [submodule "sorc/gsi_monitor.fd"] path = sorc/gsi_monitor.fd url = https://github.com/NOAA-EMC/GSI-Monitor.git +[submodule "sorc/nexus.fd"] + path = sorc/nexus.fd + url = https://github.com/NOAA-OAR-ARL/NEXUS.git + branch = feature/gcafs diff --git a/dev/job_cards/rocoto/prep_emissions.sh b/dev/job_cards/rocoto/prep_emissions.sh index c764660bb74..6410a1e0bd0 100755 --- a/dev/job_cards/rocoto/prep_emissions.sh +++ b/dev/job_cards/rocoto/prep_emissions.sh @@ -11,6 +11,17 @@ status=$? export job="prep_emissions" export jobid="${job}.$$" +############################################################### +# Source relevant configs +configs="base aero prep_emissions" +for config in ${configs}; do + source "${EXPDIR}/config.${config}" + status=$? 
+ if [[ ${status} -ne 0 ]]; then + exit "${status}" + fi +done + ############################################################### # Execute the JJOB "${HOMEgfs}/dev/jobs/JGLOBAL_PREP_EMISSIONS" diff --git a/dev/jobs/JGLOBAL_FORECAST b/dev/jobs/JGLOBAL_FORECAST index 565a9f70c82..816803d6ee2 100755 --- a/dev/jobs/JGLOBAL_FORECAST +++ b/dev/jobs/JGLOBAL_FORECAST @@ -91,6 +91,8 @@ if [[ "${DO_AERO_FCST}" == "YES" ]]; then COMOUT_CHEM_HISTORY:COM_CHEM_HISTORY_TMPL YMD="${PDY}" HH="${cyc}" RUN="${rCDUMP}" declare_from_tmpl -rx \ COMIN_TRACER_RESTART:COM_ATMOS_RESTART_TMPL + YMD="${PDY}" HH="${cyc}" declare_from_tmpl -rx \ + COMIN_CHEM_INPUT:COM_CHEM_INPUT_TMPL fi ############################################################### diff --git a/dev/jobs/JGLOBAL_PREP_EMISSIONS b/dev/jobs/JGLOBAL_PREP_EMISSIONS index 72c9d40604b..0d843a68ce7 100755 --- a/dev/jobs/JGLOBAL_PREP_EMISSIONS +++ b/dev/jobs/JGLOBAL_PREP_EMISSIONS @@ -10,8 +10,10 @@ source "${HOMEgfs}/ush/jjob_header.sh" -e "prep_emissions" -c "base prep_emissio ############################################## # Begin JOB SPECIFIC work ############################################## -# Generate COM variables from templates -# TODO: Add necessary COMIN, COMOUT variables for this job +YMD=${PDY} HH=${cyc} declare_from_tmpl -rx COMOUT_CHEM_INPUT:COM_CHEM_INPUT_TMPL +YMD=${PDY} HH=${cyc} declare_from_tmpl -rx COMOUT_CHEM_RESTART:COM_CHEM_RESTART_TMPL + +mkdir -p "${COMOUT_CHEM_INPUT}" ############################################################### # Run relevant script diff --git a/dev/parm/config/gcafs/config.aero.j2 b/dev/parm/config/gcafs/config.aero.j2 deleted file mode 100644 index 719e100525f..00000000000 --- a/dev/parm/config/gcafs/config.aero.j2 +++ /dev/null @@ -1,23 +0,0 @@ -#! 
/usr/bin/env bash - -# UFS-Aerosols settings - -# Path to the input data tree -export AERO_INPUTS_DIR="{{ AERO_INPUTS_DIR }}" - -export AERO_DIAG_TABLE="${PARMgfs}/ufs/fv3/diag_table.aero" -export AERO_FIELD_TABLE="${PARMgfs}/ufs/fv3/field_table.aero" -# Biomass burning emission dataset. Choose from: gbbepx, qfed, none -export AERO_EMIS_FIRE="qfed" -# Directory containing GOCART configuration files -export AERO_CONFIG_DIR="${PARMgfs}/ufs/gocart" - -# Aerosol convective scavenging factors (list of string array elements) -# Element syntax: ':'. Use = * to set default factor for all aerosol tracers -# Scavenging factors are set to 0 (no scavenging) if unset -export fscav_aero="'*:0.3','so2:0.0','msa:0.0','dms:0.0','nh3:0.4','nh4:0.6','bc1:0.6','bc2:0.6','oc1:0.4','oc2:0.4','dust1:0.6','dust2:0.6', 'dust3:0.6','dust4:0.6','dust5:0.6','seas1:0.5','seas2:0.5','seas3:0.5','seas4:0.5','seas5:0.5'" -# -# Number of diagnostic aerosol tracers (default: 0) -export dnats_aero=2 - -echo "END: config.aero" diff --git a/dev/parm/config/gcafs/config.aero.j2 b/dev/parm/config/gcafs/config.aero.j2 new file mode 120000 index 00000000000..cf791379998 --- /dev/null +++ b/dev/parm/config/gcafs/config.aero.j2 @@ -0,0 +1 @@ +../gfs/config.aero.j2 \ No newline at end of file diff --git a/dev/parm/config/gcafs/config.prep_emissions b/dev/parm/config/gcafs/config.prep_emissions index fa411c27ad4..47bf5e26922 100644 --- a/dev/parm/config/gcafs/config.prep_emissions +++ b/dev/parm/config/gcafs/config.prep_emissions @@ -8,4 +8,6 @@ echo "BEGIN: config.prep_emissions" # Get task specific resources source "${EXPDIR}/config.resources" prep_emissions +source "${EXPDIR}/config.aero" + echo "END: config.prep_emissions" diff --git a/dev/parm/config/gcafs/config.resources b/dev/parm/config/gcafs/config.resources index 5308ac6f8fd..f5b412cffd0 100644 --- a/dev/parm/config/gcafs/config.resources +++ b/dev/parm/config/gcafs/config.resources @@ -793,10 +793,11 @@ case ${step} in ;; "prep_emissions") - 
export walltime="00:10:00" + export walltime="00:35:00" export ntasks=1 - export threads_per_task=1 - export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + export threads_per_task=${max_tasks_per_node} + export tasks_per_node=1 + export is_exclusive=True ;; "fcst" | "efcs") diff --git a/dev/parm/config/gcafs/config.resources.URSA b/dev/parm/config/gcafs/config.resources.URSA index ae555e97489..34bf80003a6 100644 --- a/dev/parm/config/gcafs/config.resources.URSA +++ b/dev/parm/config/gcafs/config.resources.URSA @@ -43,6 +43,10 @@ case ${step} in export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) fi ;; + + "offlineanl") + export memory="240GB" + ;; "eupd") case "${CASE}" in diff --git a/dev/parm/config/gefs/config.resources b/dev/parm/config/gefs/config.resources index 15930f9552d..600685925b4 100644 --- a/dev/parm/config/gefs/config.resources +++ b/dev/parm/config/gefs/config.resources @@ -80,11 +80,11 @@ case ${step} in ;; "prep_emissions") - export walltime="00:10:00" + export walltime="00:35:00" export ntasks=1 - export threads_per_task=1 - export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) - export memory="1GB" + export threads_per_task=${max_tasks_per_node} + export tasks_per_node=1 + export is_exclusive=True ;; "fcst" | "efcs") diff --git a/dev/parm/config/gfs/config.aero.j2 b/dev/parm/config/gfs/config.aero.j2 index 719e100525f..2722212c01e 100644 --- a/dev/parm/config/gfs/config.aero.j2 +++ b/dev/parm/config/gfs/config.aero.j2 @@ -1,23 +1,232 @@ #! /usr/bin/env bash +#================================================================================ # UFS-Aerosols settings +# This configuration file sets up environment variables for aerosol modeling in the UFS (Unified Forecast System) Aerosols component. +# It configures aerosol inputs, diagnostics, emissions, and the NEXUS emissions preprocessor for GCAFS (Global Coupled Aerosol Forecast System). 
+# Used in GFS (Global Forecast System) workflows for initializing and running aerosol simulations in FV3 (Finite-Volume Cubed-Sphere) dynamical core. +#================================================================================ +echo "BEGIN: config.aero" -# Path to the input data tree -export AERO_INPUTS_DIR="{{ AERO_INPUTS_DIR }}" +#================================================================================ +# 1. Aerosol settings +#================================================================================ +# General settings for aerosol tracers, diagnostics, and scavenging in the GOCART (Goddard Chemistry Aerosol Radiation and Transport) model. +# These are used in the atmospheric model to handle aerosol transport, chemistry, and interaction with radiation/cloud processes. +# Base directory for aerosol input data files (e.g., initial conditions, climatologies). +# This path is mounted or staged in the workflow and referenced by the model for reading aerosol fields. +#--------------------------------------------------------------------------------------------------- +export AERO_INPUTS_DIR="${HOMEgfs}/fix/chem/Emission_data" + +#------------------------------------------------- +# Diag Table and Field Table for GOCART aerosols +#------------------------------------------------- + +# Configuration files defining diagnostic outputs and field registrations for aerosol variables in GOCART. +# diag_table.aero: Specifies which aerosol fields to output and at what frequency (used by FMS diagnostics). +# field_table.aero: Registers prognostic/diagnostic tracers with the FV3 dynamical core (e.g., for advection, diffusion). +#--------------------------------------------------------------------------------------------------- export AERO_DIAG_TABLE="${PARMgfs}/ufs/fv3/diag_table.aero" export AERO_FIELD_TABLE="${PARMgfs}/ufs/fv3/field_table.aero" -# Biomass burning emission dataset. 
Choose from: gbbepx, qfed, none -export AERO_EMIS_FIRE="qfed" -# Directory containing GOCART configuration files + +#================================================================================ +# Aerosol configuration +#================================================================================ + +# Directory containing GOCART-specific namelists, parameters, and runtime configs (e.g., namelist.aero). +# Loaded during model initialization to set aerosol scheme parameters like time steps, vertical levels. +#--------------------------------------------------------------------------------------------------- export AERO_CONFIG_DIR="${PARMgfs}/ufs/gocart" # Aerosol convective scavenging factors (list of string array elements) # Element syntax: ':'. Use = * to set default factor for all aerosol tracers +# Scavenging factors represent the fraction of aerosol removed by convective precipitation (wet deposition). +# Used in the convection scheme (e.g., SAS or NF_CONV) to compute in-cloud scavenging rates for each tracer. +# * = default for unspecified tracers; specific gases like SO2 have lower factors due to solubility. # Scavenging factors are set to 0 (no scavenging) if unset +#--------------------------------------------------------------------------------------------------- export fscav_aero="'*:0.3','so2:0.0','msa:0.0','dms:0.0','nh3:0.4','nh4:0.6','bc1:0.6','bc2:0.6','oc1:0.4','oc2:0.4','dust1:0.6','dust2:0.6', 'dust3:0.6','dust4:0.6','dust5:0.6','seas1:0.5','seas2:0.5','seas3:0.5','seas4:0.5','seas5:0.5'" -# + # Number of diagnostic aerosol tracers (default: 0) +# Specifies how many additional diagnostic (non-prognostic) aerosol tracers to include in the model output. +# Used in GOCART to control verbosity of diagnostics; higher values add more fields for analysis/post-processing. 
+#--------------------------------------------------------------------------------------------------- export dnats_aero=2 +#================================================================================ +# 2. Aerosol emissions settings +#================================================================================ +# Configuration for surface emissions of aerosols and precursors (e.g., from fires, anthropogenic sources). +# These drive the source terms in the GOCART continuity equation for each tracer. +# Biomass burning emission dataset. Choose from: gbbepx, qfed, none +# Dataset for wildfire and biomass burning emissions (e.g., black/organic carbon, CO). +# qfed: Quick Fire Emission Dataset (near-real-time, version specified below). +# gbbepx: Global Biomass Burning Emissions Product (alternative). +# none: Disable fire emissions. +# Used in prep_emissions scripts to fetch/interpolate data to model grid. +#--------------------------------------------------------------------------------------------------- +export AERO_EMIS_FIRE="gbbepx" +# Version of the selected fire emissions dataset (e.g., for QFEDv2.5, version 061). +# Determines which historical or NRT files to load from input directories. +export AERO_EMIS_FIRE_VERSION="004" + +# Flag to enable historical (climatological) fire emissions instead of NRT for testing/spin-up. +# When true, uses fixed-year data; false uses real-time from FIRE_EMIS_NRT_DIR. +# Path to near-real-time (NRT) fire emissions data, updated daily (e.g., from satellites like MODIS). +# On WCOSS2, points to DCOM (Data Communication) root for operational runs; empty for testing. +# Processed by scripts like exglobal_prep_emissions.py to generate input files for GOCART. 
+#--------------------------------------------------------------------------------------------------- +export AERO_EMIS_FIRE_HIST=1 # Use historical fire emissions | 1 = true 0 = false + +#--------------------------------------------------------------------------------------------------- +export FIRE_EMIS_NRT_DIR="" #TODO: set to DCOM for WCOSS2 "${DCOMROOT}/YYYYMMDD/firewx" # Directory containing NRT fire emissions +export FIRE_EMIS_DIR="${HOMEgfs}/fix/chem/Emission_data/fires_data/GBBEPx/v4" # Directory containing historical fire emissions + + +#=============================================================================== +# 3. NEXUS settings +#=============================================================================== +# NEXUS (Next-generation Emissions eXchange Utility System) is a preprocessor for anthropogenic/biogenic emissions. +# Generates time-varying, gridded emission inputs for GOCART from inventories like CEDS, HTAP, CAMS. +# Runs offline before the forecast, outputting netCDF files read by the model via AERO_INPUTS_DIR. +# NEXUS aerosol emissions dataset. Choose from: gocart, none +# Specifies the emission species set for NEXUS processing (gocart for GOCART-compatible tracers like SO2, BC, OC, dust). +# none: Skip NEXUS entirely, use other emission sources or zero emissions. + +# NEXUS configuration set +#------------------------- +export NEXUS_CONFIG="{{ NEXUS_CONFIG | default('gocart') }}" # Options: gocart, none + +# Runtime choice of NEXUS config variant; defaults to gocart for standard aerosol tracers. +# Overrides via Jinja2 templating in workflow (e.g., for different chemistry schemes). 
+#--------------------------------------------------------------------------------------------------- +export NEXUS_CONFIG_DIR="${PARMgfs}/chem/nexus/${NEXUS_CONFIG}" # Directory containing NEXUS configuration files + +# NEXUS Inputs +#--------------- +# TODO: when this is merged this will point to AERO_INPUTS_DIR for operations +# export NEXUS_INPUT_DIR="${AERO_INPUTS_DIR}/nexus" +# Directory for static/dynamic input data used by NEXUS (e.g., emission inventories, masks, meteo fields). +# Currently hardcoded for development; will use shared AERO_INPUTS_DIR in production for consistency. +# Specific path for GCAFS external data on this filesystem. +# Contains emission datasets (e.g., CEDS2019/2024, HTAPv2, CAMS) processed by NEXUS. +#--------------------------------------------------------------------------------------------------- +export NEXUS_INPUT_DIR="${HOMEgfs}/fix/chem/Emission_data/nexus" + + + +#-------------------------- +# NEXUS Time Step (seconds) +#-------------------------- +# Temporal resolution for emission interpolation in NEXUS (e.g., hourly outputs). +# Must align with model coupling time; used in HEMCO time management for diurnal/seasonal scaling. +# 3600s = 1 hour; adjustable for finer/coarser emission updates (e.g., 1800s for sub-hourly). +#--------------------------------------------------------------------------------------------------- +export NEXUS_TSTEP="{{ NEXUS_TSTEP | default(3600) }}" # Default NEXUS time step in seconds + +#------------------ +# NEXUS Grid +#------------------ +# Defines the emission grid for NEXUS processing (0.5x0.5 degree global lat-lon). +# Emissions are interpolated from this grid to the FV3 cubed-sphere grid during prep. +# Number of longitude points (1440 for 0.25-degree resolution; here 1440 ~0.25deg). +#----------------------------------------------------- +export NEXUS_NX="{{ NEXUS_NX | default(1440) }}" + +# Number of latitude points (720 for 0.25-degree). 
+#-------------------------------------------------- +export NEXUS_NY="{{ NEXUS_NY | default(720) }}" + +# Western boundary longitude (global coverage). +#------------------------------------------------- +export NEXUS_XMIN="{{ NEXUS_XMIN | default(-180.0) }}" + +# Eastern boundary longitude. +#-------------------------------------------------- +export NEXUS_XMAX="{{ NEXUS_XMAX | default(180.0) }}" + +# Southern boundary latitude. +#-------------------------------------------------- +export NEXUS_YMIN="{{ NEXUS_YMIN | default(-90.0) }}" + +# Northern boundary latitude. +#--------------------------------------------------- +export NEXUS_YMAX="{{ NEXUS_YMAX | default(90.0) }}" + +# Number of vertical levels (1 for surface emissions; higher for vertical profiles if needed). +#-------------------------------------------------- +export NEXUS_NZ="{{ NEXUS_NZ | default(1) }}" + +#------------------- +# NEXUS Config Files +#------------------- +# HEMCO (Harmonized Emissions Component) runtime configuration files used by NEXUS. +# These define species mappings, time scales, grid alignments, and diagnostic flags. + +# Grid definition file: Specifies emission grid (lat-lon bounds, resolution) and interpolation options to model grid. +export NEXUS_GRID_NAME="{{ NEXUS_GRID_NAME | default('HEMCO_sa_Grid.rc') }}" +# Time management file: Defines temporal patterns (diurnal, weekly, monthly) for scaling emissions. +export NEXUS_TIME_NAME="{{ NEXUS_TIME_NAME | default('HEMCO_sa_Time.rc') }}" +# Diagnostics file: Controls which emission fields to output for verification (e.g., total SO2, BC emissions). +export NEXUS_DIAG_NAME="{{ NEXUS_DIAG_NAME | default('HEMCO_sa_Diag.rc') }}" +# Species mapping file: Links emission inventories to GOCART tracers (e.g., CEDS SO2 to model SO2). +export NEXUS_SPEC_NAME="{{ NEXUS_SPEC_NAME | default('HEMCO_sa_Spec.rc') }}" +# Master config file: Orchestrates all HEMCO components, emission sources, and runtime flags for NEXUS. 
+export NEXUS_CONFIG_NAME="{{ NEXUS_CONFIG_NAME | default('NEXUS_Config.rc') }}" + +#------------------ +# NEXUS Diagnostics +#------------------ +# Settings for outputting NEXUS-processed emissions for model input and verification. +# Outputs are netCDF files with gridded, time-varying sources read by GOCART at each time step. + +# Base filename prefix for diagnostic output files (e.g., NEXUS_DIAG_YYYYMMDD.nc). +export NEXUS_DIAG_PREFIX="{{ NEXUS_DIAG_PREFIX | default('NEXUS_DIAG') }}" +# Frequency of diagnostic emission outputs; Hourly for detailed analysis, coarser for storage efficiency. +export NEXUS_DIAG_FREQ="{{ NEXUS_DIAG_FREQ | default('Hourly') }}" # Options: Hourly, Daily, Monthly + +#------------------ +# NEXUS Logging +#------------------ +# Controls NEXUS execution logs for debugging and monitoring in the workflow. + +# Output log file for NEXUS run; captures errors, warnings, and processing summaries. +# Reviewed in post-processing or if emissions fail to generate. +export NEXUS_LOGFILE="{{ NEXUS_LOGFILE | default('NEXUS.log') }}" + +#------------------ +# NEXUS Emissions +#------------------ +# Flags to enable/disable specific emission inventories processed by NEXUS. +# Multiple can be true for blended emissions; used in NEXUS_Config.rc to select sources. +# Emissions are scaled by region, sector (e.g., industry, transport), and time. + +# Flag for MEGAN (Model of Emissions of Gases and Aerosols from Nature) biogenic VOC/PM emissions. +# Currently disabled; future integration for isoprene, terpenes affecting secondary organic aerosols. +export NEXUS_DO_MEGAN=.false # TODO: Add MEGAN biogenic emissions in the furture + +# Enable Community Emissions Data System 2019 inventory for anthropogenic aerosols/gases (e.g., SO2, NOx, PM2.5). +# Global, gridded data for 1750-2019; used for historical and recent baseline emissions. +export NEXUS_DO_CEDS2019=.false. 
# Use CEDS2019 emissions + +# Enable newer CEDS 2024 update (if available); mutually exclusive with 2019 for consistency. +export NEXUS_DO_CEDS2024=.true. # Use CEDS2024 emissions + +# Hemispheric Transport of Air Pollution version 2: Regional anthropogenic emissions for Europe/Asia/N. America. +# Focuses on transboundary pollution; supplements CEDS for finer regional detail. +export NEXUS_DO_HTAPv2=.true. # Use HTAPv2 emissions + +# HTAP version 3 flag; disabled pending updates to datasets and NEXUS compatibility. +export NEXUS_DO_HTAPv3=.false. # TODO: Currently only uses HTAPv2 for this. + +# Copernicus Atmosphere Monitoring Service global reanalysis emissions. +# Alternative to CEDS/HTAP for consistent meteo-coupled emissions; disabled here. +export NEXUS_DO_CAMS=.false. # Use CAMS global emissions + +# CAMS temporal disaggregation (e.g., hourly profiles for CAMS data). +# Enables time-varying scaling when CAMS is active. +export NEXUS_DO_CAMSTEMPO=.true. # Use CAMS temporal emissions + +#================================================================================ echo "END: config.aero" diff --git a/dev/parm/config/gfs/config.com b/dev/parm/config/gfs/config.com index 05615f256b5..7af66d14bc3 100644 --- a/dev/parm/config/gfs/config.com +++ b/dev/parm/config/gfs/config.com @@ -111,5 +111,7 @@ declare -rx COM_CHEM_HISTORY_TMPL=${COM_BASE}'/model/chem/history' declare -rx COM_CHEM_ANALYSIS_TMPL=${COM_BASE}'/analysis/chem' declare -rx COM_CHEM_BMAT_TMPL=${COM_CHEM_ANALYSIS_TMPL}'/bmatrix' declare -rx COM_CHEM_ANLMON_TMPL=${COM_BASE}'/products/chem/anlmon' +declare -rx COM_CHEM_INPUT_TMPL=${COM_BASE}'/model/chem/input' +declare -rx COM_CHEM_RESTART_TMPL=${COM_BASE}'/model/chem/restart' declare -rx COM_MED_RESTART_TMPL=${COM_BASE}'/model/med/restart' diff --git a/dev/parm/config/sfs/config.aero.j2 b/dev/parm/config/sfs/config.aero.j2 index 03571c376e3..cf791379998 120000 --- a/dev/parm/config/sfs/config.aero.j2 +++ b/dev/parm/config/sfs/config.aero.j2 @@ -1 +1 
@@ -../gefs/config.aero.j2 \ No newline at end of file +../gfs/config.aero.j2 \ No newline at end of file diff --git a/dev/parm/config/sfs/config.resources b/dev/parm/config/sfs/config.resources index 803e6282711..1581945c8fe 100644 --- a/dev/parm/config/sfs/config.resources +++ b/dev/parm/config/sfs/config.resources @@ -62,11 +62,11 @@ case ${step} in ;; "prep_emissions") - export walltime="00:10:00" + export walltime="00:40:00" export ntasks=1 - export threads_per_task=1 - export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) - export memory="1GB" + export threads_per_task=${max_tasks_per_node} + export tasks_per_node=1 + export is_exclusive=True ;; "fcst" | "efcs") diff --git a/dev/scripts/exglobal_prep_emissions.py b/dev/scripts/exglobal_prep_emissions.py index 2cf718d7f6a..abd773e0bfa 100755 --- a/dev/scripts/exglobal_prep_emissions.py +++ b/dev/scripts/exglobal_prep_emissions.py @@ -5,7 +5,7 @@ import os from wxflow import Logger, cast_strdict_as_dtypedict -from pygfs import AerosolEmissions +from pygfs import ChemFireEmissions, NEXUSEmissions # Initialize root logger @@ -17,10 +17,15 @@ # Take configuration from environment and cast it as python dictionary config = cast_strdict_as_dtypedict(os.environ) - + nxsemis = NEXUSEmissions(config.copy()) # Instantiate the emissions pre-processing task - # emissions = AerosolEmissions(config) - # emissions.initialize() - # emissions.configure() - # emissions.execute(emissions.task_config.DATA, emissions.task_config.APRUN) - # emissions.finalize() + fireemis = ChemFireEmissions(config.copy()) + fireemis.initialize() + fireemis.configure() + fireemis.execute() + fireemis.finalize() + + nxsemis.initialize() + nxsemis.configure() + nxsemis.execute() + nxsemis.finalize() diff --git a/dev/ush/compare_f90nml.py b/dev/ush/compare_f90nml.py index f3c5573a927..2aa0e745db2 100755 --- a/dev/ush/compare_f90nml.py +++ b/dev/ush/compare_f90nml.py @@ -77,8 +77,7 @@ def _print_diffs(diff_dict: Dict) -> None: max_len = 
len(max(diff_dict[path], key=len)) for kk in diff_dict[path].keys(): items = diff_dict[path][kk] - print( - f"{kk:>{max_len+2}} : {' | '.join(map(str, diff_dict[path][kk]))}") + print(f"{kk:>{max_len + 2}} : {' | '.join(map(str, diff_dict[path][kk]))}") _print_diffs(result) diff --git a/dev/workflow/build_opts.yaml b/dev/workflow/build_opts.yaml index ae02715b291..a21d3d751c0 100644 --- a/dev/workflow/build_opts.yaml +++ b/dev/workflow/build_opts.yaml @@ -32,6 +32,8 @@ systems: - "gefs_ww3_prepost" gcafs: - "gcafs_model" + - "nexus" + - "gsi_utils" build: gfs_model: command: "./build_ufs.sh -e gfs_model.x" @@ -102,3 +104,8 @@ build: command: "./build_gdas.sh" cores: 40 walltime: "01:45:00" + + nexus: + command: "./build_nexus.sh" + cores: 8 + walltime: "00:20:00" diff --git a/dev/workflow/rocoto/gcafs_tasks.py b/dev/workflow/rocoto/gcafs_tasks.py index c6687399697..4aaf7f21d43 100644 --- a/dev/workflow/rocoto/gcafs_tasks.py +++ b/dev/workflow/rocoto/gcafs_tasks.py @@ -165,13 +165,14 @@ def prep_emissions(self): str XML representation of the task """ + cycledef = f'{self.run}_half,{self.run}' if self.run in ['gcdas', 'enkfgcdas'] else self.run resources = self.get_resource('prep_emissions') task_name = f'{self.run}_prep_emissions' task_dict = {'task_name': task_name, 'resources': resources, 'envars': self.envars, - 'cycledef': self.run, + 'cycledef': cycledef, 'command': f'{self.HOMEgfs}/dev/job_cards/rocoto/prep_emissions.sh', 'job_name': f'{self.pslot}_{task_name}_@H', 'log': f'{self.rotdir}/logs/@Y@m@d@H/{task_name}.log', @@ -218,7 +219,16 @@ def offlineanl(self): return task def sfcanl(self): + """ + Create a task for surface analysis (sfcanl). + This task performs the surface analysis step in the workflow, depending on whether JEDI atmospheric variational analysis is enabled. 
+ + Returns + ------- + str + XML representation of the task + """ deps = [] if self.options['do_jediatmvar']: dep_dict = {'type': 'task', 'name': f'gcdas_atmanlfinal'} @@ -245,7 +255,16 @@ def sfcanl(self): return task def atmanlinit(self): + """ + Create a task for atmospheric analysis initialization. + + This task initializes the atmospheric analysis, including hybrid variational analysis if enabled. + Returns + ------- + str + XML representation of the task + """ deps = [] dep_dict = {'type': 'task', 'name': f'{self.run}_prep'} deps.append(rocoto.add_dependency(dep_dict)) @@ -281,7 +300,16 @@ def atmanlinit(self): return task def atmanlvar(self): + """ + Create a task for atmospheric analysis variational step. + + This task performs the variational analysis step for the atmospheric component. + Returns + ------- + str + XML representation of the task + """ deps = [] dep_dict = {'type': 'task', 'name': f'{self.run}_atmanlinit'} deps.append(rocoto.add_dependency(dep_dict)) @@ -305,7 +333,16 @@ def atmanlvar(self): return task def atmanlfv3inc(self): + """ + Create a task for applying FV3 increments to the atmospheric analysis. + + This task applies the FV3 increment files to the atmospheric analysis fields. + Returns + ------- + str + XML representation of the task + """ deps = [] dep_dict = {'type': 'task', 'name': f'{self.run}_atmanlvar'} deps.append(rocoto.add_dependency(dep_dict)) @@ -329,7 +366,16 @@ def atmanlfv3inc(self): return task def atmanlfinal(self): + """ + Create a task for finalizing the atmospheric analysis. + This task finalizes the atmospheric analysis by applying all necessary increments and adjustments. + + Returns + ------- + str + XML representation of the task + """ deps = [] dep_dict = {'type': 'task', 'name': f'{self.run}_atmanlfv3inc'} deps.append(rocoto.add_dependency(dep_dict)) @@ -353,7 +399,16 @@ def atmanlfinal(self): return task def aeroanlgenb(self): + """ + Create a task for generating aerosol background error files. 
+ This task generates the background fields required for aerosol analysis. + + Returns + ------- + str + XML representation of the task + """ deps = [] dep_dict = {'type': 'metatask', 'name': f'{self.run}_fcst'} deps.append(rocoto.add_dependency(dep_dict)) @@ -377,7 +432,16 @@ def aeroanlgenb(self): return task def aeroanlinit(self): + """ + Create a task for initializing aerosol analysis. + + This task initializes the aerosol analysis by preparing the necessary background and observation data. + Returns + ------- + str + XML representation of the task + """ deps = [] dep_dict = {'type': 'task', 'name': 'gcdas_aeroanlgenb', 'offset': f"-{timedelta_to_HMS(self._base['interval_gdas'])}"} deps.append(rocoto.add_dependency(dep_dict)) @@ -403,7 +467,16 @@ def aeroanlinit(self): return task def aeroanlvar(self): + """ + Create a task for the aerosol analysis variational step. + This task performs the variational analysis for the aerosol component. + + Returns + ------- + str + XML representation of the task + """ deps = [] dep_dict = { 'type': 'task', 'name': f'{self.run}_aeroanlinit', @@ -429,7 +502,16 @@ def aeroanlvar(self): return task def aeroanlfinal(self): + """ + Create a task for finalizing the aerosol analysis. + This task finalizes the aerosol analysis by applying all necessary increments and adjustments. 
+ + Returns + ------- + str + XML representation of the task + """ deps = [] dep_dict = {'type': 'task', 'name': f'{self.run}_aeroanlvar'} deps.append(rocoto.add_dependency(dep_dict)) @@ -601,24 +683,35 @@ def _fcst_forecast_only(self): def _fcst_cycled(self): anldep = 'gcdas' - dep_dict = {'type': 'task', 'name': f'{anldep}_sfcanl'} - dep = rocoto.add_dependency(dep_dict) - dependencies = rocoto.create_dependency(dep=dep) - if self.options['do_aero_fcst']: - dep_dict = {'type': 'task', 'name': f'{self.run}_prep_emissions'} - dependencies.append(rocoto.add_dependency(dep_dict)) + # Create the nested dependency structure + or_dependencies = [] + # Always group sfcanl and aeroanlfinal together with AND + sfcanl_aero_deps = [] + dep_dict = {'type': 'task', 'name': f'{anldep}_sfcanl'} + sfcanl_aero_deps.append(rocoto.add_dependency(dep_dict)) if self.options['use_aero_anl']: dep_dict = {'type': 'task', 'name': f'{anldep}_aeroanlfinal'} - dependencies.append(rocoto.add_dependency(dep_dict)) + sfcanl_aero_deps.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='and', dep=dependencies) + sfcanl_aero_and = rocoto.create_dependency(dep_condition='and', dep=sfcanl_aero_deps) + or_dependencies.append(sfcanl_aero_and) if self.run in ['gcdas']: dep_dict = {'type': 'task', 'name': f'{self.run}_stage_ic'} - dependencies.append(rocoto.add_dependency(dep_dict)) - dependencies = rocoto.create_dependency(dep_condition='or', dep=dependencies) + or_dependencies.append(rocoto.add_dependency(dep_dict)) + + # Create OR dependency between the analysis group and stage_ic + dependencies = rocoto.create_dependency(dep_condition='or', dep=or_dependencies) + + if self.options['do_aero_fcst']: + # Wrap the OR dependency in a list for the AND condition + and_deps = [dependencies] + dep_dict = {'type': 'task', 'name': f'{self.run}_prep_emissions'} + and_deps.append(rocoto.add_dependency(dep_dict)) + + dependencies = 
rocoto.create_dependency(dep_condition='and', dep=and_deps) cycledef = 'gcdas_half,gcdas' if self.run in ['gcdas'] else self.run @@ -739,6 +832,16 @@ def efcs(self): # return task def atmanlupp(self): + """ + Create a task for UPP post-processing of the atmospheric analysis. + + This task runs the Unified Post Processor (UPP) on the atmospheric analysis output. + + Returns + ------- + str + XML representation of the task + """ postenvars = self.envars.copy() postenvar_dict = {'FHR3': '000', 'UPP_RUN': 'analysis'} @@ -775,6 +878,16 @@ def atmanlupp(self): return task def atmanlprod(self): + """ + Create a task for generating atmospheric analysis products. + + This task generates products from the atmospheric analysis output using UPP. + + Returns + ------- + str + XML representation of the task + """ postenvars = self.envars.copy() postenvar_dict = {'FHR_LIST': '-1'} for key, value in postenvar_dict.items(): @@ -804,12 +917,50 @@ def atmanlprod(self): return task def atmupp(self): + """ + Create a task for UPP post-processing of the atmospheric forecast. + + This task runs the Unified Post Processor (UPP) on the atmospheric forecast output. + + Returns + ------- + str + XML representation of the task + """ return self._upptask(upp_run='forecast', task_id='atmupp') def goesupp(self): + """ + Create a task for UPP post-processing of GOES satellite data. + + This task runs the Unified Post Processor (UPP) for GOES satellite output. + + Returns + ------- + str + XML representation of the task + """ return self._upptask(upp_run='goes', task_id='goesupp') def _upptask(self, upp_run="forecast", task_id="atmupp"): + """ + Helper method to create a UPP post-processing task. + + This method creates a Rocoto task for running the Unified Post Processor (UPP) + on either forecast or GOES satellite output, depending on the arguments. + + Parameters + ---------- + upp_run : str, optional + Type of UPP run ('forecast' or 'goes'). Default is 'forecast'. 
+ task_id : str, optional + Identifier for the task. Default is 'atmupp'. + + Returns + ------- + str + XML representation of the task + """ VALID_UPP_RUN = ["forecast", "goes"] if upp_run not in VALID_UPP_RUN: @@ -1008,6 +1159,16 @@ def atmos_ensstat(self): return task def metp(self): + """ + Create a task for METplus verification. + + This task runs METplus to verify model output against observations for various cases. + + Returns + ------- + str + XML representation of the task + """ deps = [] dep_dict = {'type': 'task', 'name': f'{self.run}_arch_vrfy'} deps.append(rocoto.add_dependency(dep_dict)) @@ -1065,6 +1226,16 @@ def metp(self): return task def anlstat(self): + """ + Create a task for analysis statistics. + + This task computes statistics for the analysis, including aerosol analysis if enabled. + + Returns + ------- + str + XML representation of the task + """ deps = [] if self.options['do_aero_anl']: dep_dict = {'type': 'task', 'name': f'{self.run}_aeroanlfinal'} diff --git a/dev/workflow/rocoto/gfs_tasks.py b/dev/workflow/rocoto/gfs_tasks.py index 690bc3cb16a..b703996b473 100644 --- a/dev/workflow/rocoto/gfs_tasks.py +++ b/dev/workflow/rocoto/gfs_tasks.py @@ -2017,7 +2017,7 @@ def metp(self): deps2.append(rocoto.add_dependency(dep_dict)) deps3 = [] for lookback in range(n_lookback): - offset = timedelta_to_HMS(-to_timedelta(f'{assim_freq * (lookback+1)}H')) + offset = timedelta_to_HMS(-to_timedelta(f'{assim_freq * (lookback + 1)}H')) dep_dict = {'type': 'task', 'name': f'{self.run}_arch_vrfy', 'offset': offset} deps3.append(rocoto.add_dependency(dep_dict)) diff --git a/docs/source/gcafs.rst b/docs/source/gcafs.rst index 2b0c1971c44..ba4be17c0b3 100644 --- a/docs/source/gcafs.rst +++ b/docs/source/gcafs.rst @@ -1,6 +1,6 @@ -===================================== -Global Chemistry and Aerosol Forecast -===================================== +==================================================== +Global Chemistry and Aerosol Forecast System (GCAFS) 
+==================================================== Overview -------- @@ -14,7 +14,9 @@ Key Features * Interactive GOCART aerosol module for forecasting dust, sea salt, sulfate, black carbon, and organic carbon * Optional full atmospheric chemistry with gas-phase and heterogeneous reactions -* Integration with biomass burning emissions sources (QFED, GBBEPX) +* Integration with biomass burning emissions sources (QFED, GBBEPx) +* NEXUS emissions preprocessing system for anthropogenic and biogenic sources +* Support for multiple emission inventories (CEDS, HTAP, CAMS) * Aerosol-radiation-cloud interactions * Optional aerosol data assimilation @@ -62,6 +64,45 @@ The GCAFS workflow includes these main tasks: The workflow is managed by the Rocoto workflow manager, with tasks defined in the ``workflow/rocoto/gcafs_tasks.py`` file. +Configuration Files +------------------- + +GCAFS configuration is managed through several key files in the ``parm/config/gcafs/`` directory: + +### config.aero.j2 + +The primary configuration file for aerosol settings, containing: + +**Aerosol Model Settings:** + +.. code-block:: bash + + export AERO_INPUTS_DIR="/path/to/aerosol/data" # Base directory for aerosol input data + export AERO_CONFIG_DIR="${PARMgfs}/ufs/gocart" # GOCART configuration files + export fscav_aero="'*:0.3','so2:0.0',..." # Convective scavenging factors + export dnats_aero=2 # Number of diagnostic tracers + +**Fire Emissions Settings:** + +.. code-block:: bash + + export AERO_EMIS_FIRE="gbbepx" # Fire dataset: gbbepx, qfed, none + export AERO_EMIS_FIRE_VERSION="061" # Dataset version + export AERO_EMIS_FIRE_HIST=1 # Historical (1) vs NRT (0) + +**NEXUS Emissions Settings:** + +.. code-block:: bash + + export NEXUS_CONFIG="gocart" # NEXUS configuration set + export NEXUS_TSTEP=3600 # Time step (seconds) + export NEXUS_DO_CEDS2019=.true. # Enable CEDS 2019 + export NEXUS_DO_HTAPv2=.true. # Enable HTAP v2 + export NEXUS_DO_CAMS=.false.
# Enable CAMS + +These settings are processed as Jinja2 templates, allowing for experiment-specific customization +through template variables like ``{{ NEXUS_CONFIG | default('gocart') }}``. + Emissions Preprocessing ----------------------- @@ -72,48 +113,97 @@ The ``prep_emissions`` task is a critical component of the GCAFS workflow that p This task performs several important functions: 1. **Configuration Generation**: Creates customized GOCART configuration files from templates -2. **Emissions File Preparation**: Processes and prepares emissions data files -3. **Historical Data Handling**: Retrieves historical fire emissions when needed -4. **Fire Emissions Selection**: Configures the selected biomass burning emissions source (QFED/GBBEPx) -5. **Template Variable Processing**: Processes all template variables in the configuration files +2. **Fire Emissions Processing**: Handles biomass burning emissions from QFED or GBBEPx datasets +3. **NEXUS Preprocessing**: Processes anthropogenic and biogenic emissions through the NEXUS system +4. **Emissions File Preparation**: Generates model-ready emissions data files +5. **Historical Data Handling**: Retrieves historical emissions when needed for testing or spin-up +6. **Template Variable Processing**: Processes all template variables in the configuration files The task is implemented in ``ush/python/pygfs/task/aero_emissions.py`` as the ``AerosolEmissions`` class. -### Detailed Workflow +### Fire Emissions Configuration -When the ``prep_emissions`` task runs, it follows these steps: +GCAFS supports multiple biomass burning emission datasets that can be configured through the ``config.aero`` file: -1. 
**Initialization**: - ```python - def initialize(self): - # Parse the YAML template for chemistry emissions - yaml_template = os.path.join(self.task_config.HOMEgfs, 'parm/chem/chem_emission.yaml.j2') - yamlvars = parse_j2yaml(path=yaml_template) - self.task_config.append(yamlvars) - ``` +**Available Fire Emission Datasets:** - This loads the base configuration template and merges it with the task configuration. +* **GBBEPx** (Global Biomass Burning Emissions Product): NOAA/NWS operational fire emissions +* **QFED** (Quick Fire Emission Dataset): NASA fire emissions with near-real-time updates +* **None**: Disable fire emissions entirely +**Configuration Options:** -2. **Historical Fire Emission Handling**: - ```python - if self.task_config.fire_emissions == 'historical': - # Handle historical fire emissions - self.task_config.fire_emissions = 'historical' - self.task_config.fire_emissions_file = os.path.join(self.task_config.HOMEgfs, 'parm/chem/historical_fire_emissions.txt') - ``` +.. code-block:: bash - This sets up the task to use historical fire emissions data if specified. + # Select fire emissions dataset + export AERO_EMIS_FIRE="gbbepx" # Options: gbbepx, qfed, none + export AERO_EMIS_FIRE_VERSION="061" # Dataset version + export AERO_EMIS_FIRE_HIST=1 # Use historical (1) or near-real-time (0) + + # Directories for emissions data + export FIRE_EMIS_NRT_DIR="" # Near-real-time data location + export FIRE_EMIS_DIR="" # Historical data location -3. 
**Fire Emission Configuration**: - ```python - if self.task_config.fire_emissions == 'qfed': - # Configure QFED emissions - self.task_config.fire_emissions = 'qfed' - self.task_config.fire_emissions_file = os.path.join(self.task_config.HOMEgfs, 'parm/chem/qfed_fire_emissions.txt') - ``` +### NEXUS Emissions Preprocessing + +NEXUS (Next-generation Emissions eXchange Utility System) preprocesses anthropogenic and biogenic emissions from multiple global inventories: + +**Supported Emission Inventories:** + +* **CEDS** (Community Emissions Data System): Global anthropogenic emissions (2019/2024 versions) +* **HTAP** (Hemispheric Transport of Air Pollution): Regional high-resolution emissions (v2/v3) +* **CAMS** (Copernicus Atmosphere Monitoring Service): European reanalysis emissions +* **MEGAN** (Model of Emissions of Gases and Aerosols from Nature): Biogenic emissions (future) + +**NEXUS Configuration:** + +.. code-block:: bash + + # NEXUS system configuration + export NEXUS_CONFIG="gocart" # Configuration set (gocart, none) + export NEXUS_TSTEP=3600 # Time step in seconds + + # Grid specification (0.25-degree global) + export NEXUS_NX=1440 # Longitude points + export NEXUS_NY=720 # Latitude points + + # Enable/disable emission inventories + export NEXUS_DO_CEDS2019=.true. # CEDS 2019 emissions + export NEXUS_DO_CEDS2024=.false. # CEDS 2024 emissions + export NEXUS_DO_HTAPv2=.true. # HTAP v2 emissions + export NEXUS_DO_CAMS=.false. 
# CAMS emissions + +### Emission Dataset Details + +**Fire Emissions:** + +* **GBBEPx (Global Biomass Burning Emissions Product)**: + - Operational NOAA/NWS fire emissions based on VIIRS satellite data + - Near-real-time updates with ~6-hour latency + - Includes wildfire, agricultural burning, and prescribed burns + +* **QFED (Quick Fire Emission Dataset)**: + - NASA fire emissions using MODIS satellite observations + - Available in near-real-time and historical versions + - High spatial resolution with detailed speciation + +**Anthropogenic/Biogenic Emissions:** + +* **CEDS (Community Emissions Data System)**: + - Global gridded emissions inventory (1750-2019/2024) + - Anthropogenic sources: energy, industry, transport, residential, agriculture + - Species: SO2, NOx, CO, NH3, black carbon, organic carbon, PM2.5 + +* **HTAP (Hemispheric Transport of Air Pollution)**: + - Regional high-resolution emissions for Europe, Asia, North America + - Focuses on transboundary air pollution + - Complements CEDS with finer spatial detail + +* **CAMS (Copernicus Atmosphere Monitoring Service)**: + - European Centre reanalysis emissions + - Consistent with meteorological fields + - Includes temporal disaggregation capabilities - This sets up the task to use QFED emissions data if specified. GOCART Configuration Files -------------------------- @@ -156,13 +246,22 @@ are replaced at runtime with values from the workflow configuration. 
Emissions Configuration ~~~~~~~~~~~~~~~~~~~~~~~ -External data sources for emissions are configured in: +External data sources for emissions are configured through ExtData resource files: - **ExtData.gbbepx**: GBBEPx biomass burning emissions configuration -- **ExtData.qfed**: QFED fire emissions configuration -- **ExtData.other**: Anthropogenic, biogenic, and other emission sources +- **ExtData.qfed**: QFED fire emissions configuration +- **ExtData.nexus**: NEXUS-processed anthropogenic/biogenic emissions +- **ExtData.other**: Additional emission sources (volcanic, lightning, etc.) - **ExtData.none**: Placeholder configuration when emissions are disabled +The NEXUS system processes emissions through HEMCO (Harmonized Emissions Component) configuration files: + +- **NEXUS_Config.rc**: Master configuration orchestrating all emission sources +- **HEMCO_sa_Grid.rc**: Grid definition and interpolation settings +- **HEMCO_sa_Time.rc**: Temporal scaling patterns (diurnal, weekly, seasonal) +- **HEMCO_sa_Spec.rc**: Species mapping between inventories and GOCART tracers +- **HEMCO_sa_Diag.rc**: Diagnostic output configuration + To modify the aerosol configuration, edit these files or create custom versions in your experiment directory. The file ``gocart_tracer.list`` defines the complete set of aerosol tracers used in the model. @@ -174,7 +273,7 @@ The ExtData configuration files specify how external data sources are imported i .. 
code-block:: none # Import Name | Units | Clim | Regrid | Time Template | Offset | Scale | Var on File | File Template - OC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 biomass ExtData/nexus/QFED/%y4/%m2/qfed2.emis_oc.006.%y4%m2%d2.nc4 + OC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 OC ChemInput/FIRE_EMIS.%y4%m2%d2.nc4 Field descriptions: @@ -203,6 +302,24 @@ For example, in the QFED configuration: This imports SO2 emissions from QFED into the SU_BIOMASS variable, using a scale factor of 0.7778, from files with a date-based naming pattern. +### NEXUS ExtData Configuration + +The NEXUS-processed emissions are configured through **ExtData.nexus**, which handles anthropogenic and biogenic emissions from multiple inventories. Example entries: + +.. code-block:: none + + # Anthropogenic SO2 from CEDS + SU_ANTHRO NA N Y %y4-%m2-%d2t12:00:00 none none so2_anthro ExtData/nexus/CEDS/%y4/CEDS.emis_so2.%y4%m2%d2.nc4 + + # Black carbon from HTAP + BC_ANTHRO NA N Y %y4-%m2-%d2t12:00:00 none none bc_anthro ExtData/nexus/HTAP/%y4/HTAP.emis_bc.%y4%m2%d2.nc4 + +The NEXUS preprocessing system generates these files by: + +1. Reading emission inventories (CEDS, HTAP, CAMS) from ``NEXUS_INPUT_DIR`` +2. Applying temporal scaling patterns (diurnal, weekly, seasonal) +3. Regridding to the model resolution +4. Outputting model-ready netCDF files with standardized variable names AERO_HISTORY.rc File Details ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -275,12 +392,25 @@ corresponding frequency parameters are properly set in your workflow. 
Output Products --------------- -GCAFS produces standard meteorological outputs plus aerosol fields including: +GCAFS produces standard meteorological outputs plus comprehensive aerosol fields including: +**Core Aerosol Fields:** * Aerosol mass concentrations (dust, sea salt, sulfate, black carbon, organic carbon) -* Aerosol optical depth fields +* Aerosol optical depth fields at multiple wavelengths * PM2.5 and PM10 concentrations +* Aerosol extinction coefficients + +**Process-Specific Diagnostics:** +* Emission fields from fires and anthropogenic sources (when NEXUS diagnostics enabled) +* Dry and wet deposition fluxes +* Optical properties (single scattering albedo, asymmetry parameter) +* Column-integrated aerosol mass + +**Advanced Outputs:** +* 3D aerosol concentrations on model levels +* Aerosol number concentrations * Full chemical species concentrations when running with chemistry enabled +* NEXUS diagnostic emissions for verification -Output frequency is controlled by the standard global-workflow configuration options -in the same manner as GFS. +Output frequency and collections are controlled through the ``AERO_HISTORY.rc`` configuration file, +with standard global-workflow configuration options determining the base output settings. 
diff --git a/env/AWSPW.env b/env/AWSPW.env index 5b3ad7ffa2a..9c21c767e5a 100755 --- a/env/AWSPW.env +++ b/env/AWSPW.env @@ -50,7 +50,8 @@ elif [[ "${step}" = "prepsnowobs" ]]; then elif [[ "${step}" = "prep_emissions" ]]; then - export APRUN="${APRUN_default}" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${APRUN_default} --cpus-per-task=${NTHREADS_PREP_EMISSIONS}" elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]] || [[ "${step}" = "wave_stat" ]] || [[ "${step}" = "wave_stat_pnt" ]]; then diff --git a/env/AZUREPW.env b/env/AZUREPW.env index f407b19302b..8248d3f9fc7 100755 --- a/env/AZUREPW.env +++ b/env/AZUREPW.env @@ -57,7 +57,8 @@ elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step} elif [[ "${step}" = "prep_emissions" ]]; then - export APRUN="${APRUN_default}" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${APRUN_default} --cpus-per-task=${NTHREADS_PREP_EMISSIONS}" elif [[ "${step}" = "post" ]]; then diff --git a/env/GAEAC5.env b/env/GAEAC5.env index 9f84a8b46ef..e4e7dcf2e7d 100755 --- a/env/GAEAC5.env +++ b/env/GAEAC5.env @@ -48,7 +48,8 @@ case ${step} in ;; "prep_emissions") - export APRUN="${APRUN_default}" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${APRUN_default} --cpus-per-task=${NTHREADS_PREP_EMISSIONS}" ;; "waveinit" | "waveprep" | "wavepostsbs" | "wavepostbndpnt" | "wavepostpnt" | "wavepostbndpntbll" | "wave_stat" | "wave_stat_pnt" ) diff --git a/env/GAEAC6.env b/env/GAEAC6.env index 5ab863acb16..07bdb854ae1 100755 --- a/env/GAEAC6.env +++ b/env/GAEAC6.env @@ -51,7 +51,8 @@ case ${step} in ;; "prep_emissions") - export APRUN="${APRUN_default}" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${APRUN_default} --cpus-per-task=${NTHREADS_PREP_EMISSIONS}" ;; "waveinit" | "waveprep" | 
"wavepostsbs" | "wavepostbndpnt" | "wavepostpnt" | "wavepostbndpntbll" | "wave_stat" | "wave_stat_pnt" ) diff --git a/env/GOOGLEPW.env b/env/GOOGLEPW.env index 9a821023e3f..b0376d1fbd5 100755 --- a/env/GOOGLEPW.env +++ b/env/GOOGLEPW.env @@ -49,7 +49,8 @@ if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then elif [[ "${step}" = "prep_emissions" ]]; then - export APRUN="${APRUN_default}" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${APRUN_default} --cpus-per-task=${NTHREADS_PREP_EMISSIONS}" elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]] || [[ "${step}" = "wave_stat" ]] || [[ "${step}" = "wave_stat_pnt" ]]; then diff --git a/env/HERA.env b/env/HERA.env index 76af27e6748..00ef74a5836 100755 --- a/env/HERA.env +++ b/env/HERA.env @@ -57,7 +57,8 @@ if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then elif [[ "${step}" = "prep_emissions" ]]; then - export APRUN="${APRUN_default}" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${APRUN_default} --cpus-per-task=${NTHREADS_PREP_EMISSIONS}" elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]] || [[ "${step}" = "wave_stat" ]] || [[ "${step}" = "wave_stat_pnt" ]]; then diff --git a/env/HERCULES.env b/env/HERCULES.env index 493e23db4a3..3a18a34e88e 100755 --- a/env/HERCULES.env +++ b/env/HERCULES.env @@ -56,7 +56,8 @@ case ${step} in ;; "prep_emissions") - export APRUN="${APRUN_default}" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${APRUN_default} --cpus-per-task=${NTHREADS_PREP_EMISSIONS}" ;; "waveinit" | "waveprep" | "wavepostsbs" | "wavepostbndpnt" | "wavepostpnt" | "wavepostbndpntbll" | "wave_stat" | "wave_stat_pnt" ) diff 
--git a/env/ORION.env b/env/ORION.env index 6634fda665f..d9c605d1436 100755 --- a/env/ORION.env +++ b/env/ORION.env @@ -53,7 +53,8 @@ if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then elif [[ "${step}" = "prep_emissions" ]]; then - export APRUN="${launcher} -n 1" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${launcher} -n 1 --cpus-per-task=${NTHREADS_PREP_EMISSIONS}" elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || \ [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostpnt" ]] || [[ "${step}" == "wavepostbndpntbll" ]] || [[ "${step}" = "wave_stat" ]] || [[ "${step}" = "wave_stat_pnt" ]]; then diff --git a/env/URSA.env b/env/URSA.env index 80711ea2b3b..c75fd759311 100644 --- a/env/URSA.env +++ b/env/URSA.env @@ -51,7 +51,8 @@ if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then elif [[ "${step}" = "prep_emissions" ]]; then - export APRUN="${APRUN_default}" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${APRUN_default} --cpus-per-task=${NTHREADS_PREP_EMISSIONS}" elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]] || [[ "${step}" = "wave_stat" ]] || [[ "${step}" = "wave_stat_pnt" ]]; then diff --git a/env/WCOSS2.env b/env/WCOSS2.env index 867c0a4eab2..502e3eaf996 100755 --- a/env/WCOSS2.env +++ b/env/WCOSS2.env @@ -40,7 +40,8 @@ if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then elif [[ "${step}" = "prep_emissions" ]]; then - export APRUN="${APRUN_default}" + export NTHREADS_PREP_EMISSIONS=${NTHREADSmax} + export APRUN="${APRUN_default} -ppn ${tasks_per_node} --cpu-bind depth --depth ${NTHREADS_PREP_EMISSIONS}" elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = 
"wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]] || [[ "${step}" = "wave_stat" ]] || [[ "${step}" = "wave_stat_pnt" ]]; then diff --git a/parm/chem/chem_emission.yaml.j2 b/parm/chem/chem_emission.yaml.j2 deleted file mode 100644 index 7583cbf82ca..00000000000 --- a/parm/chem/chem_emission.yaml.j2 +++ /dev/null @@ -1,23 +0,0 @@ -chem_emission: - config: - apply_quality_control: True - quality_control_threshold: 1.5 - GBBEPX_TEMPLATE: GBBEPx-all01GRID_YYYYMMDD.nc - QFED_VARS: {{ qfed_vars }} - GBBEPX_VARS: {{ gbbepx_vars }} - data_in: - mkdir: - - "{{ DATA }}" - copy: - {% for file in files_in %} - - ["{{ AERO_EMIS_FIRE_DIR }}/{{ file }}", "{{ DATA }}/{{ file }}"] - {% endfor %} - data_out: - mkdir: - - "{{ COMOUT_CHEM_HISTORY }}" - copy: - {% for file in processed_files %} - - ["{{ DATA }}/{{ file }}", "{{ COMOUT_CHEM_HISTORY }}/{{ file }}"] - {% endfor %} - - diff --git a/parm/chem/fire_emission.yaml.j2 b/parm/chem/fire_emission.yaml.j2 new file mode 100644 index 00000000000..2c414bde46b --- /dev/null +++ b/parm/chem/fire_emission.yaml.j2 @@ -0,0 +1,44 @@ +{% set cycle_YMD = sdate | to_YMD %} +fire_emission: + config: + apply_quality_control: True + quality_control_threshold: 1.5 + QFED_VARS: + {% for qvar in fire_vars %} + - "{{ qvar }}" + {% endfor %} + GBBEPX_VARS: + {% for gvar in fire_vars %} + - "{{ gvar }}" + {% endfor %} + NRT_DIRECTORY: "{{ FIRE_EMIS_NRT_DIR }}/{{cycle_YMD}}/firewx" + data_in: + mkdir: + - "{{ DATA }}" + copy: + {% for fin in rawfiles %} + - ["{{ fin }}", "{{ DATA }}/"] + {% endfor %} + data_out: + mkdir: + {% if nmem_ens > 0 %} + {% for imem in range(0, nmem_ens + 1) %} + - "{{ COMOUT_CHEM_INPUT.replace('mem000', 'mem%03d' % imem) }}" + {% endfor %} + {% else %} + - "{{ COMOUT_CHEM_INPUT }}" + {% endif %} + copy: + {% if nmem_ens > 0 %} + {% for imem in range(0, nmem_ens + 1) %} + {% for fileout in processed_files %} + - ["{{ DATA }}/{{ fileout }}", "{{ COMOUT_CHEM_INPUT.replace('mem000', 'mem%03d' % imem) }}/"] + {% endfor %} + 
{% endfor %} + {% else %} + {% for fileout in processed_files %} + - ["{{ DATA }}/{{ fileout }}", "{{ COMOUT_CHEM_INPUT }}/"] + {% endfor %} + {% endif %} + + diff --git a/parm/chem/nexus_emission.yaml.j2 b/parm/chem/nexus_emission.yaml.j2 new file mode 100644 index 00000000000..de2f7073188 --- /dev/null +++ b/parm/chem/nexus_emission.yaml.j2 @@ -0,0 +1,35 @@ +nexus_emission: + data_in: + copy: + - ["{{ NEXUS_EXECUTABLE }}", "{{ DATA }}/"] + {% for filein in NEXUS_INPUT_FILES %} + - ["{{ filein }}", "{{ NEXUS_COPY_TO_FILES[loop.index0] }}"] + {% endfor %} + + data_out: + mkdir: + {% if nmem_ens > 0 %} + {% for imem in range(0, nmem_ens + 1) %} + - "{{ COMOUT_CHEM_INPUT.replace('mem000', 'mem%03d' % imem) }}" + {% endfor %} + {% for imem in range(0, nmem_ens + 1) %} + - "{{ COMOUT_CHEM_RESTART.replace('mem000', 'mem%03d' % imem) }}" + {% endfor %} + {% else %} + - "{{ COMOUT_CHEM_INPUT }}" + - "{{ COMOUT_CHEM_RESTART }}" + {% endif %} + copy: + {% if nmem_ens > 0 %} + {% for imem in range(0, nmem_ens + 1) %} + {% for fileout in FINAL_OUTPUT %} + - ["{{ DATA }}/{{ fileout }}", "{{ COMOUT_CHEM_INPUT.replace('mem000', 'mem%03d' % imem) }}/"] + {% endfor %} + - ["{{ DATA }}/Restarts/{{ RestartFile }}", "{{ COMOUT_CHEM_RESTART.replace('mem000', 'mem%03d' % imem) }}/"] + {% endfor %} + {% else %} + {% for fileout in FINAL_OUTPUT %} + - ["{{ DATA }}/{{ fileout }}", "{{ COMOUT_CHEM_INPUT }}/"] + {% endfor %} + - ["{{ DATA }}/Restarts/{{ RestartFile }}", "{{ COMOUT_CHEM_RESTART }}/"] + {% endif %} diff --git a/parm/ufs/gocart/CA2G_instance_CA.bc.rc b/parm/ufs/gocart/CA2G_instance_CA.bc.rc index 9ac462fa3e6..165e94d700c 100644 --- a/parm/ufs/gocart/CA2G_instance_CA.bc.rc +++ b/parm/ufs/gocart/CA2G_instance_CA.bc.rc @@ -5,7 +5,7 @@ nbins: 2 aerosol_radBands_optics_file: ExtData/optics/opticsBands_BC.v1_3.RRTMG.nc -aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_BC.v1_3.nc +aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_BC.v1_5.nc # Aircraft 
emission factor: convert input unit to kg C aircraft_fuel_emission_factor: 1.0000 @@ -30,6 +30,10 @@ fwet_ice: 0.0 1.0 fwet_snow: 0.0 1.0 fwet_rain: 0.0 1.0 +# SettlingSolver options +# Options: 'gocart' or 'ufs' +settling_scheme: 'ufs' + # Scavenging efficiency per bin [km-1] (NOT USED UNLESS RAS IS CALLED) fscav: 0.0 0.4 diff --git a/parm/ufs/gocart/CA2G_instance_CA.br.rc b/parm/ufs/gocart/CA2G_instance_CA.br.rc index e983240212b..679d33a42c7 100644 --- a/parm/ufs/gocart/CA2G_instance_CA.br.rc +++ b/parm/ufs/gocart/CA2G_instance_CA.br.rc @@ -33,6 +33,10 @@ fwet_ice: 0.0 0.4 fwet_snow: 0.0 0.4 fwet_rain: 0.0 0.4 +# SettlingSolver options +# Options: 'gocart' or 'ufs' +settling_scheme: 'ufs' + # Scavenging efficiency per bin [km-1] (NOT USED UNLESS RAS IS CALLED) fscav: 0.0 0.4 diff --git a/parm/ufs/gocart/CA2G_instance_CA.oc.rc b/parm/ufs/gocart/CA2G_instance_CA.oc.rc index f24d65e4c47..c90e4ffeb4e 100644 --- a/parm/ufs/gocart/CA2G_instance_CA.oc.rc +++ b/parm/ufs/gocart/CA2G_instance_CA.oc.rc @@ -3,7 +3,7 @@ # aerosol_radBands_optics_file: ExtData/optics/opticsBands_OC.v1_3.RRTMG.nc -aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_OC.v1_3.nc +aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_OC.v1_5.nc # Aircraft emission factor: convert input unit to kg C aircraft_fuel_emission_factor: 1.0000 @@ -40,6 +40,10 @@ fwet_ice: 0.0 1.0 fwet_snow: 0.0 1.0 fwet_rain: 0.0 1.0 +# SettlingSolver options +# Options: 'gocart' or 'ufs' +settling_scheme: 'ufs' + # Scavenging efficiency per bin [km-1] (NOT USED UNLESS RAS IS CALLED) fscav: 0.0 0.4 diff --git a/parm/ufs/gocart/DU2G_instance_DU.rc b/parm/ufs/gocart/DU2G_instance_DU.rc index 8001798189d..b3d249081e7 100644 --- a/parm/ufs/gocart/DU2G_instance_DU.rc +++ b/parm/ufs/gocart/DU2G_instance_DU.rc @@ -3,7 +3,7 @@ # aerosol_radBands_optics_file: ExtData/optics/opticsBands_DU.v15_3.RRTMG.nc -aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_DU.v15_3.nc 
+aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_DU.v15_5.nc particle_radius_microns: 0.73 1.4 2.4 4.5 8.0 @@ -52,9 +52,13 @@ pressure_lid_in_hPa: 0.01 emission_scheme: fengsha # choose among: fengsha, ginoux, k14 # FENGSHA settings -alpha: 0.16 +alpha: 0.2 gamma: 1.0 -soil_moisture_factor: 1 -soil_drylimit_factor: 1 +soil_moisture_factor: 1.0 +soil_drylimit_factor: 1.2 vertical_to_horizontal_flux_ratio_limit: 2.e-04 -drag_partition_option: 2 \ No newline at end of file +drag_partition_option: 1 + +# SettlingSolver options +# Options: 'gocart' or 'ufs' +settling_scheme: 'ufs' \ No newline at end of file diff --git a/parm/ufs/gocart/ExtData.gbbepx b/parm/ufs/gocart/ExtData.gbbepx index 3bd516c772a..686a8f20815 100644 --- a/parm/ufs/gocart/ExtData.gbbepx +++ b/parm/ufs/gocart/ExtData.gbbepx @@ -2,7 +2,7 @@ # GBBEPx #-------------------------------------------------------------------------------------------------------------------------------- -SU_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 SO2 ExtData/nexus/GBBEPx/GBBEPx_all01GRID.emissions_v003_%y4%m2%d2.nc -OC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 OC ExtData/nexus/GBBEPx/GBBEPx_all01GRID.emissions_v003_%y4%m2%d2.nc -BC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 BC ExtData/nexus/GBBEPx/GBBEPx_all01GRID.emissions_v003_%y4%m2%d2.nc -# EMI_NH3_BB NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 NH3 ExtData/nexus/GBBEPx/GBBEPx_all01GRID.emissions_v003_%y4%m2%d2.nc +SU_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 SO2 ChemInput/FIRE_EMIS_%y4%m2%d2.nc +OC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 OC ChemInput/FIRE_EMIS_%y4%m2%d2.nc +BC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 BC ChemInput/FIRE_EMIS_%y4%m2%d2.nc +# EMI_NH3_BB NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 NH3 ChemInput/FIRE_EMIS_%y4%m2%d2.nc diff --git a/parm/ufs/gocart/ExtData.other b/parm/ufs/gocart/ExtData.other index 1c138b4b312..1c7d505aec7 100644 --- a/parm/ufs/gocart/ExtData.other +++ b/parm/ufs/gocart/ExtData.other 
@@ -8,23 +8,23 @@ TROPP 'Pa' Y N - 0.0 1.0 #====== Dust Imports ================================================= # FENGSHA input files. Note: regridding should be N or E - Use files with _FillValue != NaN -DU_CLAY '1' Y E - none none clayfrac ExtData/nexus/FENGSHA/FENGSHA_2022_NESDIS_inputs_10km_v3.2.nc -DU_SAND '1' Y E - none none sandfrac ExtData/nexus/FENGSHA/FENGSHA_2022_NESDIS_inputs_10km_v3.2.nc +DU_CLAY '1' Y E - none none clayfrac ExtData/FENGSHA/FENGSHA_2022_NESDIS_inputs_10km_v3.2.nc +DU_SAND '1' Y E - none none sandfrac ExtData/FENGSHA/FENGSHA_2022_NESDIS_inputs_10km_v3.2.nc DU_SILT '1' Y E - none none siltfrac /dev/null -DU_SSM '1' Y E - none none sep /dev/null:1.0 -DU_UTHRES '1' Y E - none none uthres ExtData/nexus/FENGSHA/FENGSHA_2022_NESDIS_inputs_10km_v3.2.nc -DU_RDRAG '1' Y E %y4-%m2-%d2t12:00:00 none none PC ExtData/nexus/FENGSHA/FENGSHA_New_Method_NESDISv1.1_9km.nc -DU_GVF '1' Y E %y4-%m2-%d2T12:00:00 none none GVF ExtData/nexus/FENGSHA/FENGSHA_GVF_LAI2.nc -DU_LAI '1' Y E %y4-%m2-%d2T12:00:00 none none LAI ExtData/nexus/FENGSHA/FENGSHA_GVF_LAI2.nc +DU_SSM '1' Y E - none none sep ExtData/FENGSHA/FENGSHA_2022_NESDIS_inputs_10km_v3.2.nc +DU_UTHRES '1' Y E - none none uthres ExtData/FENGSHA/FENGSHA_2022_NESDIS_inputs_10km_v3.2.nc +DU_RDRAG '1' Y E %y4-%m2-%d2t12:00:00 none none albedo_drag ExtData/FENGSHA/FENGSHA_2022_NESDIS_inputs_10km_v4.nc +DU_GVF '1' Y E %y4-%m2-%d2T12:00:00 none none GVF /dev/null:0.0 +DU_LAI '1' Y E %y4-%m2-%d2T12:00:00 none none LAI /dev/null/:0.0 #====== Sulfate Sources ================================================= # Anthropogenic (BF & FF) emissions -- allowed to input as two layers -SU_ANTHROL1 NA Y Y %y4-%m2-%d2t12:00:00 none none SO2 ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc -SU_ANTHROL2 NA Y Y %y4-%m2-%d2t12:00:00 none none SO2_elev ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc +SU_ANTHROL1 NA N Y F0 none none SO2 ChemInput/NEXUS_DIAG.%y4%m2%d2.nc +SU_ANTHROL2 NA N Y F0 none 
none SO2_elev ChemInput/NEXUS_DIAG.%y4%m2%d2.nc # Ship emissions -SU_SHIPSO2 NA Y Y %y4-%m2-%d2t12:00:00 none none SO2_ship ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc -SU_SHIPSO4 NA Y Y %y4-%m2-%d2t12:00:00 none none SO4_ship ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc +SU_SHIPSO2 NA N Y F0 none none SO2_ship ChemInput/NEXUS_DIAG.%y4%m2%d2.nc +SU_SHIPSO4 NA N Y F0 none none SO4_ship ChemInput/NEXUS_DIAG.%y4%m2%d2.nc # Aircraft fuel consumption SU_AIRCRAFT NA Y Y %y4-%m2-%d2t12:00:00 none none none /dev/null @@ -65,11 +65,11 @@ OC_MTPO NA Y Y %y4-%m2-%d2t12:00:00 none none mtpo ExtData/nexus/MEGAN_ OC_BIOFUEL NA Y Y %y4-%m2-%d2t12:00:00 none none biofuel /dev/null # Anthropogenic (BF & FF) emissions -- allowed to input as two layers -OC_ANTEOC1 NA Y Y %y4-%m2-%d2t12:00:00 none none OC ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc -OC_ANTEOC2 NA Y Y %y4-%m2-%d2t12:00:00 none none OC_elev ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc +OC_ANTEOC1 NA N Y F0 none none OC ChemInput/NEXUS_DIAG.%y4%m2%d2.nc +OC_ANTEOC2 NA N Y F0 none none OC_elev ChemInput/NEXUS_DIAG.%y4%m2%d2.nc # EDGAR based ship emissions -OC_SHIP NA Y Y %y4-%m2-%d2t12:00:00 none none OC_ship ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc +OC_SHIP NA N Y F0 none none OC_ship ChemInput/NEXUS_DIAG.%y4%m2%d2.nc # Aircraft fuel consumption OC_AIRCRAFT NA N Y %y4-%m2-%d2t12:00:00 none none oc_aviation /dev/null @@ -90,14 +90,14 @@ pSOA_ANTHRO_VOC NA Y Y %y4-%m2-%d2t12:00:00 none none biofuel /dev/null BC_BIOFUEL NA Y Y %y4-%m2-%d2t12:00:00 none none biofuel /dev/null # Anthropogenic (BF & FF) emissions -- allowed to input as two layers -BC_ANTEBC1 NA Y Y %y4-%m2-%d2t12:00:00 none none BC ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc -BC_ANTEBC2 NA Y Y %y4-%m2-%d2t12:00:00 none none BC_elev ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc +BC_ANTEBC1 NA N Y F0 none none BC 
ChemInput/NEXUS_DIAG.%y4%m2%d2.nc +BC_ANTEBC2 NA N Y F0 none none BC_elev ChemInput/NEXUS_DIAG.%y4%m2%d2.nc # EDGAR based ship emissions -BC_SHIP NA Y Y %y4-%m2-%d2t12:00:00 none none BC_ship ExtData/nexus/CEDS/v2019/monthly/%y4/CEDS_2019_monthly.%y4%m2.nc +BC_SHIP NA N Y F0 none none BC_ship ChemInput/NEXUS_DIAG.%y4%m2%d2.nc # Aircraft fuel consumption -BC_AIRCRAFT NA N Y %y4-%m2-%d2t12:00:00 none none bc_aviation /dev/null +BC_AIRCRAFT NA N Y F0 none none bc_aviation /dev/null # Aviation emissions during the LTO, SDC and CRS phases of flight BC_AVIATION_LTO NA Y Y %y4-%m2-%d2t12:00:00 none none bc_aviation ExtData/PIESA/sfc/HTAP/v2.2/htap-v2.2.emis_bc.aviation_lto.x3600_y1800_t12.2010.nc4 diff --git a/parm/ufs/gocart/ExtData.qfed b/parm/ufs/gocart/ExtData.qfed index 805c1173e3f..60c2693ec0c 100644 --- a/parm/ufs/gocart/ExtData.qfed +++ b/parm/ufs/gocart/ExtData.qfed @@ -2,7 +2,7 @@ # QFED #-------------------------------------------------------------------------------------------------------------------------------- -SU_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 biomass ExtData/nexus/QFED/%y4/%m2/qfed2.emis_so2.061.%y4%m2%d2.nc4 -OC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 biomass ExtData/nexus/QFED/%y4/%m2/qfed2.emis_oc.061.%y4%m2%d2.nc4 -BC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 biomass ExtData/nexus/QFED/%y4/%m2/qfed2.emis_bc.061.%y4%m2%d2.nc4 -# EMI_NH3_BB NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 biomass ExtData/nexus/QFED/%y4/%m2/qfed2.emis_nh3.061.%y4%m2%d2.nc4 +SU_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 SO2 ChemInput/FIRE_EMIS_%y4%m2%d2.nc # ExtData/nexus/QFED/%y4/%m2/qfed2.emis_so2.061.%y4%m2%d2.nc4 +OC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 OC ChemInput/FIRE_EMIS_%y4%m2%d2.nc +BC_BIOMASS NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 BC ChemInput/FIRE_EMIS_%y4%m2%d2.nc +# EMI_NH3_BB NA N Y %y4-%m2-%d2t12:00:00 none 0.7778 NH3 ChemInput/FIRE_EMIS_%y4%m2%d2.nc diff --git a/parm/ufs/gocart/NI2G_instance_NI.rc 
b/parm/ufs/gocart/NI2G_instance_NI.rc index 73db6010732..0c5d22218c9 100644 --- a/parm/ufs/gocart/NI2G_instance_NI.rc +++ b/parm/ufs/gocart/NI2G_instance_NI.rc @@ -31,3 +31,7 @@ sigma: 2.0 2.0 2.0 2.0 2.0 pressure_lid_in_hPa: 0.01 rhFlag: 0 + +# SettlingSolver options +# Options: 'gocart' or 'ufs' +settling_scheme: 'ufs' diff --git a/parm/ufs/gocart/SS2G_instance_SS.rc b/parm/ufs/gocart/SS2G_instance_SS.rc index 5616616ea6c..5d093650c20 100644 --- a/parm/ufs/gocart/SS2G_instance_SS.rc +++ b/parm/ufs/gocart/SS2G_instance_SS.rc @@ -3,7 +3,7 @@ # aerosol_radBands_optics_file: ExtData/optics/opticsBands_SS.v3_3.RRTMG.nc -aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_SS.v3_3.nc +aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_SS.v3_5.nc particle_radius_microns: 0.079 0.316 1.119 2.818 7.772 @@ -48,4 +48,6 @@ fwet_ice: 1.0 1.0 1.0 1.0 1.0 fwet_snow: 1.0 1.0 1.0 1.0 1.0 fwet_rain: 1.0 1.0 1.0 1.0 1.0 - +# SettlingSolver options +# Options: 'gocart' or 'ufs' +settling_scheme: 'ufs' diff --git a/parm/ufs/gocart/SU2G_instance_SU.rc b/parm/ufs/gocart/SU2G_instance_SU.rc index 2efa5ec49f8..2e5928ed238 100644 --- a/parm/ufs/gocart/SU2G_instance_SU.rc +++ b/parm/ufs/gocart/SU2G_instance_SU.rc @@ -3,7 +3,7 @@ # aerosol_radBands_optics_file: ExtData/optics/opticsBands_SU.v1_3.RRTMG.nc -aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_SU.v1_3.nc +aerosol_monochromatic_optics_file: ExtData/monochromatic/optics_SU.v1_5.nc nbins: 4 @@ -23,6 +23,10 @@ aircraft_fuel_emission_factor: 0.0008 # Scavenging efficiency per bin [km-1] (NOT USED UNLESS RAS IS CALLED) fscav: 0.0 0.0 0.4 0.4 +# SettlingSolver options +# Options: 'gocart' or 'ufs' +settling_scheme: 'ufs' + # Dry particle radius [um], used for settling particle_radius_microns: 0.0 0.0 0.35 0.0 diff --git a/sorc/build_all.sh b/sorc/build_all.sh index e0522f0106b..2926e1d091c 100755 --- a/sorc/build_all.sh +++ b/sorc/build_all.sh @@ -85,10 +85,10 @@ system_builds=( ["gfs"]="ufs_gfs 
gfs_utils ufs_utils upp ww3_gfs" ["gefs"]="ufs_gefs gfs_utils ufs_utils upp ww3_gefs" ["sfs"]="ufs_sfs gfs_utils ufs_utils upp ww3_gefs" - ["gcafs"]="ufs_gcafs gfs_utils ufs_utils upp" + ["gcafs"]="ufs_gcafs gfs_utils ufs_utils upp nexus gsi_utils" ["gsi"]="gsi_enkf gsi_monitor gsi_utils" ["gdas"]="gdas gsi_monitor gsi_utils" - ["all"]="ufs_gfs gfs_utils ufs_utils upp ww3_gfs ufs_gefs ufs_sfs ufs_gcafs ww3_gefs gdas gsi_enkf gsi_monitor gsi_utils" + ["all"]="ufs_gfs gfs_utils ufs_utils upp ww3_gfs ufs_gefs ufs_sfs ufs_gcafs ww3_gefs gdas gsi_enkf gsi_monitor gsi_utils nexus" ) logs_dir="${HOMEgfs}/sorc/logs" @@ -101,7 +101,7 @@ fi declare -A build_jobs build_opts build_scripts build_jobs=( ["ufs_gfs"]=8 ["ufs_gefs"]=8 ["ufs_sfs"]=8 ["ufs_gcafs"]=8 ["gdas"]=8 ["gsi_enkf"]=2 ["gfs_utils"]=1 ["ufs_utils"]=1 - ["ww3_gfs"]=1 ["ww3_gefs"]=1 ["gsi_utils"]=1 ["gsi_monitor"]=1 ["gfs_utils"]=1 ["upp"]=1 + ["ww3_gfs"]=1 ["ww3_gefs"]=1 ["gsi_utils"]=1 ["gsi_monitor"]=1 ["gfs_utils"]=1 ["upp"]=1 ["nexus"]=1 ) # Establish build options for each job @@ -123,6 +123,7 @@ build_opts=( ["gsi_utils"]="${_verbose_opt} ${_build_debug}" ["gsi_enkf"]="${_verbose_opt} ${_build_debug}" ["gsi_monitor"]="${_verbose_opt} ${_build_debug}" + ["nexus"]="${_verbose_opt} ${_build_debug}" ) # Set the build script name for each build @@ -141,6 +142,7 @@ build_scripts=( ["gsi_monitor"]="build_gsi_monitor.sh" ["gfs_utils"]="build_gfs_utils.sh" ["upp"]="build_upp.sh" + ["nexus"]="build_nexus.sh" ) # Check the requested systems to make sure we can build them diff --git a/sorc/build_nexus.sh b/sorc/build_nexus.sh new file mode 100755 index 00000000000..c202fc67565 --- /dev/null +++ b/sorc/build_nexus.sh @@ -0,0 +1,39 @@ +#! 
/usr/bin/env bash +set -eux + +usage() { + echo "Usage: $0 [-d] [-j ] [-v]" + echo " -d Build in debug mode" + echo " -j Number of parallel build jobs" + echo " -v Verbose build output" + exit 1 +} + +# shellcheck disable=SC2155 +readonly HOMEgfs_=$(cd "$(dirname "$(readlink -f -n "${BASH_SOURCE[0]}" )" )/.." && pwd -P) + +OPTIND=1 +_opts="-f " # forces a clean build +while getopts ":j:dv" option; do + case "${option}" in + d) _opts+="-c -DCMAKE_BUILD_TYPE=Debug " ;; + j) BUILD_JOBS=${OPTARG};; + v) _opts+="-v ";; + :) + echo "[${BASH_SOURCE[0]}]: ${option} requires an argument" + usage + ;; + *) + echo "[${BASH_SOURCE[0]}]: Unrecognized option: ${option}" + usage + ;; + esac +done +shift $((OPTIND-1)) + +# double quoting opts will not work since it is a string of options +# shellcheck disable=SC2086 +BUILD_JOBS="${BUILD_JOBS:-1}" \ +./nexus.fd/build.sh ${_opts} -f -w ${HOMEgfs_} + +exit diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index 571d3994248..52e7ad34e7a 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -325,6 +325,23 @@ if [[ -d "${HOMEgfs}/sorc/gsi_enkf.fd/fix/build_gsinfo" ]]; then fi +#------------------------------ +#--add NEXUS files +#------------------------------ +if [[ -d "${HOMEgfs}/sorc/nexus.fd" ]]; then + cd "${HOMEgfs}/parm/chem" || exit 1 + if [[ -d nexus ]]; then + rm -rf nexus + fi + mkdir -p nexus/gocart + cd nexus/gocart || exit 1 + ${LINK_OR_COPY} "${HOMEgfs}/sorc/nexus.fd/config/gocart/NEXUS_Config.rc.j2" . + ${LINK_OR_COPY} "${HOMEgfs}/sorc/nexus.fd/config/gocart/HEMCO_sa_Grid.rc.j2" . + ${LINK_OR_COPY} "${HOMEgfs}/sorc/nexus.fd/config/gocart/HEMCO_sa_Time.rc.j2" . + ${LINK_OR_COPY} "${HOMEgfs}/sorc/nexus.fd/config/gocart/HEMCO_sa_Diag.rc.j2" . + ${LINK_OR_COPY} "${HOMEgfs}/sorc/nexus.fd/config/gocart/HEMCO_sa_Spec.rc.j2" . 
+fi + #------------------------------ #--link executables #------------------------------ @@ -434,6 +451,12 @@ if [[ -d "${HOMEgfs}/sorc/gdas.cd/install" ]]; then cp -af "${HOMEgfs}/sorc/gdas.cd/install/lib/." ./ fi +# NEXUS executable +if [[ -d "${HOMEgfs}/sorc/nexus.fd/build/bin" ]]; then + cd "${HOMEgfs}/exec" || exit 1 + ${LINK_OR_COPY} "${HOMEgfs}/sorc/nexus.fd/build/bin/nexus" nexus.x +fi + #------------------------------ #--link source code directories #------------------------------ diff --git a/sorc/nexus.fd b/sorc/nexus.fd new file mode 160000 index 00000000000..7b0772b7a64 --- /dev/null +++ b/sorc/nexus.fd @@ -0,0 +1 @@ +Subproject commit 7b0772b7a649e073902622a3a4250eff428b6663 diff --git a/ush/forecast_predet.sh b/ush/forecast_predet.sh index 0cb6ff2ba04..102ae65a5a4 100755 --- a/ush/forecast_predet.sh +++ b/ush/forecast_predet.sh @@ -762,5 +762,7 @@ GOCART_predet() { # FHMAX gets modified when IAU is on, so keep origianl value for GOCART output GOCART_MAX=${FHMAX} - # GOCART output times can't be computed here because they may depend on FHROT + #TODO: fix to copying data so that its required for EE2 compliance. Will submit + # a subsequent PR to fix this properly. 
+ ${NLN} "${COMIN_CHEM_INPUT}" "${DATA}/ChemInput" } diff --git a/ush/parsing_namelists_GOCART.sh b/ush/parsing_namelists_GOCART.sh index 26399356407..6993112358c 100755 --- a/ush/parsing_namelists_GOCART.sh +++ b/ush/parsing_namelists_GOCART.sh @@ -13,21 +13,21 @@ GOCART_namelists() { local inst_aod_freq="${fhout_aero_padded}0000" # Other gocart fields not currently used - local inst_du_ss_freq="120000" - local tavg_du_ss_freq="120000" - local inst_ca_freq="120000" - local inst_ni_freq="120000" - local inst_su_freq="120000" - local inst_du_bin_freq="010000" - local tavg_du_bin_freq="030000" - local inst_ss_bin_freq="060000" - local inst_ca_bin_freq="120000" - local inst_ni_bin_freq="120000" - local inst_su_bin_freq="120000" - local inst_2d_freq="030000" - local inst_3d_freq="060000" - local tavg_2d_rad_freq="120000" - local tavg_3d_rad_freq="120000" + local inst_du_ss_freq="${fhout_aero_padded}0000" + local tavg_du_ss_freq="${fhout_aero_padded}0000" + local inst_ca_freq="${fhout_aero_padded}0000" + local inst_ni_freq="${fhout_aero_padded}0000" + local inst_su_freq="${fhout_aero_padded}0000" + local inst_du_bin_freq="${fhout_aero_padded}0000" + local tavg_du_bin_freq="${fhout_aero_padded}0000" + local inst_ss_bin_freq="${fhout_aero_padded}0000" + local inst_ca_bin_freq="${fhout_aero_padded}0000" + local inst_ni_bin_freq="${fhout_aero_padded}0000" + local inst_su_bin_freq="${fhout_aero_padded}0000" + local inst_2d_freq="${fhout_aero_padded}0000" + local inst_3d_freq="${fhout_aero_padded}0000" + local tavg_2d_rad_freq="${fhout_aero_padded}0000" + local tavg_3d_rad_freq="${fhout_aero_padded}0000" for template_in in "${AERO_CONFIG_DIR}/"*.rc; do base_in="$(basename "${template_in}")" @@ -48,7 +48,9 @@ GOCART_namelists() { } > "${DATA}/AERO_ExtData.rc" # shellcheck disable=SC2320 status=$? 
- if ((status != 0)); then exit "${status}"; fi + if [[ ${status} -ne 0 ]]; then + exit "${status}" + fi fi fi } diff --git a/ush/python/pygfs/__init__.py b/ush/python/pygfs/__init__.py index 394a93e8a47..9a42e4f141f 100644 --- a/ush/python/pygfs/__init__.py +++ b/ush/python/pygfs/__init__.py @@ -1,8 +1,45 @@ +""" +pygfs +===== + +This package provides task classes and utilities for the GFS workflow, including analysis, chemistry, ensemble, marine, snow, and forecast processing. + +Modules +------- +- task.analysis: Analysis task +- task.chem_fire_emission: Chemistry fire emissions task +- task.nxs_emission: NEXUS emissions task +- task.aero_analysis: Aerosol analysis task +- task.aero_bmatrix: Aerosol background matrix task +- task.atm_analysis: Atmospheric analysis task +- task.atmens_analysis: Atmospheric ensemble analysis task +- task.ensemble_recenter: Ensemble recentering task +- task.fv3_analysis_calc: FV3 analysis calculation task +- task.marine_bmat: Marine background matrix task +- task.offline_analysis: Offline analysis task +- task.snow_analysis: Snow analysis task +- task.snowens_analysis: Snow ensemble analysis task +- task.upp: Unified Post Processor (UPP) task +- task.oceanice_products: Ocean/ice products task +- task.gfs_forecast: GFS forecast task +- utils.marine_da_utils: Marine data assimilation utilities +- task.fetch: Fetch task + +Attributes +---------- +__docformat__ : str + The documentation format for the module. +__version__ : str + The version of the pygfs package. +pygfs_directory : str + The absolute path to the pygfs package directory. 
+""" import os from .task.analysis import Analysis -from .task.aero_emissions import AerosolEmissions +from .task.chem_fire_emission import ChemFireEmissions +from .task.nexus_emission import NEXUSEmissions from .task.aero_analysis import AerosolAnalysis from .task.aero_bmatrix import AerosolBMatrix from .task.atm_analysis import AtmAnalysis diff --git a/ush/python/pygfs/task/aero_emissions.py b/ush/python/pygfs/task/aero_emissions.py deleted file mode 100644 index 6c17c135261..00000000000 --- a/ush/python/pygfs/task/aero_emissions.py +++ /dev/null @@ -1,465 +0,0 @@ -#!/usr/bin/env python3 - -import os -import re -import fnmatch -import xarray as xr -from logging import getLogger -from typing import Dict, Any, Union -from dateutil.rrule import DAILY, rrule -from pprint import pformat, pprint - -from wxflow import (AttrDict, - parse_j2yaml, - FileHandler, - logit, - Task, - to_timedelta, - WorkflowException, - Executable, which) - -logger = getLogger(__name__.split('.')[-1]) - - -class AerosolEmissions(Task): - """Chemistry Emissions pre-processing Task - """ - - @logit(logger, name="AeroEmission") - def __init__(self, config: Dict[str, Any]) -> None: - """Constructor for the Aerosol Emissions task - - Parameters - ---------- - config : Dict[str, Any] - Incoming configuration for the task from the environment - - Returns - ------- - None - """ - super().__init__(config) - - self.historical = bool(self.task_config.get('AERO_EMIS_FIRE_HIST', 0)) - nforecast_hours = self.task_config["FHMAX_GFS"] - self.start_date = self.task_config["PDY"] - self.end_date = self.start_date + to_timedelta(f'{nforecast_hours + 24}H') - self.forecast_dates = list(rrule(freq=DAILY, dtstart=self.start_date, until=self.end_date)) - - # # Extend task_config with localdict - # self.task_config = AttrDict(**self.task_config, **localdict) - - @logit(logger) - def initialize(self) -> None: - """Initialize the work directory and process chemical emissions configuration. 
- - This method performs the following steps: - 1. Loads and parses the chem_emission.yaml.j2 template - 2. Sets up template variables for emission configuration - 3. Creates necessary working directories - 4. Copies required input files to working directory - - Parameters - ---------- - None - - Returns - ------- - None - - Raises - ------ - WorkflowException - If the YAML template file is not found - If required directories cannot be created - If file copying operations fail - - Notes - ----- - The method expects the following configuration to be available: - - HOMEgfs : str - Base directory containing workflow configuration - - DATA : str - Working directory path - - COMOUT_CHEM_HISTORY : str - Output directory for chemical history files - - AERO_EMIS_FIRE_DIR : str - Directory containing fire emission data - - AERO_EMIS_FIRE_VERSION : str - Version of fire emission data (GBBEPx or QFED) - - The configuration is processed through a Jinja2 template system - and the resulting setup is stored in self.task_config. 
- """ - # # Parse the YAML template - # yaml_template = os.path.join(self.task_config.HOMEgfs, 'parm/chem/chem_emission.yaml.j2') - # if not os.path.exists(yaml_template): - # msg = f'YAML template not found: {yaml_template}' - # logger.error(msg) - # raise WorkflowException(msg) - # else: - # logger.debug(f'Found YAML template: {yaml_template}') - # yamlvars = parse_j2yaml(path=yaml_template, data=self.task_config) - # self.task_config.append(yamlvars) - # print(self.task_config) - - if self.historical: - logger.info(f'Processing historical emissions for {self.start_date} to {self.end_date}') - - # find the forecast dates that are in the historical period for the given emission dataset - for dates in self.forecast_dates: - if self.task_config.AERO_EMIS_FIRE.lower() == 'gbbepx': - files = self._find_gbbepx_files(dates, version=self.task_config.AERO_EMIS_FIRE_VERSION) - elif self.task_config.AERO_EMIS_FIRE.lower() == 'qfed': - files = self._find_qfed_files(dates, version=self.task_config.AERO_EMIS_FIRE_VERSION) - else: - logger.info(f'Processing forecast emissions for {self.start_date}') - - if self.task_config.AERO_EMIS_FIRE.lower() == 'gbbepx': - files = self._find_gbbepx_files( - self.start_date, - version=self.task_config.AERO_EMIS_FIRE_VERSION, - vars=self.task_config.gbbepx_vars - ) - elif self.task_config.AERO_EMIS_FIRE.lower() == 'qfed': - files = self._find_qfed_files( - self.start_date, - version=self.task_config.AERO_EMIS_FIRE_VERSION, - vars=self.task_config.qfed_vars.split() - ) - - # Set up template variables - pprint(self.task_config) - tmpl_dict = { - 'DATA': self.task_config.DATA, - 'COMOUT_CHEM_HISTORY': self.task_config.COMOUT_CHEM_HISTORY, - 'AERO_EMIS_FIRE_DIR': self.task_config.AERO_EMIS_FIRE_DIR, - 'AERO_EMIS_FIRE_VERSION': self.task_config.AERO_EMIS_FIRE_VERSION, - 'historical': self.historical, - 'forecast_dates': self.task_config.get('forecast_dates', []), - 'qfed_vars': self.task_config.get('QFED_VARS', - ["co", - "nox", - "so2", - "nh3", 
- "bc", - "oc"]), - 'gbbepx_vars': self.task_config.get('GBBEPX_VARS', - ["co", - "nox", - "so2", - "nh3", - "bc", - "oc"]), - "files_in": files - } - - # Parse template and update task configuration - logger.debug(f'Parsing YAML template: {yaml_template}') - yaml_config = parse_j2yaml(yaml_template, tmpl_dict) - self.task_config.update(yaml_config.get('chem_emission', {})) - - # Create directories - for dir_path in self.task_config.data_in.mkdir: - logger.info(f'Creating directory: {dir_path}') - os.makedirs(dir_path, exist_ok=True) - - # Copy input files - fh = FileHandler() - for file_pair in self.task_config.data_in.copy: - src = file_pair[0] - dst = os.path.join(self.task_config.DATA, os.path.basename(src)) - logger.info(f'Copying {src} to {dst}') - fh.copy(src, dst) - - @logit(logger) - def _get_unique_months(self): - """Extract unique months from forecast dates. - - This method finds all unique months present in the forecast dates - range. Useful for monthly-based emissions processing. - - Returns - ------- - set - Set of unique months as zero-padded strings (01-12) - - Notes - ----- - Uses self.forecast_dates which should be populated during initialization - Months are returned as strings with leading zeros (e.g., '01' for January) - """ - months = set(f"{date.month:02d}" for date in self.forecast_dates) - years = set(date.year for date in self.forecast_dates) - return months, years - - @logit(logger) - def execute(self, workdir: Union[str, os.PathLike]) -> None: - """Process emission files based on configuration. - - For GBBEPx files, converts them to COARDS compliant format and renames - according to template pattern. 
- - Parameters - ---------- - workdir : str | os.PathLike - work directory with the staged data - - Returns - ------- - None - - Notes - ----- - Uses GBBEPX_TEMPLATE from config to rename processed files - """ - logger.info(f"Processing emission files in {workdir}") - - if self.task_config.AERO_EMIS_FIRE.lower() == 'gbbepx': - # Process each GBBEPx file - for file_path in os.listdir(workdir): - if file_path.startswith('GBBEPx'): - full_path = os.path.join(workdir, file_path) - logger.info(f"Processing GBBEPx file: {file_path}") - - # Extract date from filename using regex - match = re.search(r"c(\d{8}).", file_path) - if not match: - logger.warning(f"Could not extract date from {file_path}, skipping") - continue - - current_date = match.group(1) - - # Convert to COARDS format - ds = self.GBBEPx_to_COARDS(full_path) - - # Generate new filename from template - template = self.task_config.config.GBBEPX_TEMPLATE - new_name = template.replace('YYYYMMDD', current_date) - output_path = os.path.join(workdir, f"processed_{new_name}") - - logger.info(f"Saving processed file to: {output_path}") - ds.to_netcdf(output_path) - - elif self.task_config.AERO_EMIS_FIRE.lower() == 'qfed': - logger.info("QFED files do not require processing, skipping execute step") - return - - logger.info("Emission processing complete") - - @logit(logger) - def finalize(self) -> None: - """Perform closing actions of the task. - Copy processed files from the DATA directory to COMOUT_CHEM_HISTORY. 
- - Returns - ------- - None - - Notes - ----- - Only copies processed GBBEPx files or QFED files based on configuration - Uses FileHandler for reliable file operations with logging - """ - logger.info("Finalizing chemical emissions processing") - - fh = FileHandler() - data_dir = self.task_config.DATA - comout_dir = self.task_config.COMOUT_CHEM_HISTORY - - if self.task_config.AERO_EMIS_FIRE.lower() == 'gbbepx': - pattern = "processed_GBBEPx*.nc" - else: - pattern = "qfed*.nc" - - processed_files = [] - for file_name in os.listdir(data_dir): - if fnmatch.fnmatch(file_name, pattern): - src = os.path.join(data_dir, file_name) - dst = os.path.join(comout_dir, file_name) - logger.info(f"Copying {src} to {dst}") - fh.copy(src, dst) - processed_files.append(file_name) - - self.task_config.update({'processed_files': processed_files}) - logger.info("Chemical emissions finalization complete") - - @logit(logger) - def _find_gbbepx_files(self, dates, version='v5r0'): - """Find GBBEPx files for the given date - - Parameters - ---------- - dates : str - Date for which to find GBBEPx files - version : str - Version of GBBEPx files to search for - - Returns - ------- - List[str] - List of GBBEPx files for the given date - """ - logger.info(f'Finding GBBEPx files for {dates}') - - # Find all possible months - months = self._get_unique_months() - - files_found = [] - # Find all possible files - for mon in months: - emis_file_dir = os.path.join(self.task_config.AERO_EMIS_FIRE_DIR, version, mon) - all_files = os.listdir(emis_file_dir) - - matching_files = [] - - pattern = r"s(\d{8})_e(\d{8})_c(\d{8})" - - for file_name in all_files: - match = re.match(pattern, file_name) - if match: - # start_date = match.group(1) - # end_date = match.group(2) - create_date = match.group(3) - - if dates[0] <= create_date and dates[-1] <= create_date: - matching_files.append(file_name) - files_found.extend(matching_files) - - return files_found - - @logit(logger) - def _find_qfed_files(self, dates, 
vars, version='061'): - """Find QFED files for the given date - - Parameters - ---------- - dates : str or datetime - Date for which to find QFED files - vars : list - List of variables to search for (e.g., bc, oc, co, etc.) - version : str - Version of QFED files to search for - - Returns - ------- - List[str] - List of QFED files for the given date - """ - logger.info(f'Finding QFED files for {dates}') - - # ensure version is a string - version = str(version).zfill(3) - - # Convert single date to list for consistent processing - if not isinstance(dates, list): - dates = [dates] - - # Format dates properly - date_strings = [d.strftime('%Y%m%d') if hasattr(d, 'strftime') else str(d) for d in dates] - - files_found = [] - - for date in dates: - # Extract year and month from the date - if hasattr(date, 'year') and hasattr(date, 'month'): - year = str(date.year) - month = f"{date.month:02d}" - else: - # If date is a string, try to parse it - date_str = str(date) - if len(date_str) >= 8: # YYYYMMDD format - year = date_str[:4] - month = date_str[4:6] - else: - logger.warning(f"Cannot parse date format: {date}") - continue - - emis_file_dir = os.path.join(self.task_config.AERO_EMIS_FIRE_DIR, year, month) - - if not os.path.exists(emis_file_dir): - logger.warning(f"Directory does not exist: {emis_file_dir}") - continue - - # Format date string for file matching - date_str = date.strftime('%Y%m%d') if hasattr(date, 'strftime') else str(date) - if len(date_str) > 8: # Format may be YYYY-MM-DD - date_str = date_str.replace('-', '') - - for v in vars: - # Match pattern like qfed2.emis_bc.{version}.20200118.nc4 - v_pattern = f"qfed2.emis_{v}.{version}.{date_str}.nc4" - full_path = os.path.join(emis_file_dir, v_pattern) - - # If exact match exists - if os.path.exists(full_path): - files_found.append(full_path) - logger.debug(f"Found exact QFED file: {full_path}") - - if not full_path: - logger.warning(f"File not found: {full_path}") - if not files_found: - logger.warning(f"No 
QFED files found for dates {date_strings} and variables {vars}") - - return files_found - - @logit(logger) - def GBBEPx_to_COARDS(fname: Union[str, os.PathLike]) -> xr.Dataset: - """Convert GBBEPx file to COARDS compliant format - - Parameters - ---------- - fname : str | os.PathLike - Input GBBEPx file path - - Returns - ------- - xr.Dataset - COARDS compliant dataset - """ - logger.info(f"Converting {fname} to COARDS format") - f = xr.open_dataset(fname, decode_cf=False) - - # Handle time dimension - if 'Time' in f.dims: - f = f.rename({"Time": 'time'}) - f.time.attrs['long_name'] = 'time' - - # Modify latitude and longitude attributes - f = f.rename({'Longitude': 'lon', 'Latitude': 'lat'}) - - # Validate and normalize coordinates - # Check longitude range and monotonicity - if not (f.lon.diff('lon') > 0).all(): - raise WorkflowException("Longitude values must be strictly increasing") - - # Ensure longitude is in [-180, 180] range - f['lon'] = xr.where(f.lon > 180, f.lon - 360, f.lon) - f = f.sortby('lon') # Sort after potential wrapping - - # Check latitude monotonicity - if not (f.lat.diff('lat') > 0).all(): - raise WorkflowException("Latitude values must be strictly increasing") - - f.lon.attrs.update({'long_name': 'Longitude', 'units': 'degrees_east'}) - f.lat.attrs.update({'long_name': 'Latitude', 'units': 'degrees_north'}) - - # Remove Element dimension if present - if 'Element' in f.dims: - f = f.drop_dims('Element') - - # Update variable attributes - for v in f.data_vars: - if v not in ['FirePerc', 'QCAll', 'NumSensor', 'CloudPerc']: - f[v].attrs['_FillValue'] = -9999.0 - elif v == 'FirePerc': - f[v].attrs.update({'units': '-', 'long_name': 'percent_of_fire_in_grid_cell'}) - elif v == 'CloudPerc': - f[v].attrs.update({'units': '-', 'long_name': 'percent_of_clouds_in_grid_cell'}) - elif v == 'NumSensor': - f[v].attrs['units'] = '-' - - # Set global attributes - f.attrs.update({'format': 'NetCDF', 'title': 'GBBEPx Fire Emissions'}) - - return f diff --git 
a/ush/python/pygfs/task/analysis.py b/ush/python/pygfs/task/analysis.py index 2c9a98dc9d4..0111fca0444 100644 --- a/ush/python/pygfs/task/analysis.py +++ b/ush/python/pygfs/task/analysis.py @@ -73,9 +73,9 @@ def __init__(self, config: Dict[str, Any]): 'WINDOW_END': _window_end, 'WINDOW_LENGTH': f"PT{self.task_config.assim_freq}H", 'next_cycle': _next_cycle, - 'OPREFIX': f"{self.task_config.RUN.replace('enkf','')}.t{self.task_config.cyc:02d}z.", - 'APREFIX': f"{self.task_config.RUN.replace('enkf','')}.t{self.task_config.cyc:02d}z.", - 'APREFIX_ENS': f"enkf{self.task_config.RUN.replace('enkf','')}.t{self.task_config.cyc:02d}z.", + 'OPREFIX': f"{self.task_config.RUN.replace('enkf', '')}.t{self.task_config.cyc:02d}z.", + 'APREFIX': f"{self.task_config.RUN.replace('enkf', '')}.t{self.task_config.cyc:02d}z.", + 'APREFIX_ENS': f"enkf{self.task_config.RUN.replace('enkf', '')}.t{self.task_config.cyc:02d}z.", 'GPREFIX': f"{_da_prefix}.t{self.task_config.previous_cycle.hour:02d}z.", 'GPREFIX_ENS': f"enkf{_da_prefix}.t{self.task_config.previous_cycle.hour:02d}z.", 'OCNRES': f"{self.task_config.OCNRES:03d}", diff --git a/ush/python/pygfs/task/chem_fire_emission.py b/ush/python/pygfs/task/chem_fire_emission.py new file mode 100644 index 00000000000..90362bb7719 --- /dev/null +++ b/ush/python/pygfs/task/chem_fire_emission.py @@ -0,0 +1,946 @@ +#!/usr/bin/env python3 + +import os +import re +import datetime +import xarray as xr +import shutil +from logging import getLogger +from typing import Dict, Any, Union, List +from dateutil.rrule import DAILY, rrule +import traceback +from wxflow import (AttrDict, + parse_j2yaml, + FileHandler, + logit, + Task, + to_timedelta, + WorkflowException) +logger = getLogger(__name__.split('.')[-1]) + + +class ChemFireEmissions(Task): + """Chemistry Emissions pre-processing Task + """ + + @logit(logger, name="ChemFireEmissions") + def __init__(self, config: Dict[str, Any]) -> None: + """Constructor for the Chemistry Fire Emissions task + + 
Parameters + ---------- + config : Dict[str, Any] + Incoming configuration for the task from the environment + + Returns + ------- + None + """ + super().__init__(config) + + self.historical = bool(self.task_config.get('AERO_EMIS_FIRE_HIST', 1)) + logger.info(f"Historical emissions flag: {self.historical}") + self.AERO_INPUTS_DIR = self.task_config.get('AERO_INPUTS_DIR', None) + self.COMOUT_CHEM_INPUT = self.task_config.get('COMOUT_CHEM_INPUT', None) + + # get the nforecast hours - gcdas will use FHMAX and gcafs will use FHMAX_GFS + if 'das' in self.task_config['RUN']: + nforecast_hours = self.task_config["FHMAX"] + else: + nforecast_hours = self.task_config["FHMAX_GFS"] + logger.info(f"Number of forecast hours: {nforecast_hours}") + + logger.info(f"current cycle : {self.task_config['current_cycle']}") + self.start_date = self.task_config["current_cycle"] - to_timedelta('24H') # include previous day + logger.info(f"Start date: {self.start_date}") + + # end date = SDATE + nforecast hours + 36 + self.end_date = self.task_config["current_cycle"] + to_timedelta(f'{nforecast_hours + 36}H') + logger.info(f"End date: {self.end_date}") + + # Calculate number of days spanned by start and end date (inclusive) + numdays = (self.end_date.date() - self.start_date.date()).days + 1 + logger.info(f"Number of days in forecast period: {numdays}") + + self.forecast_dates = list(rrule(freq=DAILY, dtstart=self.start_date, count=numdays)) + logger.info(f"Forecast dates: {self.forecast_dates}") + + @logit(logger) + def initialize(self) -> None: + """Initialize the work directory and process chemical emissions configuration. + + This method performs the following steps: + 1. Loads and parses the fire_emission.yaml.j2 template + 2. Sets up template variables for emission configuration + 3. Creates necessary working directories + 4. Copies required input files to working directory + 5. 
Sets up forecast dates and file paths for each date + + Parameters + ---------- + None + + Returns + ------- + None + + Raises + ------ + WorkflowException + If the YAML template file is not found + If required directories cannot be created + If file copying operations fail + + Notes + ----- + The method expects the following configuration to be available: + - HOMEgfs : str + Base directory containing workflow configuration + - DATA : str + Working directory path + - COMOUT_CHEM_INPUT : str + Output directory for chemical input files + - AERO_EMIS_FIRE_DIR : str + Directory containing fire emission data + - AERO_EMIS_FIRE_VERSION : str + Version of fire emission data (GBBEPx or QFED) + + The configuration is processed through a Jinja2 template system + and the resulting setup is stored in self.task_config. + """ + + if self.historical: + logger.info(f'Processing historical emissions for {self.start_date} to {self.end_date}') + + # print(self.task_config) + aero_inputs_dir = str(self.task_config.AERO_INPUTS_DIR) + aero_emis_fire = str(self.task_config.AERO_EMIS_FIRE) + aero_emis_fire_version = str(self.task_config.AERO_EMIS_FIRE_VERSION) + + logger.info(f'Using AERO_INPUTS_DIR: {aero_inputs_dir}') + logger.info(f'Using AERO_EMIS_FIRE: {aero_emis_fire}') + logger.info(f'Using AERO_EMIS_FIRE_VERSION: {aero_emis_fire_version}') + + fire_emission_template = os.path.join(self.task_config.HOMEgfs, 'parm', 'chem', 'fire_emission.yaml.j2') + if not os.path.exists(fire_emission_template): + raise WorkflowException(f"Fire emission template file not found: {fire_emission_template}") + + if os.path.exists(self.task_config.FIRE_EMIS_DIR): + logger.info(f"AERO_EMIS_FIRE_DIR already set: {self.task_config.FIRE_EMIS_DIR}") + AERO_EMIS_FIRE_DIR = self.task_config.FIRE_EMIS_DIR + else: + logger.info("AERO_EMIS_FIRE_DIR not set, constructing from AERO_INPUTS_DIR and AERO_EMIS_FIRE") + AERO_EMIS_FIRE_DIR = os.path.join(aero_inputs_dir, + "nexus", + aero_emis_fire.upper()) + + 
logger.info(f'Final AERO_EMIS_FIRE_DIR: {AERO_EMIS_FIRE_DIR}') + + # find the forecast dates that are in the historical period for the given emission dataset + files_found = [] + for dates in self.forecast_dates: + if self.task_config.AERO_EMIS_FIRE.lower() == 'gbbepx': + files = self._find_gbbepx_files(dates, + version=self.task_config.AERO_EMIS_FIRE_VERSION, + aero_emis_fire_dir=AERO_EMIS_FIRE_DIR) + elif self.task_config.AERO_EMIS_FIRE.lower() == 'qfed': + + qfed_vars = self.task_config.get('qfed_vars', ["co", "nox", "so2", "nh3", "bc", "oc"]) + files = self._find_qfed_files(dates, + qfed_vars, + version=self.task_config.AERO_EMIS_FIRE_VERSION, + aero_emis_fire_dir=AERO_EMIS_FIRE_DIR) + files_found.extend(files) + logger.info(f'Found {len(files_found)} files for historical period') + self.task_config["AERO_EMIS_FIRE_DIR"] = AERO_EMIS_FIRE_DIR + else: + # =============================================== + # NRT Forecast emissions + # =============================================== + logger.info(f'Processing forecast emissions for {self.start_date}') + + # GBBEPx NRT files are in a different directory structure + # Render the template with the current cycle to get the correct path + tmp_dict = {'sdate': self.start_date, + 'FIRE_EMIS_NRT_DIR': self.task_config.FIRE_EMIS_NRT_DIR} + yaml_config = self.render_template(tmp_dict) + if self.task_config.AERO_EMIS_FIRE.lower() == 'gbbepx': + self.task_config['AERO_EMIS_FIRE_DIR'] = yaml_config.fire_emission.config.NRT_DIRECTORY + files_found = self._find_gbbepx_nrt_fires(yaml_config.fire_emission.config.NRT_DIRECTORY) + logger.info(f'Found {len(files_found)} GBBEPx NRT files for {self.start_date}') + logger.info(f"files found: {files_found}") + elif self.task_config.AERO_EMIS_FIRE.lower() == 'qfed': + # Get QFED variables with safe defaults + qfed_vars = self.task_config.get('qfed_vars', ["co", "nox", "so2", "nh3", "bc", "oc"]) + if isinstance(qfed_vars, str): + qfed_vars = qfed_vars.split() + # Ensure version is properly 
formatted + version = self.task_config.AERO_EMIS_FIRE_VERSION + if isinstance(version, int) or version.isdigit(): + version = str(version).zfill(3) # Pad with leading zeros if needed + + # Get fire emissions directory + aero_emis_fire_dir = getattr(self.task_config, 'AERO_EMIS_FIRE_DIR', None) + + files_found = self._find_qfed_files( + self.start_date, + vars=qfed_vars, + version=version, + aero_emis_fire_dir=aero_emis_fire_dir + ) + + # Fill the COMOUT_CHEM_INPUT with environment variables to create the full output path + processed_files = [] + for dt in self.forecast_dates: + processed_files.append( + dt.strftime("FIRE_EMIS_%Y%m%d.nc") + ) + + # Debug output for chemistry history directory + logger.info(f"Outputing files prescribed to {self.task_config.COMOUT_CHEM_INPUT}") + tmpl_dict = { + 'DATA': self.task_config.DATA, + 'COMOUT_CHEM_INPUT': self.task_config.COMOUT_CHEM_INPUT, + 'AERO_EMIS_FIRE_DIR': self.task_config.AERO_EMIS_FIRE_DIR, + 'AERO_EMIS_FIRE_VERSION': self.task_config.AERO_EMIS_FIRE_VERSION, + 'historical': self.historical, + 'forecast_dates': self.task_config.get('forecast_dates', []), + 'qfed_vars': self.task_config.get('qfed_vars', ["co", "nox", "so2", "nh3", "bc", "oc"]), + 'gbbepx_vars': ["co", "nox", "so2", "nh3", "bc", "oc"], + "rawfiles": files_found, + "startdate": self.start_date.strftime('%Y%m%d'), + "processed_files": processed_files, + "nmem_ens": self.task_config.NMEM_ENS, + } + + # Parse template and update task configuration + yaml_template = os.path.join(self.task_config.HOMEgfs, 'parm', 'chem', 'fire_emission.yaml.j2') + if not os.path.exists(yaml_template): + logger.warning(f"Template file not found: {yaml_template}, using default configuration") + yaml_config = {'fire_emission': {}} + else: + logger.debug(f'Parsing YAML template: {yaml_template}') + yaml_config = parse_j2yaml(yaml_template, tmpl_dict) + + self.task_config = AttrDict(**self.task_config, **yaml_config) + + # Create working directory and sync files using 
FileHandler + FileHandler(yaml_config.fire_emission.data_in).sync() + + input_files = {"rawfiles": [os.path.join(self.task_config.DATA, os.path.basename(file)) for file in files_found]} + self.task_config = AttrDict(**self.task_config, **input_files) + + @logit(logger) + def execute(self) -> None: + """Process emission files based on configuration. + + For GBBEPx files, converts them to COARDS compliant format and renames + according to template pattern. + For QFED files, combines all data into separate files for each forecast date. + + Parameters + ---------- + None + + Returns + ------- + None + + Notes + ----- + - Uses the task_config to determine the type of emissions to process + - For GBBEPx, it uses the GBBEPx_to_COARDS method to convert files + - For QFED, it combines files by date using the combine_qfed_files method + - Creates a separate output file for each date in self.forecast_dates + - Output files are named with pattern FIRE_EMIS_YYYYMMDD.nc for each date + - The processed files are added to the task_config for later use + - Uses the FileHandler for file operations + - Uses the logit decorator for logging + - Uses decode_cf=False when processing QFED files + """ + logger.info(f"Processing emission files in {self.task_config.DATA}") + + workdir = self.task_config.DATA + + processed_files = [] + + if self.task_config.AERO_EMIS_FIRE.lower() == 'gbbepx': + # Process GBBEPx files separately for each date + processed_files.extend(self._process_gbbepx_files(workdir)) + + elif self.task_config.AERO_EMIS_FIRE.lower() == 'qfed': + # Process QFED files for each forecast date + processed_files.extend(self._process_qfed_files(workdir)) + else: + logger.warning(f"Unknown AERO_EMIS_FIRE type: {self.task_config.AERO_EMIS_FIRE}") + raise WorkflowException(f"Unsupported AERO_EMIS_FIRE type: {self.task_config.AERO_EMIS_FIRE}") + + # Add processed files to task_config + outdict = {'processed_files': processed_files} + self.task_config = AttrDict(**self.task_config, 
**outdict) + + logger.info("Emission processing execute phase complete") + + @logit(logger) + def finalize(self) -> None: + """Perform closing actions of the task. + Copy processed files from the DATA directory to COMOUT_CHEM_INPUT. + + Returns + ------- + None + + Notes + ----- + Only copies processed GBBEPx files or QFED files based on configuration + Uses FileHandler for reliable file operations with logging + """ + logger.info("Finalizing chemical emissions processing") + + FileHandler(self.task_config.fire_emission.data_out).sync() + + logger.info("Chemical emissions finalization complete") + + @logit(logger) + def _get_unique_months(self): + """Extract unique months and years from forecast dates. + + This method finds all unique months and years present in the forecast dates + range. Useful for monthly-based emissions processing. + + Returns + ------- + tuple + A tuple containing: + - set of unique months as zero-padded strings (01-12) + - set of unique years as integers + + Notes + ----- + Uses self.forecast_dates which should be populated during initialization + Months are returned as strings with leading zeros (e.g., '01' for January) + Years are returned as integers + """ + months = set(f"{date.month:02d}" for date in self.forecast_dates) + years = set(date.year for date in self.forecast_dates) + return months, years + + @logit(logger) + def _find_gbbepx_nrt_fires(self, NRT_DIRECTORY: str) -> List[str]: + """Find GBBEPx NRT fire files in the specified directory. + + Parameters + ---------- + emis_file_dir : str + Directory to search for GBBEPx NRT fire files + + Returns + ------- + List[str] + List of found GBBEPx NRT fire files + + Notes + ----- + Searches for files matching the pattern "GBBEPx-all01GRID_v4r0_blend_sYYYYMMDD000000_eYYYYMMDD235959_cYYYYMMDDHHMMSS.nc" + where YYYYMMDD represents the date components. 
+ """ + logger.info(f'Finding GBBEPx NRT fire files in {NRT_DIRECTORY}') + dates_to_look_for = range(0, 3) # today and two previous days + + for find_date_index in dates_to_look_for: + find_date = self.start_date - datetime.timedelta(days=find_date_index) + logger.info(f'Looking for files for date: {find_date.strftime("%Y%m%d")}') + NRT_DIRECTORY = NRT_DIRECTORY.replace(self.start_date.strftime('%Y%m%d'), + find_date.strftime('%Y%m%d')) + if not os.path.exists(NRT_DIRECTORY): + logger.warning(f"Directory does not exist: {NRT_DIRECTORY}") + continue + else: + break + + if not os.path.exists(NRT_DIRECTORY): + logger.error(f"Could not find a valid NRT_DIRECTORY for GBBEPx files") + return [] + + all_files = os.listdir(NRT_DIRECTORY) + matching_files = [] + logger.info(f"Searching in directory: {NRT_DIRECTORY}") + logger.debug(f"Total files in directory: {len(all_files)} files") + logger.debug(f"Files found in directory: {all_files}") + # Look for pattern: "GBBEPx-all01GRID_v4r0_blend_s202302240000000_e202302242359590_c202302250134090.nc" + pattern = r"GBBEPx-all01GRID.*_s(\d{8}).*_e(\d{8}).*\.nc" + + for file_name in all_files: + match = re.match(pattern, file_name) + if match: + full_path = os.path.join(NRT_DIRECTORY, file_name) + matching_files.append(full_path) + logger.debug(f"Found GBBEPx NRT fire file: {full_path}") + + # Remove duplicates while preserving order (safety check) + unique_files = [] + seen = set() + for file_path in matching_files: + if file_path not in seen: + unique_files.append(file_path) + seen.add(file_path) + + if len(unique_files) < len(matching_files): + logger.info(f"Found {len(unique_files)} unique GBBEPx NRT files (removed {len(matching_files) - len(unique_files)} duplicates)") + + return unique_files + + @logit(logger) + def _find_gbbepx_files(self, dates, aero_emis_fire_dir=None, version='v5r0'): + """Find GBBEPx files for the given date + + Parameters + ---------- + dates : str or list + Date or dates for which to find GBBEPx files + 
version : str + Version of GBBEPx files to search for + + Returns + ------- + List[str] + List of GBBEPx files for the given date(s) + """ + logger.info(f'Finding GBBEPx files for {dates}') + + # Find all possible months + months, years = self._get_unique_months() + + # Format dates properly for matching + if not isinstance(dates, list): + dates = [dates] + date_strings = [d.strftime('%Y%m%d') if hasattr(d, 'strftime') else str(d) for d in dates] + + files_found = [] + # Find all possible files + if not os.path.exists(aero_emis_fire_dir): + logger.warning(f"Directory does not exist: {aero_emis_fire_dir}") + return files_found + + for mon in months: + + emis_file_dir = aero_emis_fire_dir + + all_files = os.listdir(emis_file_dir) + + matching_files = [] + + # Look for both file patterns: + # Pattern 1: "GBBEPx-all01GRID_v4r0_blend_s202302240000000_e202302242359590_c202302250134090.nc" + # Pattern 2: "GBBEPx_all01GRID.emissions_v004_20150716.nc" + for file_name in all_files: + match_found = False + + # Try pattern 1 with s/e/c date format + pattern1 = r"GBBEPx-all01GRID.*_s(\d{8}).*_e(\d{8}).*\.nc" + match = re.match(pattern1, file_name) + if match: + start_date = match.group(1) + # end_date = match.group(2) + create_date = match.group(3) + + # Check if the file's date matches any of our target dates + for date_str in date_strings: + # Match if the file start date is within our target dates + if date_str in start_date: + full_path = os.path.join(emis_file_dir, file_name) + matching_files.append(full_path) + logger.debug(f"Found GBBEPx file (pattern 1): {full_path}") + match_found = True + break + + # If no match yet, try pattern 2 with YYYYMMDD format at the end + if not match_found and "GBBEPx" in file_name: + pattern2 = r".*_(\d{8})\.nc" + match = re.match(pattern2, file_name) + if match: + file_date = match.group(1) + + # Check if the file's date matches any of our target dates + for date_str in date_strings: + if date_str in file_date: + full_path = 
os.path.join(emis_file_dir, file_name) + matching_files.append(full_path) + logger.debug(f"Found GBBEPx file (pattern 2): {full_path}") + break + + files_found.extend(matching_files) + + # Remove duplicates while preserving order + unique_files = [] + seen = set() + for file_path in files_found: + if file_path not in seen: + unique_files.append(file_path) + seen.add(file_path) + + logger.info(f"Found {len(unique_files)} unique GBBEPx files (removed {len(files_found) - len(unique_files)} duplicates)") + return unique_files + + @logit(logger) + def _find_qfed_files(self, dates, vars, version='061', aero_emis_fire_dir=None): + """Find QFED files for the given date(s) + + Parameters + ---------- + dates : str, datetime, or list + Date or dates for which to find QFED files + vars : list + List of variables to search for (e.g., bc, oc, co, etc.) + version : str + Version of QFED files to search for, will be zero-padded to 3 digits + aero_emis_fire_dir : str, optional + Directory containing fire emission data. 
If None, uses self.task_config.AERO_EMIS_FIRE_DIR + + Returns + ------- + List[str] + List of QFED files for the given date(s) and variables + """ + logger.info(f'Finding QFED files for {dates}') + + # Use provided directory or fall back to config value + logger.info(f'Using emissions directory: {aero_emis_fire_dir}') + + # ensure version is a string + version = str(version).zfill(3) + + # Convert single date to list for consistent processing + if not isinstance(dates, list): + dates = [dates] + + # Format dates properly + date_strings = [d.strftime('%Y%m%d') if hasattr(d, 'strftime') else str(d) for d in dates] + + files_found = [] + + for date in dates: + # Extract year and month from the date + if hasattr(date, 'year') and hasattr(date, 'month'): + year = str(date.year) + month = f"{date.month:02d}" + else: + # If date is a string, try to parse it + date_str = str(date) + if len(date_str) >= 8: # YYYYMMDD format + year = date_str[:4] + month = date_str[4:6] + else: + logger.warning(f"Cannot parse date format: {date}") + continue + + emis_file_dir = os.path.join(aero_emis_fire_dir, year, month) + + if not os.path.exists(emis_file_dir): + logger.warning(f"Directory does not exist: {emis_file_dir}") + continue + + # Format date string for file matching + date_str = date.strftime('%Y%m%d') if hasattr(date, 'strftime') else str(date) + if len(date_str) > 8: # Format may be YYYY-MM-DD + date_str = date_str.replace('-', '') + + for v in vars: + # Match pattern like qfed2.emis_bc.{version}.20200118.nc4 + v_pattern = f"qfed2.emis_{v}.{version}.{date_str}.nc4" + full_path = os.path.join(emis_file_dir, v_pattern) + + # If exact match exists + if os.path.exists(full_path): + files_found.append(full_path) + logger.debug(f"Found exact QFED file: {full_path}") + + if not files_found: + logger.warning(f"No QFED files found for dates {date_strings} and variables {vars}") + else: + # Remove duplicates while preserving order + unique_files = [] + seen = set() + for file_path in 
files_found: + if file_path not in seen: + unique_files.append(file_path) + seen.add(file_path) + + if len(unique_files) < len(files_found): + logger.info(f"Found {len(unique_files)} unique QFED files (removed {len(files_found) - len(unique_files)} duplicates)") + files_found = unique_files + + return files_found + + @logit(logger) + def GBBEPx_to_COARDS(self, fname: Union[str, os.PathLike]) -> xr.Dataset: + """Convert GBBEPx file to COARDS compliant format + + Parameters + ---------- + fname : str | os.PathLike + Input GBBEPx file path + + Returns + ------- + xr.Dataset + COARDS compliant dataset + """ + logger.info(f"Converting {fname} to COARDS format") + f = xr.open_dataset(fname, decode_cf=False) + f = f[['OC', 'BC', 'SO2', 'NOx', 'CO', 'NH3']] + if 'time' in f.dims and 'lon' in f.dims and 'lat' in f.dims: + logger.info("File already in COARDS format") + return None # Already in COARDS format + + # Handle time dimension + if 'Time' in f.dims: + f = f.rename({"Time": 'time'}) + f.time.attrs['long_name'] = 'time' + + # Modify latitude and longitude attributes + f = f.rename({'Longitude': 'lon', 'Latitude': 'lat'}) + + # Validate and normalize coordinates + # Check longitude range and monotonicity + if not (f.lon.diff('lon') > 0).all(): + raise WorkflowException("Longitude values must be strictly increasing") + + # Ensure longitude is in [-180, 180] range + f['lon'] = xr.where(f.lon > 180, f.lon - 360, f.lon) + f = f.sortby('lon') # Sort after potential wrapping + + # Check latitude monotonicity + if not (f.lat.diff('lat') > 0).all(): + raise WorkflowException("Latitude values must be strictly increasing") + + f.lon.attrs.update({'long_name': 'Longitude', 'units': 'degrees_east'}) + f.lat.attrs.update({'long_name': 'Latitude', 'units': 'degrees_north'}) + + # remove unnessicary attributes + f['lat'].attrs.pop('valid_range', None) + f['lat'].attrs.pop('scale_factor', None) + f['lat'].attrs.pop('add_offset', None) + f['lat'].attrs.pop('_FillValue', None) + 
f['time'].attrs.pop('begin_date', None) + f['time'].attrs.pop('begin_time', None) + f['time'].attrs.pop('time_increment', None) + f['time'].attrs.pop('calendar', None) + + # Remove Element dimension if present + if 'Element' in f.dims: + f = f.drop_dims('Element') + + # Update variable attributes + for v in f.data_vars: + if v not in ['FirePerc', 'QCAll', 'NumSensor', 'CloudPerc']: + f[v].attrs['_FillValue'] = -9999.0 + elif v == 'FirePerc': + f[v].attrs.update({'units': '-', 'long_name': 'percent_of_fire_in_grid_cell'}) + elif v == 'CloudPerc': + f[v].attrs.update({'units': '-', 'long_name': 'percent_of_clouds_in_grid_cell'}) + elif v == 'NumSensor': + f[v].attrs['units'] = '-' + if 'coordinates' in f[v].attrs: + del f[v].attrs['coordinates'] + + # Set global attributes + f.attrs.update({'format': 'NetCDF', 'title': 'GBBEPx Fire Emissions'}) + + return f + + @logit(logger) + def combine_qfed_files(self, qfed_files: List[str], output_path: str = None) -> xr.Dataset: + """Combine multiple QFED emission files into a single NetCDF file. + + Parameters + ---------- + qfed_files : List[str] + List of QFED file paths to combine + output_path : str, optional + Path where to save the combined file. If None, returns the dataset without saving. + + Returns + ------- + xr.Dataset + Combined dataset containing all QFED variables + + Notes + ----- + This function loads each file individually and combines them without using dask. + Uses decode_cf=False as required for QFED files. + Preprocessing renames biomass variables to uppercase emission type (e.g., biomass -> BC), + as well as related variables like biomass_tf -> BC_tf to avoid conflicts during merge. + Files are grouped by variable type, processed, and then combined using merge with compat='override'. 
+ """ + if not qfed_files: + logger.warning("No QFED files provided to combine") + return None + + logger.info(f"Combining {len(qfed_files)} QFED files") + + try: + # Group files by variable type for easier processing + var_groups = {} + for file_path in qfed_files: + file_name = os.path.basename(file_path) + if "qfed2.emis_" in file_name: + parts = file_name.split('.') + if len(parts) >= 3: + var_type = parts[1].split('_')[1].lower() # Extract variable after emis_ + if var_type not in var_groups: + var_groups[var_type] = [] + var_groups[var_type].append(file_path) + + # Process each variable group + datasets_by_var = {} + for var_type, files in var_groups.items(): + logger.info(f"Processing {len(files)} files for variable: {var_type}") + var_datasets = [] + + for file_path in files: + logger.info(f"Opening file: {file_path}") + # Open dataset + ds = xr.open_dataset(file_path, decode_cf=False) + + # Get uppercase variable name for this type + var_name = var_type.upper() + + # Find all variables that need to be renamed for this emission type + rename_dict = {} + for dvar in ds.data_vars: + # Main biomass variable + if dvar == 'biomass': + rename_dict[dvar] = var_name + # Related variables like biomass_tf, biomass_xxx, etc. 
+ elif dvar.startswith('biomass_'): + # Keep the suffix but prefix with the variable type + suffix = dvar[8:] # Get part after 'biomass_' + rename_dict[dvar] = f"{var_name}_{suffix}" + + # Rename all identified variables + if rename_dict: + logger.info(f"Renaming variables in {os.path.basename(file_path)}: {rename_dict}") + ds = ds.rename(rename_dict) + + var_datasets.append(ds) + + # Concatenate datasets for this variable along the time dimension if needed + if len(var_datasets) > 1: + try: + concat_ds = xr.concat(var_datasets, dim='time') + datasets_by_var[var_type] = concat_ds + except (ValueError, KeyError) as e: + logger.warning(f"Could not concatenate along time for {var_type}: {e}") + # If concatenation fails, just use the first dataset + datasets_by_var[var_type] = var_datasets[0] + for ds in var_datasets[1:]: + ds.close() + else: + datasets_by_var[var_type] = var_datasets[0] + + # Merge datasets across different variables + if datasets_by_var: + var_list = list(datasets_by_var.values()) + combined_ds = var_list[0] + + # Merge remaining datasets with compat='override' to handle conflicting values + for i in range(1, len(var_list)): + combined_ds = combined_ds.merge(var_list[i], compat='override') + + # Add global attributes + combined_ds.attrs.update({ + 'title': 'Combined QFED emissions', + 'source': 'QFED', + 'created_by': 'AerosolEmissions.combine_qfed_files', + 'creation_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + }) + + # Save to file if output path is provided + if output_path: + logger.info(f"Saving combined QFED dataset to {output_path}") + combined_ds.to_netcdf(output_path) + + # Close individual datasets to free memory + for ds_list in datasets_by_var.values(): + if hasattr(ds_list, 'close'): + ds_list.close() + + return combined_ds + else: + logger.warning("No valid datasets found to combine") + return None + + except Exception as e: + logger.error(f"Error combining QFED files: {e}") + logger.error(f"Traceback: 
{traceback.format_exc()}") + return None + + @logit(logger) + def _process_gbbepx_files(self, workdir: str) -> List[str]: + """Process GBBEPx files for each forecast date. + + Parameters + ---------- + workdir : str + Working directory path where processed files will be saved + + Returns + ------- + List[str] + List of processed file paths + + Notes + ----- + This method processes GBBEPx files for each forecast date by: + 1. Filtering raw files by date (if date filtering logic is implemented) + 2. Converting files to COARDS format using GBBEPx_to_COARDS + 3. Saving the processed dataset to a NetCDF file + 4. Returning the list of processed file paths + """ + logger.info(f"Processing GBBEPx files for {len(self.forecast_dates)} forecast dates") + processed_files = [] + + if not self.historical: # only one file to process for multiple dates (need to change time in each file) + logger.info("Non-historical GBBEPx processing - only one file expected") + if self.task_config.rawfiles: + logger.info(f"Processing single GBBEPx file: {self.task_config.rawfiles[0]}") + ds = self.GBBEPx_to_COARDS(self.task_config.rawfiles[0]) + + for index, forecast_date in enumerate(self.forecast_dates): + logger.info(f"Setting time for forecast date: {forecast_date}") + # Set time dimension to index for days since (0, 1, 2, ..., nforecast_dates -1) + # ds = ds.assign(time=[float(index)]) + ds.time.attrs['long_name'] = 'time' + ds.time.attrs['units'] = f'minutes since {forecast_date.strftime("%Y-%m-%d 12:00:00")}' + + # Save the processed dataset + outfile_name = f"FIRE_EMIS_{forecast_date.strftime('%Y%m%d')}.nc" + outfile = os.path.join(workdir, outfile_name) + comp = dict(zlib=True, complevel=2, _FillValue=None) + encoding = {var: comp for var in ds.data_vars} + ds.to_netcdf(outfile, encoding=encoding, unlimited_dims=['time']) + logger.info(f"Processed emission file saved to {outfile}") + processed_files.append(outfile) + ds.close() + else: + logger.warning("No raw GBBEPx files found for 
non-historical processing") + else: + logger.info(f"RAWFILES for historical GBBEPx processing: {self.task_config.rawfiles}") + for forecast_date, date_file in zip(self.forecast_dates, self.task_config.rawfiles): + date_str = forecast_date.strftime('%Y%m%d') + logger.info(f"Processing GBBEPx files for date {date_str} from file {date_file}") + + # Create output filename with date + outfile_name = f"FIRE_EMIS_{date_str}.nc" + outfile = os.path.join(workdir, outfile_name) + + ds = self.GBBEPx_to_COARDS(date_file) + + if ds is None: # file was already in COARDS format + logger.info(f"File {date_file} already in COARDS format, copying to {outfile}") + shutil.copy(date_file, outfile) + else: + # Save the processed dataset + comp = dict(zlib=True, complevel=2) + encoding = {var: comp for var in ds.data_vars} + ds.to_netcdf(outfile, encoding=encoding, unlimited_dims=['time']) + logger.info(f"Processed emission file saved to {outfile}") + + # Close dataset + ds.close() + + processed_files.append(outfile) + + return processed_files + + @logit(logger) + def _process_qfed_files(self, workdir: str) -> List[str]: + """Process QFED files for each forecast date. + + Parameters + ---------- + workdir : str + Working directory path where processed files will be saved + + Returns + ------- + List[str] + List of processed file paths + + Notes + ----- + This method processes QFED files for each forecast date by: + 1. Filtering raw files by date + 2. Combining files for each date using combine_qfed_files + 3. Saving the combined dataset to a NetCDF file + 4. 
Returning the list of processed file paths + """ + logger.info(f"Processing QFED files for {len(self.forecast_dates)} forecast dates") + processed_files = [] + + for forecast_date in self.forecast_dates: + date_str = forecast_date.strftime('%Y%m%d') + logger.info(f"Processing QFED files for date {date_str}") + + # Filter files for this date + date_files = [] + for file_path in self.task_config.rawfiles: + file_name = os.path.basename(file_path) + if date_str in file_name: + date_files.append(file_path) + + if date_files: + logger.info(f"Found {len(date_files)} QFED files for date {date_str}") + + # Combine QFED files for this date + ds = self.combine_qfed_files(date_files) + + if ds is not None: + # Create output filename with date + outfile_name = f"FIRE_EMIS_{date_str}.nc" + outfile = os.path.join(workdir, outfile_name) + + # Save the processed dataset + comp = dict(zlib=True, complevel=2) + encoding = {var: comp for var in ds.data_vars} + ds.to_netcdf(outfile, encoding=encoding) + logger.info(f"Processed emission file for {date_str} saved to {outfile}") + + # Add to processed files list + processed_files.append(outfile) + + # Close dataset + ds.close() + else: + logger.warning(f"Failed to combine QFED files for date {date_str}") + else: + logger.warning(f"No QFED files found for date {date_str}") + + return processed_files + + @logit(logger) + def render_template(self, tmpl_dict: Dict[str, Any]) -> None: + """Render the YAML template and set up task configuration. + + This method performs the following steps: + 1. Loads and parses the YAML template file using Jinja2 + 2. Fills in configuration parameters using environment variables and task attributes + 3. 
Updates the task configuration with the rendered YAML content + + Parameters + ---------- + tmp_dict : Dict + Dictionary containing template variables and their values + + """ + logger.info("Rendering YAML template") + # Parse template and update task configuration + yaml_template = os.path.join(self.task_config.HOMEgfs, 'parm', 'chem', 'fire_emission.yaml.j2') + if not os.path.exists(yaml_template): + logger.warning(f"Template file not found: {yaml_template}, using default configuration") + yaml_config = {'fire_emission': {}} + else: + logger.debug(f'Parsing YAML template: {yaml_template}') + yaml_config = parse_j2yaml(yaml_template, tmpl_dict) + return yaml_config diff --git a/ush/python/pygfs/task/nexus_emission.py b/ush/python/pygfs/task/nexus_emission.py new file mode 100644 index 00000000000..dfc4a3ef360 --- /dev/null +++ b/ush/python/pygfs/task/nexus_emission.py @@ -0,0 +1,1065 @@ +#!/usr/bin/env python3 + +import os +import re +from pathlib import Path +from collections import defaultdict +import xarray as xr +from logging import getLogger +from datetime import datetime, timedelta +from typing import Dict, Any, Union, List +from dateutil.rrule import DAILY, HOURLY, rrule +from collections import defaultdict +from datetime import timedelta +from wxflow import (AttrDict, + FileHandler, + parse_j2yaml, + logit, + Task, + Jinja, + to_timedelta, + WorkflowException, + Executable) + +# Try to import toml, fail gracefully if not installed +try: + import toml +except ImportError: + try: + import tomllib as toml # Python 3.11+ + except ImportError: + class DummyTOML: + def load(self, f): + return {} + toml = DummyTOML() + +logger = getLogger(__name__.split('.')[-1]) + + +class NEXUSEmissions(Task): + """NEXUS Emissions pre-processing Task + """ + + @logit(logger, name="NEXUSEmissions") + def __init__(self, config: Dict[str, Any]) -> None: + """Constructor for the NEXUS Emissions task + + Parameters + ---------- + config : Dict[str, Any] + Incoming configuration for 
the task from the environment + + Returns + ------- + None + """ + super().__init__(config) + + # self.task_config = AttrDict(config) + self.AERO_INPUTS_DIR = self.task_config.get('AERO_INPUTS_DIR', None) + self.COMOUT_CHEM_INPUT = self.task_config.get('COMOUT_CHEM_INPUT', None) + + # get the nforecast hours - gcdas will use FHMAX and gcafs will use FHMAX_GFS + if 'das' in self.task_config['RUN']: + nforecast_hours = self.task_config["FHMAX"] + else: + nforecast_hours = self.task_config["FHMAX_GFS"] + + self.start_date = self.task_config["current_cycle"] + self.total_hrs = nforecast_hours + 1 + self.end_date = self.task_config["current_cycle"] + to_timedelta(f'{self.total_hrs}H') + + logger.info(f'start_date: {self.start_date}') + logger.info(f'nforecast_hours: {nforecast_hours}') + logger.info(f'Computed end_date: {self.end_date} (total_hrs={self.total_hrs})') + + # Create the forecast dates based on start_date and end_date + frequency = self.task_config.get("NEXUS_DIAG_FREQ", "Hourly") + if frequency == "Hourly": + self.forecast_dates = list(rrule(freq=HOURLY, dtstart=self.start_date, until=self.end_date)) + elif frequency == 'Daily': + self.forecast_dates = list(rrule(freq=DAILY, dtstart=self.start_date, until=self.end_date)) + else: + raise WorkflowException(f"Unsupported NEXUS_DIAG_FREQ: {frequency}") + + self.forecast_dates_daily = list(rrule(freq=DAILY, dtstart=self.start_date, until=self.end_date)) + + logger.info(f"NEXUSEmissions initialized with start date: {self.start_date}, end date: {self.end_date}") + + @logit(logger) + def initialize(self) -> None: + """Initialize the work directory and process chemical emissions configuration. + + This method performs the following steps: + 1. Render the NEXUS configuration files using Jinja2 templates + found in `parm/chem/nexus/$NEXUS_CONFIG` + 2. Sets up template variables for emission configuration + 3. Creates necessary working directories + 4. Copies required input files to working directory + 5. 
Sets up forecast dates and file paths for each date + + Parameters + ---------- + None + + Returns + ------- + None + + Raises + ------ + WorkflowException + If the YAML template file is not found + If required directories cannot be created + If file copying operations fail + + Notes + ----- + The method expects the following configuration to be available: + - HOMEgfs : str + Base directory containing workflow configuration + - DATA : str + Working directory path + - COMOUT_CHEM_INPUT : str + Output directory for chemical input files + - AERO_EMIS_FIRE_DIR : str + Directory containing fire emission data + + The configuration is processed through a Jinja2 template system + and the resulting setup is stored in self.task_config. + """ + logger.info("Initializing NEXUS emissions pre-processing task") + + # + logger.info("Rendering NEXUS configuration files") + # Check for required NEXUS configuration parameters + required_nexus_params = [ + 'NEXUS_CONFIG', + 'NEXUS_CONFIG_DIR', + 'NEXUS_INPUT_DIR', + ] + for param in required_nexus_params: + if not self.task_config.get(param, None): + raise WorkflowException(f"{param} must be set in task configuration") + + nexus_config_set = self.task_config.get('NEXUS_CONFIG', None) + nexus_config_dir = self.task_config.get('NEXUS_CONFIG_DIR', None) + nexus_input_dir = self.task_config.get('NEXUS_INPUT_DIR', None) + + # Default NEXUS_TSTEP to 3600 seconds (1 hour) if not set + nexus_tstep = self.task_config.get('NEXUS_TSTEP', 3600) + + logger.info(f"Using NEXUS_CONFIG: {nexus_config_set}") + logger.info(f"Using NEXUS_CONFIG_DIR: {nexus_config_dir}") + logger.info(f"Using NEXUS_INPUT_DIR: {nexus_input_dir}") + logger.info(f"Using NEXUS_TSTEP: {nexus_tstep}") + + # Check for grid parameters + required_grid_params = [ + 'NEXUS_NX', + 'NEXUS_NY', + 'NEXUS_NZ', + 'NEXUS_XMIN', + 'NEXUS_XMAX', + 'NEXUS_YMIN', + 'NEXUS_YMAX' + ] + for param in required_grid_params: + if not self.task_config.get(param, None): + raise 
WorkflowException(f"{param} must be set in task configuration") + + logger.info(f"Grid parameters: NEXUS_NX={self.task_config.NEXUS_NX}") + logger.info(f"Grid parameters: NEXUS_NY={self.task_config.NEXUS_NY}") + logger.info(f"Grid parameters: NEXUS_NZ={self.task_config.NEXUS_NZ}") + logger.info(f"Grid parameters: NEXUS_XMIN={self.task_config.NEXUS_XMIN}") + logger.info(f"Grid parameters: NEXUS_XMAX={self.task_config.NEXUS_XMAX}") + logger.info(f"Grid parameters: NEXUS_YMIN={self.task_config.NEXUS_YMIN}") + logger.info(f"Grid parameters: NEXUS_YMAX={self.task_config.NEXUS_YMAX}") + + processed_nexus_files = [] + final_output_files = [] + sorted_dates = sorted(self.forecast_dates) + for d in sorted_dates[:-1]: + fname = f"{self.task_config.NEXUS_DIAG_PREFIX}.{d.strftime('%Y%m%d%H')}00.nc" + fname_final = f"{self.task_config.NEXUS_DIAG_PREFIX}.{d.strftime('%Y%m%d')}.nc" + processed_nexus_files.append(fname) + final_output_files.append(fname_final) + final_output_files = list(set(final_output_files)) + logger.info(f"Final output files: {final_output_files}") + self.processed_nexus_files = processed_nexus_files + # render the NEXUS configuration files + if not os.path.exists(nexus_config_dir): + raise WorkflowException(f"NEXUS configuration file not found: {nexus_config_dir}") + + logger.info(f"Rendering NEXUS configuration from {nexus_config_dir}") + tmpl_dict = { + 'NEXUS_CONFIG': nexus_config_set, + 'NEXUS_CONFIG_DIR': nexus_config_dir, + 'NEXUS_INPUT_DIR': nexus_input_dir, + 'NEXUS_DIAG_PREFIX': self.task_config.NEXUS_DIAG_PREFIX, + 'NEXUS_TSTEP': nexus_tstep, + 'NEXUS_NX': self.task_config.NEXUS_NX, + 'NEXUS_NY': self.task_config.NEXUS_NY, + 'NEXUS_NZ': self.task_config.NEXUS_NZ, + 'NEXUS_XMIN': self.task_config.NEXUS_XMIN, + 'NEXUS_XMAX': self.task_config.NEXUS_XMAX, + 'NEXUS_YMIN': self.task_config.NEXUS_YMIN, + 'NEXUS_YMAX': self.task_config.NEXUS_YMAX, + 'LOCAL_INPUT_DIR': os.path.join(self.task_config.DATA, 'INPUT'), + 'NEXUS_EXECUTABLE': 
os.path.join(self.task_config.get('HOMEgfs', None), "exec/nexus.x"), + "DATA": self.task_config.DATA, + "NEXUS_DO_MEGAN": self.task_config.get('NEXUS_DO_MEGAN', False), + "NEXUS_DO_CEDS2019": self.task_config.get('NEXUS_DO_CEDS2019', True), + "NEXUS_DO_CEDS2024": self.task_config.get('NEXUS_DO_CEDS2024', False), + "NEXUS_DO_HTAPv2": self.task_config.get('NEXUS_DO_HTAPv2', True), + "NEXUS_DO_HTAPv3": self.task_config.get('NEXUS_DO_HTAPv3', False), + "NEXUS_DO_CAMS": self.task_config.get('NEXUS_DO_CAMS', False), + "NEXUS_DO_CAMSTEMPO": self.task_config.get('NEXUS_DO_CAMSTEMPO', False), + "start_date": self.start_date.strftime('%Y-%m-%d %H:%M:%S'), + "end_date": self.end_date.strftime('%Y-%m-%d %H:%M:%S'), + "FINAL_OUTPUT": final_output_files, + "COMOUT_CHEM_INPUT": self.task_config.COMOUT_CHEM_INPUT, + "COMOUT_CHEM_RESTART": self.task_config.COMOUT_CHEM_RESTART, + "RestartFile": f"HEMCO_restart.{self.end_date.strftime('%Y%m%d%H')}00.nc", + "processed_nexus_files": processed_nexus_files, + "nmem_ens": self.task_config.NMEM_ENS, + } + + # Render NEXUS Grid File + nexus_grid_template = os.path.join(nexus_config_dir, f"{self.task_config.NEXUS_GRID_NAME}.j2") + logger.info(f"Rendering NEXUS grid file using template: {nexus_grid_template}") + if not os.path.exists(nexus_grid_template): + raise WorkflowException(f"NEXUS grid template file not found: {nexus_grid_template}") + j2_renderer = Jinja(nexus_grid_template, tmpl_dict) + outfile = os.path.join(self.task_config.DATA, self.task_config.NEXUS_GRID_NAME) + j2_renderer.save(outfile) + logger.info(f"NEXUS grid file rendered successfully: written to {outfile}") + + # Render NEXUS Config File + nexus_config_template = os.path.join(nexus_config_dir, f"{self.task_config.NEXUS_CONFIG_NAME}.j2") + logger.info(f"Rendering NEXUS config file using template: {nexus_config_template}") + if not os.path.exists(nexus_config_template): + raise WorkflowException(f"NEXUS config template file not found: {nexus_config_template}") + 
j2_renderer = Jinja(nexus_config_template, tmpl_dict) + outfile = os.path.join(self.task_config.DATA, self.task_config.NEXUS_CONFIG_NAME) + j2_renderer.save(outfile) + logger.info(f"NEXUS config file rendered successfully: written to {outfile}") + + # Render NEXUS Time File + nexus_time_template = os.path.join(nexus_config_dir, f"{self.task_config.NEXUS_TIME_NAME}.j2") + logger.info(f"Rendering NEXUS time file using template: {nexus_time_template}") + if not os.path.exists(nexus_time_template): + raise WorkflowException(f"NEXUS time template file not found: {nexus_time_template}") + j2_renderer = Jinja(nexus_time_template, tmpl_dict) + outfile = os.path.join(self.task_config.DATA, self.task_config.NEXUS_TIME_NAME) + j2_renderer.save(outfile) + logger.info(f"NEXUS time file rendered successfully: written to {outfile}") + + # Render NEXUS Diag File + nexus_diag_template = os.path.join(nexus_config_dir, f"{self.task_config.NEXUS_DIAG_NAME}.j2") + logger.info(f"Rendering NEXUS diag file using template: {nexus_diag_template}") + if not os.path.exists(nexus_diag_template): + raise WorkflowException(f"NEXUS diag template file not found: {nexus_diag_template}") + j2_renderer = Jinja(nexus_diag_template, tmpl_dict) + outfile = os.path.join(self.task_config.DATA, self.task_config.NEXUS_DIAG_NAME) + j2_renderer.save(outfile) + logger.info(f"NEXUS diag file rendered successfully: written to {outfile}") + + # Render NEXUS Spec File + nexus_spec_template = os.path.join(nexus_config_dir, f"{self.task_config.NEXUS_SPEC_NAME}.j2") + logger.info(f"Rendering NEXUS spec file using template: {nexus_spec_template}") + if not os.path.exists(nexus_spec_template): + raise WorkflowException(f"NEXUS spec template file not found: {nexus_spec_template}") + j2_renderer = Jinja(nexus_spec_template, tmpl_dict) + outfile = os.path.join(self.task_config.DATA, self.task_config.NEXUS_SPEC_NAME) + j2_renderer.save(outfile) + logger.info(f"NEXUS spec file rendered successfully: written to {outfile}") + 
+ # find needed inputs + found_files, missing_files, root_path = gather_emissions_files_from_time_file( + hemco_config_path=os.path.join(self.task_config.DATA, self.task_config.NEXUS_CONFIG_NAME), + hemco_time_path=os.path.join(self.task_config.DATA, self.task_config.NEXUS_TIME_NAME), + ) + if len(missing_files) > 0: + for mf in missing_files: + logger.error(f"Missing NEXUS emission input file: {mf}") + raise WorkflowException(f"Missing {len(missing_files)} NEXUS emission input files, cannot proceed") + + tmpl_dict["NEXUS_INPUT_FILES"] = found_files + tmpl_dict["NEXUS_COPY_TO_FILES"] = [os.path.join(self.task_config.DATA, 'INPUT', os.path.relpath(f, root_path)) for f in found_files] + tmpl_dict["NEXUS_INPUT_DIR"] = os.path.join(self.task_config.DATA, 'INPUT') + # Create all necessary directories for the destination files + for dest_file in tmpl_dict["NEXUS_COPY_TO_FILES"]: + dest_dir = os.path.dirname(dest_file) + os.makedirs(dest_dir, exist_ok=True) + + yaml_template = os.path.join(self.task_config.HOMEgfs, 'parm', 'chem', 'nexus_emission.yaml.j2') + if not os.path.exists(yaml_template): + logger.warning(f"Template file not found: {yaml_template}, using default configuration") + yaml_config = {'nexus_emission': {}} + else: + logger.debug(f'Parsing YAML template: {yaml_template}') + yaml_config = parse_j2yaml(yaml_template, tmpl_dict) + # Add yaml configuration to task_config + self.task_config = AttrDict(**self.task_config, **yaml_config) + + # Link NEXUS input directory to the working directory + FileHandler(self.task_config.nexus_emission.data_in).sync() + logger.info(f"NEXUS input directory linked to {self.task_config.DATA}") + + # Rerender NEXUS config files with updated input files + # Render NEXUS Config File + nexus_config_template = os.path.join(nexus_config_dir, f"{self.task_config.NEXUS_CONFIG_NAME}.j2") + logger.info(f"Rendering NEXUS config file using template: {nexus_config_template}") + if not os.path.exists(nexus_config_template): + raise 
WorkflowException(f"NEXUS config template file not found: {nexus_config_template}") + j2_renderer = Jinja(nexus_config_template, tmpl_dict) + outfile = os.path.join(self.task_config.DATA, self.task_config.NEXUS_CONFIG_NAME) + j2_renderer.save(outfile) + logger.info(f"NEXUS config file rendered successfully: written to {outfile}") + + # create a directory in the self.task_config.DATA/Restarts + os.makedirs(os.path.join(self.task_config.DATA, 'Restarts'), exist_ok=True) + logger.info(f"Created Restarts directory: {os.path.join(self.task_config.DATA, 'Restarts')}") + + @logit(logger) + def execute(self) -> None: + """Run NEXUS emission preprocessor based on configuration. + + This will run the NEXUS preprocessor executable with the provided configuration. + It will process the emission files based on the task configuration and forecast dates. + It will also handle different types of emissions based on the configuration. + + Parameters + ---------- + None + + Returns + ------- + None + + Notes + ----- + - This method assumes that the NEXUS preprocessor executable is available in the PATH. + - It will log the processing steps and any issues encountered. 
+ Raises + ------ + WorkflowException + If the NEXUS preprocessor executable is not found + If the working directory does not exist + If no emission files are found for processing + """ + logger.info(f"Running NEXUS emission preprocessor in {self.task_config.DATA}") + logger.info(f"NEXUS Logs: {self.task_config.DATA}/stdout") + logger.info(f"NEXUS Logs: {self.task_config.DATA}/stderr") + logger.info(f"NEXUS Logs: {self.task_config.DATA}/NEXUS.log") + + if not os.path.exists(self.task_config.DATA): + raise WorkflowException(f"Working directory does not exist: {self.task_config.DATA}") + + exe = Executable(self.task_config.APRUN) + + if os.path.exists("nexus.x") is False: + raise WorkflowException("NEXUS preprocessor executable 'nexus.x' not found in PATH") + + arg_list = ['./nexus.x', '-c', self.task_config.NEXUS_CONFIG_NAME] + exe(*arg_list, output='stdout', error='stderr') + + logger.info("Concatenating processed NEXUS files...") + + # sort the files even though they should be sorted already | safety check + files = sorted(self.processed_nexus_files) + + for i in files: + + if not os.path.exists(i): + logger.warning(f"NEXUS file not found: {i}") + continue + else: + logger.info(f"NEXUS file found: {i}") + + for f, dates in zip(files, self.forecast_dates): + logger.info(f" - {f}, {dates}") + + day_indexes = _get_day_indices(self.forecast_dates[:-1]) # hemco doesn't write out the last timestep + # now loop over each days + for date, indexes in day_indexes.items(): + day_str = date.strftime('%Y%m%d') + logger.info(f"Processing NEXUS files for date: {date}") + + dsets = [] + for index in indexes: + # list files for log + logger.info(f" - {files[index]}, {index}") + + # now concatenate the files per day + if os.path.exists(files[index]) is False: + break + ds = xr.open_dataset(files[index], decode_cf=False) + + # update time coordinate + ds = ds.assign_coords(time=('time', [index])) + + # set time units to reference start-date + ds.time.attrs['units'] = 
self.start_date.strftime('hours since %Y-%m-%d %H:00:00') + + # append + dsets.append(ds) + + # concatenate all the files for this day + if len(dsets) == 0: + break + else: + ds = xr.concat(dsets, dim='time') + + encoding = {var: {"zlib": True, "complevel": 2} for var in ds.data_vars} + outname = f"{self.task_config.NEXUS_DIAG_PREFIX}.{day_str}.nc" + ds.to_netcdf(outname, format="NETCDF4", encoding=encoding) + logger.info(f"Wrote daily output: {outname}") + + logger.info("NEXUS emission processing execute phase complete") + + @logit(logger) + def finalize(self) -> None: + """Perform closing actions of the task. + Copy processed files from the DATA directory to COMOUT_CHEM_INPUT. + + Returns + ------- + None + + Notes + ----- + Only copies processed NEXUS files to the output directory. + Uses FileHandler for reliable file operations with logging + """ + logger.info("Finalizing NEXUS emissions processing") + + FileHandler(self.task_config.nexus_emission.data_out).sync() + + logger.info("Chemical emissions finalization complete") + + +def _write_txt_file(content: str, file_path: Union[str, os.PathLike]) -> None: + """Write content to a text file. + + Parameters + ---------- + content : str + Content to write to the file. + file_path : Union[str, os.PathLike] + Path where the file will be created. + + Returns + ------- + None + + Notes + ----- + If the directory does not exist, it will be created. + """ + os.makedirs(os.path.dirname(file_path), exist_ok=True) + with open(file_path, 'w') as f: + f.write(content) + + +def _get_day_indices(datetimes: List[datetime]) -> Dict[datetime, List[int]]: + """ + Group indices of datetimes by day, including midnight in both days. + + Parameters + ---------- + datetimes : list of datetime.datetime + List of datetime objects. + + Returns + ------- + dict + Dictionary mapping datetime.datetime (at midnight) to list of indices. 
+ Each day includes all hours from 00:00 of that day through 00:00 of the next day, + and the midnight index is included in both days. + """ + + grouped = defaultdict(list) + + for idx, dt in enumerate(datetimes): + day_dt = dt.replace(hour=0, minute=0, second=0, microsecond=0) + grouped[day_dt].append(idx) + # If this is exactly midnight, also add to previous day + if dt.hour == 0 and dt.minute == 0 and dt.second == 0 and dt.microsecond == 0: + prev_day = day_dt - timedelta(days=1) + grouped[prev_day].append(idx) + + return dict(grouped) + + +def daterange(start_date: datetime, end_date: datetime): + """ + Generate dates from start_date to end_date inclusive. + + Parameters + ---------- + start_date : datetime + Start date (inclusive). + end_date : datetime + End date (inclusive). + + Yields + ------ + datetime + Each date in the range. + """ + for n in range(int((end_date - start_date).days) + 1): + yield start_date + timedelta(n) + + +def parse_year_bounds(hemco_time_str: str) -> tuple[None | int, None | int]: + """ + Parses a HEMCO time string for year bounds. + + Parameters + ---------- + hemco_time_str : str + HEMCO time string, e.g. '2000-2022/1-12/1/0'. + + Returns + ------- + tuple of (int or None, int or None) + (start_year, end_year) or (None, None) if wildcard or invalid. + """ + if not hemco_time_str or hemco_time_str.strip() == "*": + return None, None + + date_part = hemco_time_str.split('/')[0].strip() + if "-" in date_part: + try: + parts = date_part.split('-') + start_y = int(parts[0]) + end_y = int(parts[-1]) + return start_y, end_y + except ValueError: + return None, None + else: + try: + val = int(date_part) + return val, val + except ValueError: + return None, None + + +def resolve_variables(path: str, var_definitions: dict[str, str]) -> str: + """ + Replace variables in a path string using a dictionary. + + Parameters + ---------- + path : str + Path string with variables (e.g., $ROOT). 
+ var_definitions : dict of str + Dictionary of variable names to values. + + Returns + ------- + str + Path with variables replaced. + """ + resolved_path = path.replace("$$", "$") + for key in sorted(var_definitions.keys(), key=len, reverse=True): + if key in resolved_path: + resolved_path = resolved_path.replace(key, var_definitions[key]) + return resolved_path + + +def expand_filenames( + file_template: str, + hemco_time_str: str, + sector_conf: dict[str, Any], + start_date: datetime, + end_date: datetime +) -> set[str]: + """ + Expand file templates into actual filenames for a date range and frequency. + + Parameters + ---------- + file_template : str + Template for file names (e.g., "$YYYY$MM$DD.nc"). + hemco_time_str : str + HEMCO time string for year bounds. + sector_conf : dict + Sector configuration, must contain 'frequency' and optionally 'patterns'. + start_date : datetime + Start date for expansion. + end_date : datetime + End date for expansion. + + Returns + ------- + set of str + Set of expanded file names. 
+ """ + generated_files = set() + freq = sector_conf.get("frequency", "monthly") + patterns = sector_conf.get("patterns", []) + + min_year, max_year = parse_year_bounds(hemco_time_str) + + def get_effective_year(target_year: int) -> str: + if max_year is not None and target_year > max_year: + return str(max_year) + if min_year is not None and target_year < min_year: + return str(min_year) + return str(target_year) + + if freq == "daily": + for single_date in daterange(start_date, end_date): + eff_year = get_effective_year(single_date.year) + mm = f"{single_date.month:02d}" + dd = f"{single_date.day:02d}" + fname = file_template.replace("$YYYY", eff_year).replace("$MM", mm).replace("$DD", dd) + generated_files.add(fname) + elif freq == "monthly": + unique_months = set((d.year, d.month) for d in daterange(start_date, end_date)) + for (year, month) in unique_months: + eff_year = get_effective_year(year) + mm = f"{month:02d}" + fname = file_template.replace("$YYYY", eff_year).replace("$MM", mm) + if "$YYYY" not in file_template: + fname = fname.replace("$YYYY", "") + generated_files.add(fname) + elif freq == "representative": + unique_months = set((d.year, d.month) for d in daterange(start_date, end_date)) + for (year, month) in unique_months: + eff_year = get_effective_year(year) + mm = f"{month:02d}" + base_name = file_template.replace("$YYYY", eff_year).replace("$MM", mm) + for pat in patterns: + if "$DAY" in base_name: + fname = base_name.replace("$DAY", pat) + elif "$D" in base_name: + fname = base_name.replace("$D", pat) + else: + fname = base_name + generated_files.add(fname) + else: + unique_years = set(d.year for d in daterange(start_date, end_date)) + for year in unique_years: + if "$YYYY" not in file_template: + generated_files.add(file_template) + else: + eff_year = get_effective_year(year) + fname = file_template.replace("$YYYY", eff_year) + generated_files.add(fname) + return generated_files + + +def extract_dataset_name(file_path: str, root_path: str) -> 
str:
+    """
+    Extract dataset name from file path by finding the directory after ROOT.
+
+    Parameters
+    ----------
+    file_path : str
+        Full file path.
+    root_path : str
+        Root directory path.
+
+    Returns
+    -------
+    str
+        Dataset name or "unknown" if not found.
+    """
+    try:
+        rel_path = os.path.relpath(file_path, root_path)
+        parts = rel_path.split(os.sep)
+        if len(parts) > 0:
+            return parts[0]
+    except ValueError:
+        parts = file_path.split(os.sep)
+        for i, part in enumerate(parts):
+            if part in ['nexus', 'emissions', 'data'] and i < len(parts) - 1:
+                return parts[i + 1]
+    return "unknown"
+
+
+def copy_files_with_structure(
+    file_list: list[str],
+    root_path: str,
+    copy_dir: str
+) -> tuple[int, int]:
+    """
+    Copy files to local directory maintaining dataset structure.
+
+    Parameters
+    ----------
+    file_list : list of str
+        List of file paths to copy.
+    root_path : str
+        Root directory path to strip from file paths.
+    copy_dir : str
+        Directory to copy files to.
+
+    Returns
+    -------
+    tuple of (int, int)
+        (copied_count, failed_count)
+    """
+    import shutil  # NOTE(review): shutil is not imported at module level; local import prevents a NameError on copy2 below
+    copied_count, failed_count = 0, 0
+    os.makedirs(copy_dir, exist_ok=True)
+    for file_path in file_list:
+        try:
+            dataset_name = extract_dataset_name(file_path, root_path)
+            try:
+                rel_path = os.path.relpath(file_path, root_path)
+            except ValueError:
+                rel_path = file_path.lstrip('/')
+            dest_path = os.path.join(copy_dir, rel_path)
+            dest_dir = os.path.dirname(dest_path)
+            os.makedirs(dest_dir, exist_ok=True)
+            shutil.copy2(file_path, dest_path)
+            copied_count += 1
+        except Exception as e:
+            failed_count += 1
+    return copied_count, failed_count
+
+
+def parse_hemco(
+    rc_path: str,
+    toml_path: str,
+    start_date: datetime,
+    end_date: datetime
+) -> tuple[list[str], str | None]:
+    """
+    Parse HEMCO config and return (file_list, root_path).
+
+    Parameters
+    ----------
+    rc_path : str
+        Path to HEMCO config file.
+    toml_path : str
+        Path to TOML rules file.
+    start_date : datetime
+        Start date for file search.
+ end_date : datetime + End date for file search. + + Returns + ------- + tuple + (file_list, root_path) + file_list : list of str + List of emission file paths. + root_path : str or None + Root directory path from config. + """ + + # Load sector rules with better defaults + try: + with open(toml_path, 'r') as tf: + sector_rules = toml.load(tf) + except Exception: + # Create default rules for common patterns + sector_rules = { + "default": {"frequency": "monthly"}, + "CEDS": {"frequency": "yearly"}, + "GFED": {"frequency": "daily"}, + "FINN": {"frequency": "daily"} + } + + defined_vars = {} + all_files = set() + enabled_extensions = set() + enabled_collections = set() + root_path = None + + var_pattern = re.compile(r'^\s*([A-Za-z0-9_]+)\s*:\s*(.*)') + data_sections = ["BASE EMISSIONS", "SCALE FACTORS", "MASKS"] + current_section = None + in_conditional_section = None + + with open(rc_path, 'r') as f: + lines = f.readlines() + + for line in lines: + raw = line.strip() + + # Handle comments, but allow section headers that start with ### + if not raw: + continue + if (raw.startswith("!") or + (raw.startswith("#") and "BEGIN SECTION" not in raw and "END SECTION" not in raw)): + continue + + # Section Detection + if "BEGIN SECTION" in raw: + if "SETTINGS" in raw: + current_section = "SETTINGS" + elif "EXTENSION SWITCHES" in raw: + current_section = "EXTENSION SWITCHES" + else: + for s in data_sections: + if s in raw: + current_section = s + continue + if "END SECTION" in raw: + current_section = None + continue + + # Handle conditional sections like (((CEDS and )))CEDS + if raw.startswith("((("): + collection_name = raw[3:] + in_conditional_section = collection_name + continue + if raw.startswith(")))"): + in_conditional_section = None + continue + + # Extension Switches - parse to determine what's enabled + if current_section == "EXTENSION SWITCHES": + if "-->" in raw and ":" in raw: + # Format: --> CEDS : on true + parts = raw.split(":") + if len(parts) >= 2: + 
ext_name = parts[0].strip().replace("-->", "").strip() + ext_value = parts[1].strip().lower() + if "on" in ext_value and "true" in ext_value: + enabled_collections.add(ext_name) + continue + + # Settings + if current_section == "SETTINGS": + match = var_pattern.match(raw) + if match: + k, v = match.groups() + clean_val = v.split('!')[0].split('#')[0].strip() + defined_vars[f"${k}"] = clean_val + # Capture ROOT path for copying functionality + if k == "ROOT": + root_path = clean_val + continue + + # Data Sections + if current_section in data_sections: + # Skip if we're in a conditional section that's not enabled + if in_conditional_section and in_conditional_section not in enabled_collections: + continue + + parts = raw.split() + + # Handle different section formats + if len(parts) >= 5: + if current_section == "BASE EMISSIONS": + # Format: ExtNr Name sourceFile sourceVar sourceTime ... + ext_nr = parts[0] + name = parts[1] + raw_file = parts[2] + source_var = parts[3] + raw_time = parts[4] + + # Skip disabled extensions (only process extension 0 and *) + if ext_nr != "0" and ext_nr != "*": + continue + + elif current_section == "SCALE FACTORS": + # Format: ScalID Name sourceFile sourceVar sourceTime ... + scale_id = parts[0] + name = parts[1] + raw_file = parts[2] + source_var = parts[3] + raw_time = parts[4] + + else: + # Other sections - try to parse similar format + ext_nr = parts[0] + name = parts[1] + raw_file = parts[2] + source_var = parts[3] + raw_time = parts[4] + + # Common filtering for all sections + # Filtering garbage + if raw_file == '-' or raw_file.startswith("MATH:") or raw_file.upper() == "MASK" or raw_file == "1.0": + continue + if not any(c.isalpha() or c == '$' or c == '/' for c in raw_file): + continue + + # 1. Resolve Variables + resolved_path = resolve_variables(raw_file, defined_vars) + + # 2. 
Get Rules - try exact name match first, then collection, then default + rules = sector_rules.get( + name, + sector_rules.get( + in_conditional_section if in_conditional_section else "default", + sector_rules.get("default", {}) + ) + ) + + # 3. Expand with Year Clamping + files = expand_filenames(resolved_path, raw_time, rules, start_date, end_date) + all_files.update(files) + + return sorted(list(all_files)), root_path + + +def parse_hemco_time_file(time_file_path: str) -> tuple[datetime | None, datetime | None]: + """ + Parse HEMCO_sa_Time.rc to extract start and end dates. + + Parameters + ---------- + time_file_path : str + Path to HEMCO time file. + + Returns + ------- + tuple + (start_date, end_date) as datetime or None if not found. + """ + start_date = None + end_date = None + if os.path.exists(time_file_path): + with open(time_file_path, 'r') as f: + for line in f: + line = line.strip() + if line.startswith('START:'): + date_str = line.split(':')[1].strip().split()[0] + start_date = datetime.strptime(date_str, '%Y-%m-%d') + elif line.startswith('END:'): + date_str = line.split(':')[1].strip().split()[0] + end_date = datetime.strptime(date_str, '%Y-%m-%d') + return start_date, end_date + + +def gather_emissions_files( + hemco_config_path: str, + start_date: datetime, + end_date: datetime, + toml_rules_path: str = "nexus_sectors.toml", + verbose: bool = False +) -> tuple[list[str], list[str], str | None]: + """ + Main API function to gather emissions files for a date range. 
+ + Parameters + ---------- + hemco_config_path : str + Path to HEMCO config file (e.g., NEXUS_Config.rc) + start_date : datetime + Start date for file search + end_date : datetime + End date for file search + toml_rules_path : str, optional + Path to TOML rules file + verbose : bool, optional + Print detailed progress information + + Returns + ------- + tuple + (found_files, missing_files, root_path) + found_files : list of str + List of existing file paths + missing_files : list of str + List of missing file paths + root_path : str or None + ROOT directory path from config + """ + if not os.path.exists(hemco_config_path): + raise FileNotFoundError(f"HEMCO config file not found: {hemco_config_path}") + potential_files, root_path = parse_hemco(hemco_config_path, toml_rules_path, start_date, end_date) + found = [] + missing = [] + for fpath in potential_files: + clean = os.path.expanduser(fpath) + if os.path.exists(clean) and os.path.isfile(clean): + found.append(clean) + else: + missing.append(clean) + return found, missing, root_path + + +def gather_emissions_files_from_time_file( + hemco_config_path: str, + hemco_time_path: str, + toml_rules_path: str = "nexus_sectors.toml", + verbose: bool = False +) -> tuple[list[str], list[str], str | None]: + """ + Gather emissions files using dates from HEMCO time file. 
+ + Parameters + ---------- + hemco_config_path : str + Path to HEMCO config file + hemco_time_path : str + Path to HEMCO time file (e.g., HEMCO_sa_Time.rc) + toml_rules_path : str, optional + Path to TOML rules file + verbose : bool, optional + Print detailed progress information + + Returns + ------- + tuple + (found_files, missing_files, root_path) + found_files : list of str + List of existing file paths + missing_files : list of str + List of missing file paths + root_path : str or None + ROOT directory path from config + """ + start_date, end_date = parse_hemco_time_file(hemco_time_path) + if not start_date or not end_date: + raise ValueError(f"Could not parse dates from time file: {hemco_time_path}") + return gather_emissions_files(hemco_config_path, start_date, end_date, toml_rules_path, verbose) + + +def copy_emissions_files( + file_list: list[str], + root_path: str, + destination_dir: str, + verbose: bool = False +) -> tuple[int, int]: + """ + Copy emission files to local directory with organized structure. + + Parameters + ---------- + file_list : list of str + List of file paths to copy + root_path : str + ROOT directory path to strip from file paths + destination_dir : str + Directory to copy files to + verbose : bool, optional + Print detailed progress information + + Returns + ------- + tuple + (copied_count, failed_count) + """ + if not file_list: + return 0, 0 + if not root_path: + root_path = "/" + return copy_files_with_structure(file_list, root_path, destination_dir) + + +def save_file_list(file_list: list[str], output_path: str) -> None: + """ + Save file list to text file. 
+
+    Parameters
+    ----------
+    file_list : list of str
+        List of file paths
+    output_path : str
+        Output file path
+    """
+    with open(output_path, "w") as f:
+        for fpath in file_list:
+            f.write(fpath + "\n")
diff --git a/ush/python/pygfs/task/oceanice_products.py b/ush/python/pygfs/task/oceanice_products.py
index 775d13e07f9..5fcf946cdde 100644
--- a/ush/python/pygfs/task/oceanice_products.py
+++ b/ush/python/pygfs/task/oceanice_products.py
@@ -66,7 +66,7 @@ def __init__(self, config: Dict[str, Any]) -> None:
 
         # TODO: This is a bit of a hack, but it works for now
         # FIXME: find a better way to provide the averaging period
-        avg_period = f"{forecast_hour-interval:03d}-{forecast_hour:03d}"
+        avg_period = f"{forecast_hour - interval:03d}-{forecast_hour:03d}"
 
         # Extend task_config with localdict
         localdict = AttrDict(