From accc2e032a939fbda7f34b36273634f808615a08 Mon Sep 17 00:00:00 2001
From: ashleymedin
Date: Thu, 22 Jan 2026 22:31:42 +0900
Subject: [PATCH 1/6] Glacier TWS changing to get correct variables

---
 src/symfluence/evaluation/evaluators/tws.py    | 16 ++++++++--------
 src/symfluence/models/summa/glacier_manager.py |  1 +
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/symfluence/evaluation/evaluators/tws.py b/src/symfluence/evaluation/evaluators/tws.py
index bd0d91ee..4d455c6e 100644
--- a/src/symfluence/evaluation/evaluators/tws.py
+++ b/src/symfluence/evaluation/evaluators/tws.py
@@ -24,7 +24,7 @@
   - Canopy water: scalarCanopyWat (mm)
   - Soil water: scalarTotalSoilWat (mm)
   - Groundwater: scalarAquiferStorage (m → mm via ×1000)
-  - Glacier mass (optional): glacMass4AreaChange (kg/m² → mm)
+  - Glacier ice mass change (optional): scalarGlceWE (kg/m² → mm)
 
 Configuration:
     TWS_GRACE_COLUMN: GRACE data column name (default: 'grace_jpl_anomaly')
@@ -84,12 +84,12 @@ class TWSEvaluator(ModelEvaluator):
         - scalarCanopyWat: mm (water depth)
         - scalarTotalSoilWat: mm (water depth)
         - scalarAquiferStorage: m → mm (multiply by 1000.0)
-        - glacMass4AreaChange: kg/m² → mm (glacier mass only in timestep files)
+        - scalarGlceWE: kg/m² → mm (multiply by 1.0, density=1000 kg/m³)
 
     File Search Strategy:
         1. Prefer daily NetCDF files (*_day.nc) for most storage variables
         2. Check original file from get_simulation_files()
-        3. Search for timestep files (*_timestep.nc) for glacier mass
+        3. Search for timestep files (*_timestep.nc) for more storage variables
        4. Select file with maximum matching variables
 
     Metrics Calculation:
@@ -139,7 +139,7 @@ def __init__(self, config: 'SymfluenceConfig', project_dir: Path, logger: loggin
             TWS_UNIT_CONVERSION: Scaling factor for model storage (default: 1.0)
             TWS_STORAGE_COMPONENTS: Comma-separated SUMMA storage variables
                 - Default: 'scalarSWE, scalarCanopyWat, scalarTotalSoilWat, scalarAquiferStorage'
-                - Can include glacier mass: 'glacMass4AreaChange'
+                - Can include glacier mass change: 'scalarGlceWE'
             TWS_DETREND: Remove linear trend from both sim and obs (default: False)
                 - Critical for glacierized basins (glacier mass loss trend obscures seasonal signal)
                 - Improves correlation from ~0.3 to ~0.8 for glacier domains
@@ -186,7 +186,7 @@ def get_simulation_files(self, sim_dir: Path) -> List[Path]:
         """Locate SUMMA output files containing water storage variables.
 
         Searches for NetCDF files containing any of the configured storage
-        components (SWE, canopy water, soil water, aquifer storage, glacier mass).
+        components (SWE, canopy water, soil water, aquifer storage, glacier mass change).
 
         Args:
             sim_dir: Directory containing SUMMA simulation output files
@@ -210,7 +210,7 @@ def extract_simulated_data(self, sim_files: List[Path], **kwargs) -> pd.Series:
        1. Start with provided sim_files[0] (typically daily output)
        2. Search output directory for alternative files:
           - Daily files (*_day.nc): Preferred for most storage variables
-          - Timestep files (*_timestep.nc): Often contain glacier mass
+          - Timestep files (*_timestep.nc): May contain additional storage variables
        3. Test each file for availability of storage variables
        4. Select file(s) with maximum matching variables
        5. 
If no single file has all vars, try combining vars from multiple files @@ -234,6 +234,7 @@ def extract_simulated_data(self, sim_files: List[Path], **kwargs) -> pd.Series: Unit Conversions: - scalarSWE: kg/m² → mm (density ≈ 1, so 1 kg/m² ≈ 1 mm) - scalarAquiferStorage: m → mm (×1000) + - scalarGlceWE: kg/m² → mm (density ≈ 1, so 1 kg/m² ≈ 1 mm) - Others: Already mm (canopy, soil water depth) Args: @@ -255,7 +256,6 @@ def extract_simulated_data(self, sim_files: List[Path], **kwargs) -> pd.Series: output_file = sim_files[0] # Try to find a file with storage variables - # For glacier mode, glacMass4AreaChange is often only in timestep files output_dir = output_file.parent potential_files = [ output_file, # Original file @@ -264,7 +264,7 @@ def extract_simulated_data(self, sim_files: List[Path], **kwargs) -> pd.Series: for f in output_dir.glob('*_day.nc'): if f not in potential_files: potential_files.insert(0, f) # Prefer daily files - # Also check timestep files for glacier variables like glacMass4AreaChange + # Also check timestep files for f in output_dir.glob('*timestep.nc'): if f not in potential_files: potential_files.append(f) diff --git a/src/symfluence/models/summa/glacier_manager.py b/src/symfluence/models/summa/glacier_manager.py index 57dfb60e..e44ea27a 100644 --- a/src/symfluence/models/summa/glacier_manager.py +++ b/src/symfluence/models/summa/glacier_manager.py @@ -686,6 +686,7 @@ def _create_coldState_glac_from_shp( 'scalarCanairTemp': 283.16, 'scalarCanopyTemp': 283.16, 'glacMass4AreaChange': 0.0, + 'scalarGlceWE': 0.0, 'dt_init': 3600.0, } From 62f038c48e2b06c74c31f2ac5fced56132e66232 Mon Sep 17 00:00:00 2001 From: ashleymedin Date: Fri, 23 Jan 2026 23:53:06 +0900 Subject: [PATCH 2/6] just comments --- src/symfluence/evaluation/evaluators/tws.py | 134 ++++++++------------ 1 file changed, 52 insertions(+), 82 deletions(-) diff --git a/src/symfluence/evaluation/evaluators/tws.py b/src/symfluence/evaluation/evaluators/tws.py index 4d455c6e..13a5b67b 100644 --- a/src/symfluence/evaluation/evaluators/tws.py +++ b/src/symfluence/evaluation/evaluators/tws.py @@ -3,37 +3,9 @@ """Total Water Storage (TWS) Evaluator. -Evaluates simulated total water storage from SUMMA against GRACE satellite -observations of water storage anomalies. 
-
-GRACE Satellites:
-  - GRACE (Gravity Recovery and Climate Experiment): Twin satellites measuring
-    Earth's gravity field to infer water storage changes
-  - GRACE Follow-On: Continuation mission (2018-present)
-  - Temporal resolution: Monthly anomalies
-  - Spatial resolution: ~300 km × 300 km footprint
-  - Sensitivity: 1-2 cm equivalent water height
-
-GRACE Data Products:
-  - Multiple processing centers: JPL (Jet Propulsion Lab), CSR (U.Texas), GSFC (NASA)
-  - Released as time-variable gravity fields → converted to water storage anomalies
-  - Units: mm equivalent water thickness (relative to 2004-2009 baseline)
-
-Water Storage Components (SUMMA):
-  - Snow Water Equivalent (SWE): scalarSWE (kg/m² → mm)
-  - Canopy water: scalarCanopyWat (mm)
-  - Soil water: scalarTotalSoilWat (mm)
-  - Groundwater: scalarAquiferStorage (m → mm via ×1000)
-  - Glacier ice mass change (optional): scalarGlceWE (kg/m² → mm)
-
-Configuration:
-    TWS_GRACE_COLUMN: GRACE data column name (default: 'grace_jpl_anomaly')
-    TWS_ANOMALY_BASELINE: Baseline period for anomaly calc ('overlap' or year range)
-    TWS_UNIT_CONVERSION: Scaling factor for model storage (default: 1.0)
-    TWS_STORAGE_COMPONENTS: Comma-separated storage variables to sum
-    TWS_DETREND: Remove linear trend (critical for glacierized basins)
-    TWS_SCALE_TO_OBS: Scale model variability to match observations
-    TWS_OBS_PATH: Direct path override for GRACE data
+Evaluates simulated water storage from SUMMA in one of two modes: total water
+storage change compared against GRACE satellite data, or glacier-only mass
+balance compared against observed glacier mass balance data.
 """
 
 import logging
@@ -55,11 +27,11 @@
 class TWSEvaluator(ModelEvaluator):
     """Total Water Storage evaluator comparing SUMMA to GRACE satellites.
 
-    Compares time series of simulated water storage (SWE + canopy + soil + groundwater)
-    from SUMMA land-surface model against monthly water storage anomalies from GRACE
-    satellite gravity measurements.
+    Evaluates simulated water storage from SUMMA using one of two metrics:
+    1) total water storage change against GRACE satellite observations of water storage anomalies, or
+    2) glacier annual maximum and minimum mass balance against observations.
 
-    Key Features:
+    Key Features: 
        - Multi-component storage summation: SWE + canopy + soil + aquifer
        - Flexible storage component selection via configuration
        - GRACE data support from multiple processing centers (JPL, CSR, GSFC)
        - Anomaly-based comparison (removes absolute storage biases)
        - Advanced signal processing: detrending, variability scaling
        - Diagnostic exports for visualization and analysis
 
+    Output Variables:
+        - basin__StorageChange: sum of scalarSWE+scalarCanopyWat+scalarTotalSoilWat
+          +scalarAquiferStorage+scalarGlceWE
+        - basin__MassBalanceMax: glacier mass balance in mm of water, maximum per year
+        - basin__MassBalanceMin: glacier mass balance in mm of water, minimum per year
+
+    Configuration:
+        TWS_GRACE_COLUMN: GRACE data column name (default: 'grace_jpl_anomaly')
+        TWS_ANOMALY_BASELINE: Baseline period for anomaly calc ('overlap' or year range)
+        TWS_UNIT_CONVERSION: Scaling factor for model storage (default: 1.0)
+        TWS_STORAGE_COMPONENTS: Comma-separated storage variables to sum
+        TWS_DETREND: Remove linear trend (critical for glacierized basins)
+        TWS_SCALE_TO_OBS: Scale model variability to match observations
+        TWS_OBS_PATH: Direct path override for GRACE data
+
     Physical Basis:
         GRACE measures vertically-integrated water column changes via:
         - Temporal gravity variations → water storage changes
         - Footprint covers entire river basins
        - Monthly resolution averages diurnal/seasonal noise
        - Spatial footprint ~300 km (cannot resolve sub-grid variability)
 
-    Critical for Glacierized Basins:
-        Glacier mass loss creates long-term trend that dominates GRACE signal and
-        obscures seasonal patterns (r ≈ 0.3 with trend, r ≈ 0.8 after detrending).
-        TWS_DETREND option removes this trend for proper seasonal comparison.
-
-    Storage Component Units (SUMMA):
-        - scalarSWE: kg/m² → mm (multiply by 1.0, density=1000 kg/m³)
-        - scalarCanopyWat: mm (water depth)
-        - scalarTotalSoilWat: mm (water depth)
-        - scalarAquiferStorage: m → mm (multiply by 1000.0)
-        - scalarGlceWE: kg/m² → mm (multiply by 1.0, density=1000 kg/m³)
-
-    File Search Strategy:
-        1. Prefer daily NetCDF files (*_day.nc) for most storage variables
-        2. Check original file from get_simulation_files()
-        3. Search for timestep files (*_timestep.nc) for more storage variables
-        4. Select file with maximum matching variables
-
-    Metrics Calculation:
-        1. Load absolute storage from SUMMA (daily or timestep data)
-        2. Aggregate to monthly means (match GRACE temporal resolution)
-        3. Calculate anomaly relative to overlap period mean
-        4. Optional detrending: Remove linear trend (glacierized basins)
-        5. Optional scaling: Match model variability to observations
-        6. Calculate performance metrics (KGE, RMSE, NSE, etc.)
-
-    Configuration:
-        TWS_GRACE_COLUMN: Column name in GRACE CSV (default: 'grace_jpl_anomaly')
-        TWS_ANOMALY_BASELINE: Baseline for anomaly ('overlap' or year-range)
-        TWS_UNIT_CONVERSION: Multiplicative scaling for model storage
-        TWS_STORAGE_COMPONENTS: Comma-separated variable names (default: SWE, canopy, soil, aquifer)
-        TWS_DETREND: Remove linear trend from both series (False by default)
-        TWS_SCALE_TO_OBS: Scale model std dev to match observations (False)
-        TWS_OBS_PATH: Direct path override for GRACE observation file
-
     Attributes:
         grace_column: GRACE data column name
         anomaly_baseline: Baseline period specification
@@ -125,7 +77,7 @@ class TWSEvaluator(ModelEvaluator):
     def __init__(self, config: 'SymfluenceConfig', project_dir: Path, logger: logging.Logger):
         """Initialize TWS evaluator with storage component and signal processing options.
 
-        Configures water storage components to sum (SWE, canopy, soil, aquifer),
+        Configures water storage components to sum (SWE, canopy, soil, aquifer, glacier),
         GRACE data column selection, and optional signal processing (detrending,
         variability scaling) to handle model-observation mismatches.
 
@@ -140,6 +92,7 @@ def __init__(self, config: 'SymfluenceConfig', project_dir: Path, logger: loggin
             TWS_STORAGE_COMPONENTS: Comma-separated SUMMA storage variables
                 - Default: 'scalarSWE, scalarCanopyWat, scalarTotalSoilWat, scalarAquiferStorage'
                 - Can include glacier mass change: 'scalarGlceWE'
+                - Or just use the total 'basin__StorageChange'
             TWS_DETREND: Remove linear trend from both sim and obs (default: False)
                 - Critical for glacierized basins (glacier mass loss trend obscures seasonal signal)
                 - Improves correlation from ~0.3 to ~0.8 for glacier domains
             TWS_SCALE_TO_OBS: Scale model variability to match observations (default: False)
                 - Centers both series on zero-mean, rescales model std dev to match obs
                 - Useful when pattern is correct but amplitude is wrong
 
-        Storage Component Units:
-          - scalarSWE: kg/m² (converted to mm)
-          - scalarCanopyWat: mm (water depth, no conversion needed)
-          - scalarTotalSoilWat: mm (water depth, no conversion needed)
-          - scalarAquiferStorage: m (converted to mm via ×1000)
-
         Args:
             config: Typed configuration object (SymfluenceConfig)
             project_dir: Project root directory
@@ -231,11 +178,18 @@ def extract_simulated_data(self, sim_files: List[Path], **kwargs) -> pd.Series:
             Multiple HRU/GRU: Averages across dimension via nanmean()
             Result: Scalar time series (1D array of length = n_timesteps)
 
-        Unit Conversions:
-          - scalarSWE: kg/m² → mm (density ≈ 1, so 1 kg/m² ≈ 1 mm)
-          - scalarAquiferStorage: m → mm (×1000)
-          - scalarGlceWE: kg/m² → mm (density ≈ 1, so 1 kg/m² ≈ 1 mm)
-          - Others: Already mm (canopy, soil water depth)
+        Storage Component Units (SUMMA) by HRU or DOM:
+          - scalarSWE: kg/m² → mm (multiply by 1.0, density=1000 kg/m³)
+          - scalarCanopyWat: mm (water depth)
+          - scalarTotalSoilWat: mm (water depth)
+          - scalarAquiferStorage: m → mm (multiply by 1000.0)
+          - scalarGlceWE: kg/m² → mm (multiply by 1.0, density=1000 kg/m³)
+          OR use the cumulative basin__StorageChange by GRU, which sums these (kg/m²/s, integrated over time to mm)
+          OR
+          Glacier Only Storage Components (SUMMA) by GRU
+          - Glacier mass balance: (change in basin__GlacierStorage)/basin__GlacierArea
+          - Units of Gt/m² → mm via ×10¹² and subtract initial value
+          - Calibrate to seasonal maximum and minimum
 
         Args:
            sim_files: List of SUMMA output files (typically daily NetCDF)
@@ -361,6 +315,22 @@ def get_observed_data_path(self) -> Path:
           - Generic format: {domain}_grace_tws_anomaly.csv
          - Generic 
            without domain: grace_tws_anomaly.csv, tws_anomaly.csv
+
+        GRACE Satellites:
+          - GRACE (Gravity Recovery and Climate Experiment): Twin satellites measuring
+            Earth's gravity field to infer water storage changes
+          - GRACE Follow-On: Continuation mission (2018-present)
+          - Temporal resolution: Monthly anomalies
+          - Spatial resolution: ~300 km × 300 km footprint
+          - Sensitivity: 1-2 cm equivalent water height
+        GRACE Data Products:
+          - Multiple processing centers: JPL (Jet Propulsion Lab), CSR (U.Texas), GSFC (NASA)
+          - Released as time-variable gravity fields → converted to water storage anomalies
+          - Units: mm equivalent water thickness (relative to 2004-2009 baseline)
+        OR
+        Glacier Mass Balance Data:
+          - Annually reported maximum in mm.w.e. and minimum in mm.w.e.
+          - These are usually in April and October in the Northern Hemisphere.
+
         Directory Hierarchy:
          - Preprocessed subdirectory (preferred for processed data)
          - Root directory (fallback for raw/alternative formats)

From 9b711bcab3e5560632bacb50c6c31e64d792aff0 Mon Sep 17 00:00:00 2001
From: ashleymedin
Date: Sun, 25 Jan 2026 16:17:01 +0900
Subject: [PATCH 3/6] comments

---
 src/symfluence/evaluation/evaluators/tws.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/symfluence/evaluation/evaluators/tws.py b/src/symfluence/evaluation/evaluators/tws.py
index 13a5b67b..e9eadc97 100644
--- a/src/symfluence/evaluation/evaluators/tws.py
+++ b/src/symfluence/evaluation/evaluators/tws.py
@@ -133,7 +133,7 @@ def get_simulation_files(self, sim_dir: Path) -> List[Path]:
         """Locate SUMMA output files containing water storage variables.
 
         Searches for NetCDF files containing any of the configured storage
-        components (SWE, canopy water, soil water, aquifer storage, glacier mass change).
+        components (SWE, canopy water, soil water, aquifer storage, glacier ice mass change).
 
         Args:
             sim_dir: Directory containing SUMMA simulation output files

From 25b14171041bcdb684f2460a28c184bd07fe628d Mon Sep 17 00:00:00 2001
From: ashleymedin
Date: Tue, 3 Feb 2026 22:16:21 -0600
Subject: [PATCH 4/6] putting in mb target and fixing grace target

---
 src/symfluence/evaluation/evaluators/tws.py   | 406 +++++++++++++-----
 .../workers/summa/metrics_calculation.py      |   2 +-
 2 files changed, 305 insertions(+), 103 deletions(-)

diff --git a/src/symfluence/evaluation/evaluators/tws.py b/src/symfluence/evaluation/evaluators/tws.py
index e9eadc97..62f99816 100644
--- a/src/symfluence/evaluation/evaluators/tws.py
+++ b/src/symfluence/evaluation/evaluators/tws.py
@@ -40,8 +40,8 @@ class TWSEvaluator(ModelEvaluator):
         - Diagnostic exports for visualization and analysis
 
     Output Variables:
-        - basin__StorageChange: sum of scalarSWE+scalarCanopyWat+scalarTotalSoilWat
-          +scalarAquiferStorage+scalarGlceWE
+        - basin__Storage: sum of scalarSWE+scalarCanopyWat+scalarTotalSoilWat
+          +scalarAquiferStorage+scalarGlceWE or cumsum of basin__StorageChange
         - basin__MassBalanceMax: glacier mass balance in mm of water, maximum per year
         - basin__MassBalanceMin: glacier mass balance in mm of water, minimum per year
 
@@ -50,7 +50,7 @@ class TWSEvaluator(ModelEvaluator):
         TWS_ANOMALY_BASELINE: Baseline period for anomaly calc ('overlap' or year range)
         TWS_UNIT_CONVERSION: Scaling factor for model storage (default: 1.0)
         TWS_STORAGE_COMPONENTS: Comma-separated storage variables to sum
-        TWS_DETREND: Remove linear trend (critical for glacierized basins)
+        TWS_DETREND: Remove linear trend
         TWS_SCALE_TO_OBS: Scale model variability to match observations
         TWS_OBS_PATH: Direct path override for GRACE data
 
@@ -62,6 +62,7 @@ class TWSEvaluator(ModelEvaluator):
        - Spatial footprint ~300 km (cannot resolve sub-grid variability)
 
     Attributes:
+        optimization_target: 'stor_grace' or 'stor_mb'
         grace_column: GRACE data column name
         anomaly_baseline: Baseline period specification
         unit_conversion: Scaling factor for model storage values
@@ -94,8 +95,6 @@ def __init__(self, config: 'SymfluenceConfig', project_dir: Path, logger: loggin
                 - Can include glacier mass change: 'scalarGlceWE'
                 - Or just use the total 'basin__StorageChange'
             TWS_DETREND: Remove linear trend from both sim and obs (default: False)
-                - Critical for glacierized basins (glacier mass loss trend obscures seasonal signal)
-                - Improves correlation from ~0.3 to ~0.8 for glacier domains
             TWS_SCALE_TO_OBS: Scale model variability to match observations (default: False)
                 - Centers both series on zero-mean, rescales model std dev to match obs
                 - Useful when pattern is correct but amplitude is wrong
@@ -110,13 +109,23 @@ def __init__(self, config: 'SymfluenceConfig', project_dir: Path, logger: loggin
         """
         super().__init__(config, project_dir, logger)
 
+        self.optimization_target = self._get_config_value(
+            lambda: self.config.optimization.target,
+            default='streamflow',
+            dict_key='OPTIMIZATION_TARGET'
+        )
+        if self.optimization_target not in ['stor_grace', 'stor_mb']:
+            eval_var = self.config_dict.get('EVALUATION_VARIABLE', '')
+            if eval_var in ['stor_grace', 'stor_mb']:
+                self.optimization_target = eval_var
+            else:
+                self.optimization_target = 'stor_grace'
+
         self.grace_column = self.config_dict.get('TWS_GRACE_COLUMN', 'grace_jpl_anomaly')
         self.anomaly_baseline = self.config_dict.get('TWS_ANOMALY_BASELINE', 'overlap')
         self.unit_conversion = self.config_dict.get('TWS_UNIT_CONVERSION', 1.0)
 
         # Detrending option: removes linear trend from both series before comparison
-        # This is critical for glacierized basins where long-term glacier mass loss
-        # creates a trend mismatch that obscures the seasonal signal we want to calibrate
         self.detrend = self.config_dict.get('TWS_DETREND', False)
 
         # Scaling option: scale model variability to match observed
@@ -248,36 +257,7 @@ def extract_simulated_data(self, sim_files: List[Path], **kwargs) -> pd.Series:
         try:
             ds = xr.open_dataset(output_file)
             total_tws = None
-
-            for var in self.storage_vars:
-                if var in ds.data_vars:
-                    data = ds[var].values.astype(float)
-
-                    # Replace fill values (-9999) with NaN
-                    # SUMMA uses -9999 for missing/invalid data in some domains
-                    data = np.where(data < -999, np.nan, data)
-
-                    # Unit conversion: aquifer storage is in meters, others usually in mm
-                    if 'aquifer' in var.lower():
-                        self.logger.debug(f"Converting {var} from meters to mm")
-                        data = data * 1000.0
-
-                    if data.ndim > 1:
-                        axes_to_sum = tuple(range(1, data.ndim))
-                        data = np.nanmean(data, axis=axes_to_sum)
-
-                    if total_tws is None:
-                        total_tws = data.copy()
-                    else:
-                        # Use nansum behavior: NaN + value = value
-                        total_tws = np.where(np.isnan(total_tws), data,
-                                             np.where(np.isnan(data), total_tws, total_tws + data))
-                else:
-                    self.logger.warning(f"Storage variable {var} not found in {output_file}")
-
-            if total_tws is None:
-                raise ValueError(f"No storage variables {self.storage_vars} found in {output_file}")
-
+
             # Find time coordinate
             time_coord = None
             for var in ['time', 'Time', 'datetime']:
                 if var in ds.coords or var in ds.dims:
                     time_coord = ds[var].values
                     break
 
             if time_coord is None:
                 raise ValueError("Could not extract time coordinate")
 
+            if self.optimization_target == 'stor_mb':
+                if 'basin__GlacierArea' in ds.variables and 'basin__GlacierStorage' in ds.variables:
+                    area = ds['basin__GlacierArea'].values.astype(float)
+                    area = np.where(area <= 0, np.nan, area)
+                    total_tws = ds['basin__GlacierStorage'].values.astype(float) / area
+                    # convert from Gt/m² (km³ of water per m²) to mm
+                    total_tws = total_tws * 1e9 * 1000.0
+                    if total_tws.ndim > 1:
+                        # average across GRUs to a single basin series
+                        total_tws = np.nanmean(total_tws, axis=tuple(range(1, total_tws.ndim)))
+                else:
+                    self.logger.warning("Glacier mass balance variables not found in SUMMA output for mass balance calculation")
+                    return None
+            else:
+                if 'basin__StorageChange' in ds.data_vars:
+                    total_tws = ds['basin__StorageChange'].values.astype(float)
+                    total_tws = np.where(total_tws < -999, np.nan, total_tws)
+                    # integrate: mm/s * seconds -> mm per timestep, then cumulative sum over time
+                    dt = self.config_dict.get('FORCING_TIME_STEP_SIZE', 1)
+                    total_tws = np.cumsum(total_tws * dt, axis=0)
+                    if total_tws.ndim > 1:
+                        # average across GRUs to a single basin series
+                        total_tws = np.nanmean(total_tws, axis=tuple(range(1, total_tws.ndim)))
+                else:
+                    for var in self.storage_vars:
+                        if var in ds.data_vars:
+                            data = ds[var].values.astype(float)
+
+                            # Replace fill values (-9999) with NaN
+                            # SUMMA uses -9999 for missing/invalid data in some domains
+                            data = np.where(data < -999, np.nan, data)
+
+                            # Unit conversion: aquifer storage is in meters, others usually in mm
+                            if 'aquifer' in var.lower():
+                                self.logger.debug(f"Converting {var} from meters to mm")
+                                data = data * 1000.0
+
+                            if data.ndim > 1:
+                                axes_to_sum = tuple(range(1, data.ndim))
+                                data = np.nanmean(data, axis=axes_to_sum)
+
+                            if total_tws is None:
+                                total_tws = data.copy()
+                            else:
+                                # Use nansum behavior: NaN + value = value
+                                total_tws = np.where(np.isnan(total_tws), data,
+                                                     np.where(np.isnan(data), total_tws, total_tws + data))
+                        else:
+                            self.logger.warning(f"Storage variable {var} not found in {output_file}")
+
+                    if total_tws is None:
+                        raise ValueError(f"No storage variables {self.storage_vars} found in {output_file}")
+
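+            # Worked unit check for the stor_mb conversion above (illustrative
+            # numbers only, not from any domain): 0.001 Gt of water = 1e9 kg =
+            # 0.001 km³; spread over a 1e6 m² glacier that is
+            # 0.001 * 1e9 m³ / 1e6 m² = 1 m = 1000 mm w.e., i.e. the
+            # Gt/m² × 1e9 × 1000.0 (= ×10¹²) factor yields mm of water equivalent.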
tws_series = pd.Series(total_tws.flatten(), index=time_coord, name='simulated_tws') tws_series = tws_series * self.unit_conversion ds.close() + return tws_series except Exception as e: self.logger.error(f"Error loading SUMMA output: {e}") @@ -350,19 +382,31 @@ def get_observed_data_path(self) -> Path: # Search in multiple possible locations for GRACE data # Support both observations/grace and observations/storage/grace paths obs_base = self.project_dir / 'observations' - search_dirs = [ - obs_base / 'storage' / 'grace', # Ashley's glacier domain setup - obs_base / 'grace', # Standard location - ] - potential_patterns = [ - f'{self.domain_name}_HRUs_GRUs_grace_tws_anomaly.csv', # HRU-specific format - f'{self.domain_name}_HRUs_elevation_grace_tws_anomaly_by_hru.csv', # Elevation-based HRU - f'{self.domain_name}_grace_tws_processed.csv', - f'{self.domain_name}_grace_tws_anomaly.csv', - 'grace_tws_anomaly.csv', - 'tws_anomaly.csv', - ] + if self.optimization_target == 'stor_mb': + # Independent storage anomaly observations (e.g. glacier mass balance) + search_dirs = [ + obs_base / 'storage' / 'mass_balance', + obs_base / 'mass_balance', + ] + potential_patterns = [ + f'{self.domain_name}_mass_balance.csv', + 'obs_mass_balance.csv', + 'mass_balance.csv', + ] + else: + search_dirs = [ + obs_base / 'storage' / 'grace', # Ashley's glacier domain setup + obs_base / 'grace', # Standard location + ] + potential_patterns = [ + f'{self.domain_name}_HRUs_GRUs_grace_tws_anomaly.csv', # HRU-specific format + f'{self.domain_name}_HRUs_elevation_grace_tws_anomaly_by_hru.csv', # Elevation-based HRU + f'{self.domain_name}_grace_tws_processed.csv', + f'{self.domain_name}_grace_tws_anomaly.csv', + 'grace_tws_anomaly.csv', + 'tws_anomaly.csv', + ] for obs_dir in search_dirs: if not obs_dir.exists(): @@ -400,13 +444,22 @@ def _get_observed_data_column(self, columns: List[str]) -> Optional[str]: Returns: Optional[str]: Name of GRACE anomaly column, or None if not found """ - if self.grace_column in columns: - return self.grace_column - # Fallback to known columns - for fallback in ['grace_jpl_anomaly', 'grace_csr_anomaly', 'grace_gsfc_anomaly']: - if fallback in columns: - return fallback - return next((c for c in columns if 'grace' in c.lower() and 'anomaly' in c.lower()), None) + if self.optimization_target == 'stor_mb': + # For independent storage anomaly, expect a specific column + for col in columns: + if 'mass_balance' in col.lower() or 'stor_mb' in col.lower(): + return col + # Fallback to exact match + if 'Mass_Balance' in columns: + return 'Mass_Balance' + else: + if self.grace_column in columns: + return self.grace_column + # Fallback to known columns + for fallback in ['grace_jpl_anomaly', 'grace_csr_anomaly', 'grace_gsfc_anomaly']: + if fallback in columns: + return fallback + return next((c for c in columns if 'grace' in c.lower() and 'anomaly' in c.lower()), None) def needs_routing(self) -> bool: """Determine if water storage evaluation requires routing model. @@ -561,51 +614,175 @@ def calculate_metrics(self, sim: Any, obs: Optional[pd.Series] = None, obs_df = pd.read_csv(obs_path, index_col=0, parse_dates=True) col = self._get_observed_data_column(obs_df.columns) if not col: - self.logger.error(f"[TWS] Could not find GRACE column in {obs_path}. Available: {list(obs_df.columns)}") + self.logger.error(f"[TWS] Could not find GRACE/MB column in {obs_path}. 
Available: {list(obs_df.columns)}")
             return None
         obs_tws = obs_df[col].dropna()
 
-        # Aggregate simulated to monthly means to match GRACE
-        sim_monthly = sim_tws.resample('MS').mean()
-
-        # Check if we have any simulated data
-        if len(sim_monthly) == 0:
-            self.logger.error("[TWS] No simulated TWS data available for metrics calculation")
-            return None
-
-        # Find common period
-        common_idx = sim_monthly.index.intersection(obs_tws.index)
-        if len(common_idx) == 0:
-            sim_start, sim_end = sim_monthly.index[0], sim_monthly.index[-1]
-            obs_start, obs_end = obs_tws.index[0], obs_tws.index[-1]
-            self.logger.error(f"[TWS] No overlapping period between simulation ({sim_start} to {sim_end}) and observations ({obs_start} to {obs_end})")
-            return None
-
-        self.logger.debug(f"[TWS] Found common period with {len(common_idx)} points")
-        sim_matched = sim_monthly.loc[common_idx]
-        obs_matched = obs_tws.loc[common_idx]
-
-        # Calculate anomaly for simulated data
-        baseline_mean = sim_matched.mean()
-        sim_anomaly = sim_matched - baseline_mean
-
-        # Apply detrending if configured
-        # This is critical for glacierized basins where glacier mass loss trend
-        # obscures the seasonal signal (improves correlation ~0.46 -> ~0.78)
-        if self.detrend:
-            self.logger.info("[TWS] Applying linear detrending to both series")
-            sim_anomaly = self._detrend_series(sim_anomaly)
-            obs_matched = self._detrend_series(obs_matched)
-
-        # Apply variability scaling if configured
-        # This forces model variability to match observed (removes alpha penalty in KGE)
-        if self.scale_to_obs:
-            self.logger.info("[TWS] Scaling model variability to match observed")
-            sim_anomaly = self._scale_to_obs_variability(sim_anomaly, obs_matched)
+        if self.optimization_target == 'stor_mb':
+            # Build annual mass-balance extremes from the simulated TWS, then
+            # map each value onto the date of the matching observation
+            # compute annual maxima/minima from simulated TWS and their timestamps
+            annual_max = sim_tws.resample('A').max()
+            annual_min = sim_tws.resample('A').min()
+            annual_max_idx = sim_tws.resample('A').idxmax()
+            annual_min_idx = sim_tws.resample('A').idxmin()
+
+            # For each annual max, find the most recent annual min occurring <= that max
+            max_minus_prev_min = []
+            for max_ts in annual_max_idx.values:
+                if pd.isna(max_ts):
+                    max_minus_prev_min.append(np.nan)
+                    continue
+                # candidate minima up to and including the max timestamp
+                candidates = annual_min_idx[annual_min_idx <= max_ts]
+                if len(candidates) == 0:
+                    max_minus_prev_min.append(np.nan)
+                else:
+                    prev_min_ts = candidates.iloc[-1]
+                    try:
+                        max_val = float(sim_tws.loc[max_ts])
+                        prev_min_val = float(sim_tws.loc[prev_min_ts])
+                        max_minus_prev_min.append(max_val - prev_min_val)
+                    except Exception:
+                        max_minus_prev_min.append(np.nan)
+
+            annual_max_mb = pd.Series(max_minus_prev_min, index=annual_max.index)
+
+            # For each annual min, find the most recent annual max occurring <= that min
+            min_minus_prev_max = []
+            for min_ts in annual_min_idx.values:
+                if pd.isna(min_ts):
+                    min_minus_prev_max.append(np.nan)
+                    continue
+                candidates = annual_max_idx[annual_max_idx <= min_ts]
+                if len(candidates) == 0:
+                    min_minus_prev_max.append(np.nan)
+                else:
+                    prev_max_ts = candidates.iloc[-1]
+                    try:
+                        min_val = float(sim_tws.loc[min_ts])
+                        prev_max_val = float(sim_tws.loc[prev_max_ts])
+                        min_minus_prev_max.append(min_val - prev_max_val)
+                    except Exception:
+                        min_minus_prev_max.append(np.nan)
+
+            annual_min_mb = pd.Series(min_minus_prev_max, index=annual_min.index)
+
+            # Map to observation frequency:
+            obs_years = pd.DatetimeIndex(obs_tws.index).year
+            annual_years = pd.DatetimeIndex(annual_max_mb.index).year
+
+            # count observations per year to detect annual vs bi-annual
+            obs_counts = obs_tws.groupby(obs_tws.index.year).size()
+            is_annual = obs_counts.max() == 1 and obs_counts.min() == 1
+            is_biannual = obs_counts.max() == 2 and obs_counts.min() == 2
+
+            if is_annual:
+                # annual observations: sum of the year's max and min-derived metrics
+                mapped = []
+                for y in obs_years:
+                    if y in annual_years:
+                        try:
+                            v_max = annual_max_mb.loc[annual_max_mb.index.year == y].values
+                            v_min = annual_min_mb.loc[annual_min_mb.index.year == y].values
+                            vm = (float(v_max[0]) if len(v_max) > 0 and not pd.isna(v_max[0]) else 0.0)
+                            vn = (float(v_min[0]) if len(v_min) > 0 and not pd.isna(v_min[0]) else 0.0)
+                            mapped.append(vm + vn)
+                        except Exception:
+                            mapped.append(np.nan)
+                    else:
+                        mapped.append(np.nan)
+                sim_anomaly = pd.Series(mapped, index=obs_tws.index, name='sim_tws')
+                obs_anomaly = obs_tws
+            elif is_biannual:
+                # bi-annual observations: assign +value at the month matching annual max, -value at month matching annual min
+                mapped = []
+                # build dicts for quick lookup by year (iterate the timestamp Series
+                # directly so each ts is a Timestamp with a .month attribute)
+                max_month_by_year = {iy: ts.month for iy, ts in zip(annual_max_mb.index.year, annual_max_idx) if not pd.isna(ts)}
+                min_month_by_year = {iy: ts.month for iy, ts in zip(annual_min_mb.index.year, annual_min_idx) if not pd.isna(ts)}
+                max_val_by_year = {iy: (float(v) if not pd.isna(v) else np.nan) for iy, v in zip(annual_max_mb.index.year, annual_max_mb.values)}
+                min_val_by_year = {iy: (float(v) if not pd.isna(v) else np.nan) for iy, v in zip(annual_min_mb.index.year, annual_min_mb.values)}
+
+                for ts in obs_tws.index:
+                    y = ts.year
+                    m = ts.month
+                    if y not in annual_years:
+                        mapped.append(np.nan)
+                        continue
+                    if max_month_by_year.get(y) == m:
+                        mapped.append(abs(max_val_by_year.get(y, np.nan)))
+                    elif min_month_by_year.get(y) == m:
+                        mapped.append(-abs(min_val_by_year.get(y, np.nan)))
+                    else:
+                        mapped.append(np.nan)
+                sim_anomaly = pd.Series(mapped, index=obs_tws.index, name='sim_tws')
+                obs_anomaly = obs_tws
+            else:
+                self.logger.error("Mass balance observation data not annual or biannual")
+                return None
+
+        else:  # GRACE type data
+            # Aggregate simulated to monthly means to match GRACE
+            sim_monthly = sim_tws.resample('MS').mean()
+
+            # Check if we have any simulated data
+            if len(sim_monthly) == 0:
+                self.logger.error("[TWS] No simulated TWS data available for metrics calculation")
+                return None
+
+            # Find common period
+            common_idx = sim_monthly.index.intersection(obs_tws.index)
+            if len(common_idx) == 0:
+                sim_start, sim_end = sim_monthly.index[0], sim_monthly.index[-1]
+                obs_start, obs_end = obs_tws.index[0], obs_tws.index[-1]
+                self.logger.error(f"[TWS] No overlapping period between simulation ({sim_start} to {sim_end}) and observations ({obs_start} to {obs_end})")
+                return None
+
+            self.logger.debug(f"[TWS] Found common period with {len(common_idx)} points")
+            sim_matched = sim_monthly.loc[common_idx]
+            obs_matched = obs_tws.loc[common_idx]
+
+            # Calculate anomalies
+            if self.anomaly_baseline == 'overlap':
+                baseline_mean_sim = sim_matched.mean()
+                baseline_mean_obs = obs_matched.mean()
+            else:  # year range XXXX-XXXX
+                # Parse the year-range baseline and compute mean over that period
+                try:
+                    ys = int(self.anomaly_baseline[:4])
+                    ye = int(self.anomaly_baseline[-4:])
+                    # Ensure DatetimeIndex for year selection
+                    idx_years = pd.DatetimeIndex(sim_matched.index).year
+                    mask = (idx_years >= ys) & (idx_years <= ye)
+                    if mask.any():
+                        baseline_mean_sim = sim_matched.loc[mask].mean()
+                        baseline_mean_obs = obs_matched.loc[mask].mean()
+                    else:
+                        self.logger.warning(f"[TWS] Baseline years {ys}-{ye} not in simulation overlap; using full-overlap mean")
+                        baseline_mean_sim = sim_matched.mean()
+                        baseline_mean_obs = obs_matched.mean()
+                except Exception as exc:
+                    self.logger.warning(f"[TWS] Invalid TWS_ANOMALY_BASELINE '{self.anomaly_baseline}': {exc}; using overlap mean")
+                    baseline_mean_sim = sim_matched.mean()
+                    baseline_mean_obs = obs_matched.mean()
+            sim_anomaly = sim_matched - baseline_mean_sim
+            obs_anomaly = obs_matched - baseline_mean_obs
+
+            # Apply detrending if configured
+            if self.detrend:
+                self.logger.info("[TWS] Applying linear detrending to both series")
+                sim_anomaly = self._detrend_series(sim_anomaly)
+                obs_anomaly = self._detrend_series(obs_anomaly)
+
+            # Apply variability scaling if configured
+            # This forces model variability to match observed (removes alpha penalty in KGE)
+            if self.scale_to_obs:
+                self.logger.info("[TWS] Scaling model variability to match observed")
+                sim_anomaly = self._scale_to_obs_variability(sim_anomaly, obs_anomaly)
 
         # Calculate metrics
-        metrics = self._calculate_performance_metrics(obs_matched, sim_anomaly)
+        metrics = self._calculate_performance_metrics(obs_anomaly, sim_anomaly)
         self.logger.debug(f"[TWS] Calculated metrics: {metrics}")
         return metrics
@@ -685,29 +862,54 @@ def get_diagnostic_data(self, sim_dir: Path) -> Dict[str, Any]:
         sim_matched = sim_monthly.loc[common_index][valid_mask]
         obs_matched = obs_monthly.loc[common_index][valid_mask]
 
-        baseline_mean = sim_matched.mean()
-        sim_anomaly = sim_matched - baseline_mean
+        # Calculate anomalies
+        if self.anomaly_baseline == 'overlap':
+            baseline_mean_sim = sim_matched.mean()
+            baseline_mean_obs = obs_matched.mean()
+        else:  # year range XXXX-XXXX
+            # Parse the year-range baseline and compute mean over that period
+            try:
+                ys = int(self.anomaly_baseline[:4])
+                ye = int(self.anomaly_baseline[-4:])
+                # Ensure DatetimeIndex for year selection
+                idx_years = pd.DatetimeIndex(sim_matched.index).year
+                mask = (idx_years >= ys) & (idx_years <= ye)
+                if mask.any():
+                    baseline_mean_sim = sim_matched.loc[mask].mean()
+                    baseline_mean_obs = obs_matched.loc[mask].mean()
+                else:
+                    self.logger.warning(f"[TWS] Baseline years {ys}-{ye} not in simulation overlap; using full-overlap mean")
+                    baseline_mean_sim = sim_matched.mean()
+                    baseline_mean_obs = obs_matched.mean()
+            except Exception as exc:
+                self.logger.warning(f"[TWS] Invalid TWS_ANOMALY_BASELINE '{self.anomaly_baseline}': {exc}; using overlap mean")
+                baseline_mean_sim = sim_matched.mean()
+                baseline_mean_obs = obs_matched.mean()
+        sim_anomaly = sim_matched - baseline_mean_sim
+        obs_anomaly = obs_matched - baseline_mean_obs
 
         # Store raw values for diagnostics
         sim_anomaly_raw = sim_anomaly.copy()
-        obs_matched_raw = obs_matched.copy()
+        obs_anomaly_raw = obs_anomaly.copy()
 
         # Apply detrending if configured
         if self.detrend:
             sim_anomaly = self._detrend_series(sim_anomaly)
-            obs_matched = self._detrend_series(obs_matched)
+            obs_anomaly = self._detrend_series(obs_anomaly)
 
         # Apply scaling if configured
         if self.scale_to_obs:
-            sim_anomaly = self._scale_to_obs_variability(sim_anomaly, obs_matched)
+            sim_anomaly = self._scale_to_obs_variability(sim_anomaly, obs_anomaly)
 
         return {
             'time': sim_matched.index,
             'sim_tws': sim_matched.values,
             'sim_anomaly': sim_anomaly.values, 
            'sim_anomaly_raw': sim_anomaly_raw.values,
-            'obs_anomaly': obs_matched.values,
-            'obs_anomaly_raw': obs_matched_raw.values,
+            'obs_anomaly': obs_anomaly.values,
+            'obs_anomaly_raw': obs_anomaly_raw.values,
             'grace_column': self.grace_column,
             'grace_all_columns': obs_df.loc[common_index][valid_mask],
             'detrend_applied': self.detrend,
diff --git a/src/symfluence/optimization/workers/summa/metrics_calculation.py b/src/symfluence/optimization/workers/summa/metrics_calculation.py
index c8927f78..59248b6a 100644
--- a/src/symfluence/optimization/workers/summa/metrics_calculation.py
+++ b/src/symfluence/optimization/workers/summa/metrics_calculation.py
@@ -234,7 +234,7 @@ def _calculate_metrics_with_target(summa_dir: Path, mizuroute_dir: Path, config:
         target = ETTarget(config, project_dir, logger)
     elif optimization_target in ['sm_point', 'sm_smap', 'sm_esa', 'sm_ismn', 'soil_moisture', 'sm']:
         target = SoilMoistureTarget(config, project_dir, logger)
-    elif optimization_target in ['tws', 'grace', 'grace_tws', 'total_storage']:
+    elif optimization_target in ['tws', 'grace', 'grace_tws', 'total_storage', 'stor_grace', 'stor_mb']:
         target = TWSTarget(config, project_dir, logger)
     elif optimization_target == 'multivariate':
         target = MultivariateTarget(config, project_dir, logger)

From 63cc9e8d795dc9de26d0e04e3447dda90eed85b0 Mon Sep 17 00:00:00 2001
From: ashleymedin
Date: Thu, 5 Feb 2026 15:23:28 -0600
Subject: [PATCH 5/6] fixing executable find

---
 src/symfluence/evaluation/metrics.py              |  4 ++--
 src/symfluence/models/summa/calibration/worker.py | 11 +++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/symfluence/evaluation/metrics.py b/src/symfluence/evaluation/metrics.py
index 4c7a6956..fd13a587 100644
--- a/src/symfluence/evaluation/metrics.py
+++ b/src/symfluence/evaluation/metrics.py
@@ -465,8 +465,8 @@ def kge_np(
     # Non-parametric alpha: based on normalized flow duration curves
     # Per Pool et al. (2018), normalize by sum so values represent probabilities
     # that sum to 1. This ensures alpha_np ranges from 0 to 1. 
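+    # Illustrative check (hypothetical values): for obs = [1, 2, 3] and
+    # sim = [2, 4, 6], the normalized FDCs are both [1/6, 2/6, 3/6], so
+    # alpha_np = 1.0 while beta = 2.0 still flags the volume bias.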
- fdc_obs = np.sort(obs) / np.sum(obs) - fdc_sim = np.sort(sim) / np.sum(sim) + fdc_obs = np.sort(obs) / np.sum(obs) if mean_obs != 0 else np.nan + fdc_sim = np.sort(sim) / np.sum(sim) if mean_sim != 0 else np.nan alpha_np = 1.0 - 0.5 * np.sum(np.abs(fdc_sim - fdc_obs)) # Beta remains the same (bias ratio) diff --git a/src/symfluence/models/summa/calibration/worker.py b/src/symfluence/models/summa/calibration/worker.py index 29aebc98..78df41c9 100644 --- a/src/symfluence/models/summa/calibration/worker.py +++ b/src/symfluence/models/summa/calibration/worker.py @@ -181,11 +181,14 @@ def run_model( # Import existing functions from symfluence.optimization.workers.summa import _run_summa_worker, _run_mizuroute_worker - summa_exe = Path(config.get('SUMMA_INSTALL_PATH', 'default')) - if str(summa_exe) == 'default': + summa_install_path = config.get('SUMMA_INSTALL_PATH', 'default') + summa_exe = config.get("SUMMA_EXE", "summa_sundials.exe") + if summa_install_path == 'default': data_dir = Path(config.get('SYMFLUENCE_DATA_DIR', '.')) - summa_exe_name = config.get('SUMMA_EXE', 'summa_sundials.exe') - summa_exe = data_dir / 'installs' / 'summa' / 'bin' / summa_exe_name + summa_exe = data_dir / 'installs' / 'summa' / 'bin' / summa_exe + else: + summa_install_path = Path(summa_install_path) + summa_exe = summa_install_path / summa_exe file_manager = settings_dir / 'fileManager.txt' if not file_manager.exists(): From 4855f3e30b023cc29356de2a4e5e7d28389290a5 Mon Sep 17 00:00:00 2001 From: ashleymedin Date: Thu, 5 Feb 2026 15:27:53 -0600 Subject: [PATCH 6/6] fixing summa executable name --- src/symfluence/models/summa/calibration/worker.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/symfluence/models/summa/calibration/worker.py b/src/symfluence/models/summa/calibration/worker.py index 78df41c9..2c8c617f 100644 --- a/src/symfluence/models/summa/calibration/worker.py +++ b/src/symfluence/models/summa/calibration/worker.py @@ -275,10 +275,13 @@ def _run_summa_direct( import subprocess summa_exe = Path(config.get('SUMMA_INSTALL_PATH', 'default')) + summa_exe_name = config.get('SUMMA_EXE', 'summa_sundials.exe') if str(summa_exe) == 'default': data_dir = Path(config.get('SYMFLUENCE_DATA_DIR', '.')) summa_exe_name = config.get('SUMMA_EXE', 'summa_sundials.exe') summa_exe = data_dir / 'installs' / 'summa' / 'bin' / summa_exe_name + else: + summa_exe = summa_exe / summa_exe_name file_manager = settings_dir / 'fileManager.txt'
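Note on the mass-balance mapping in patch 4 — a minimal standalone sketch of the
annual-extreme extraction, using synthetic data (the series and values here are
hypothetical; only the resample/idxmax/idxmin logic mirrors the patch):

    import numpy as np
    import pandas as pd

    # Synthetic daily TWS series standing in for extract_simulated_data() output
    idx = pd.date_range('2000-01-01', '2001-12-31', freq='D')
    sim_tws = pd.Series(500.0 * np.sin(2 * np.pi * (idx.dayofyear - 120) / 365.25), index=idx)

    # Annual extreme timestamps, as in calculate_metrics()
    annual_max_idx = sim_tws.resample('A').idxmax()
    annual_min_idx = sim_tws.resample('A').idxmin()

    # Seasonal maximum balance: each year's max minus the most recent preceding min
    for max_ts in annual_max_idx:
        prev_mins = annual_min_idx[annual_min_idx <= max_ts]
        if len(prev_mins) > 0:
            mb_max = sim_tws[max_ts] - sim_tws[prev_mins.iloc[-1]]
            print(max_ts.date(), round(mb_max, 1))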